更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
from . import rotated_operators
from . import keypoints_3d_operators
from . import culane_operators
from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
from .rotated_operators import *
from .keypoints_3d_operators import *
from .culane_operators import *
__all__ = []
__all__ += registered_ops
__all__ += keypoint_operators.__all__
__all__ += mot_operators.__all__
__all__ += culane_operators.__all__

View File

@@ -0,0 +1,421 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
"""Calculate overlap between two set of bboxes.
If ``is_aligned `` is ``False``, then calculate the overlaps between each
bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
pair of bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
B indicates the batch dim, in shape (B1, B2, ..., Bn).
If ``is_aligned `` is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union) or "iof" (intersection over
foreground).
is_aligned (bool, optional): If True, then m and n must be equal.
Default False.
eps (float, optional): A value added to the denominator for numerical
stability. Default 1e-6.
Returns:
Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
"""
assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
mode)
# Either the boxes are empty or the length of boxes's last dimenstion is 4
assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
# Batch dim must be the same
# Batch dim: (B1, B2, ... Bn)
assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
batch_shape = bboxes1.shape[:-2]
rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
if is_aligned:
assert rows == cols
if rows * cols == 0:
if is_aligned:
return np.random.random(batch_shape + (rows, ))
else:
return np.random.random(batch_shape + (rows, cols))
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
bboxes1[..., 3] - bboxes1[..., 1])
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
bboxes2[..., 3] - bboxes2[..., 1])
if is_aligned:
lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2]) # [B, rows, 2]
rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:]) # [B, rows, 2]
wh = (rb - lt).clip(min=0) # [B, rows, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1 + area2 - overlap
else:
union = area1
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
if mode == 'diou':
enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
b1_x1, b1_y1 = bboxes1[..., 0], bboxes1[..., 1]
b1_x2, b1_y2 = bboxes1[..., 2], bboxes1[..., 3]
b2_x1, b2_y1 = bboxes2[..., 0], bboxes2[..., 1]
b2_x2, b2_y2 = bboxes2[..., 2], bboxes2[..., 3]
else:
lt = np.maximum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2]) # [B, rows, cols, 2]
rb = np.minimum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:]) # [B, rows, cols, 2]
wh = (rb - lt).clip(min=0) # [B, rows, cols, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1[..., None] + area2[..., None, :] - overlap
else:
union = area1[..., None]
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2])
enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:])
if mode == 'diou':
enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2])
enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:])
b1_x1, b1_y1 = bboxes1[..., :, None, 0], bboxes1[..., :, None, 1]
b1_x2, b1_y2 = bboxes1[..., :, None, 2], bboxes1[..., :, None, 3]
b2_x1, b2_y1 = bboxes2[..., None, :, 0], bboxes2[..., None, :, 1]
b2_x2, b2_y2 = bboxes2[..., None, :, 2], bboxes2[..., None, :, 3]
eps = np.array([eps])
union = np.maximum(union, eps)
ious = overlap / union
if mode in ['iou', 'iof']:
return ious
# calculate gious
if mode in ['giou']:
enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
enclose_area = np.maximum(enclose_area, eps)
gious = ious - (enclose_area - union) / enclose_area
return gious
if mode in ['diou']:
left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
rho2 = left + right
enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
enclose_c = np.maximum(enclose_c, eps)
dious = ious - rho2 / enclose_c
return dious
def topk_(input, k, axis=1, largest=True):
x = -input if largest else input
if axis == 0:
row_index = np.arange(input.shape[1 - axis])
if k == x.shape[0]: # argpartition requires index < len(input)
topk_index = np.argpartition(x, k - 1, axis=axis)[0:k, :]
else:
topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
topk_data = x[topk_index, row_index]
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[topk_index_sort, row_index]
topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
else:
column_index = np.arange(x.shape[1 - axis])[:, None]
topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
topk_data = x[column_index, topk_index]
topk_data = -topk_data if largest else topk_data
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[column_index, topk_index_sort]
topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
return topk_data_sort, topk_index_sort
class ATSSAssigner(object):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposals will be assigned with `0` or a positive integer
indicating the ground truth index.
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
topk (float): number of bbox selected in each level
"""
def __init__(self, topk=9):
self.topk = topk
def __call__(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to bboxes.
The assignment is done in following steps
1. compute iou between all bbox (bbox of all pyramid levels) and gt
2. compute center distance between all bbox and gt
3. on each pyramid level, for each gt, select k bbox whose center
are closest to the gt center, so we total select k*l bbox as
candidates for each gt
4. get corresponding iou for the these candidates, and compute the
mean and std, set mean + std as the iou threshold
5. select these candidates whose iou are greater than or equal to
the threshold as postive
6. limit the positive sample's center in gt
Args:
bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
"""
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
# assign 0 by default
assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = np.zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if not np.any(gt_labels):
assigned_labels = None
else:
assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
return assigned_gt_inds, max_overlaps
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = np.stack((gt_cx, gt_cy), axis=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
distances = np.sqrt(
np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
.sum(-1))
# Selecting candidates based on the center distance
candidate_idxs = []
start_idx = 0
for bboxes_per_level in num_level_bboxes:
# on each pyramid level, for each gt,
# select k bbox whose center are closest to the gt center
end_idx = start_idx + bboxes_per_level
distances_per_level = distances[start_idx:end_idx, :]
selectable_k = min(self.topk, bboxes_per_level)
_, topk_idxs_per_level = topk_(
distances_per_level, selectable_k, axis=0, largest=False)
candidate_idxs.append(topk_idxs_per_level + start_idx)
start_idx = end_idx
candidate_idxs = np.concatenate(candidate_idxs, axis=0)
# get corresponding iou for the these candidates, and compute the
# mean and std, set mean + std as the iou threshold
candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps.mean(0)
overlaps_std_per_gt = candidate_overlaps.std(0)
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
# limit the positive sample's center in gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
ep_bboxes_cx = np.broadcast_to(
bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
ep_bboxes_cy = np.broadcast_to(
bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
candidate_idxs = candidate_idxs.reshape(-1)
# calculate the left, top, right, bottom distance between positive
# bbox center and gt side
l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
is_pos = is_pos & is_in_gts
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
overlaps_inf[index] = overlaps.T.reshape(-1)[index]
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
max_overlaps = overlaps_inf.max(axis=1)
argmax_overlaps = overlaps_inf.argmax(axis=1)
assigned_gt_inds[max_overlaps !=
-np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
return assigned_gt_inds, max_overlaps
def get_vlr_region(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""get vlr region for ld distillation.
Args:
bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
"""
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# compute diou between all bbox and gt
diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')
# assign 0 by default
assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
vlr_region_iou = (assigned_gt_inds + 0).astype(np.float32)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = np.zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if not np.any(gt_labels):
assigned_labels = None
else:
assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
return assigned_gt_inds, max_overlaps
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = np.stack((gt_cx, gt_cy), axis=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
distances = np.sqrt(
np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
.sum(-1))
# Selecting candidates based on the center distance
candidate_idxs = []
candidate_idxs_t = []
start_idx = 0
for bboxes_per_level in num_level_bboxes:
# on each pyramid level, for each gt,
# select k bbox whose center are closest to the gt center
end_idx = start_idx + bboxes_per_level
distances_per_level = distances[start_idx:end_idx, :]
selectable_t = min(self.topk, bboxes_per_level)
selectable_k = bboxes_per_level #k for all
_, topt_idxs_per_level = topk_(
distances_per_level, selectable_t, axis=0, largest=False)
_, topk_idxs_per_level = topk_(
distances_per_level, selectable_k, axis=0, largest=False)
candidate_idxs_t.append(topt_idxs_per_level + start_idx)
candidate_idxs.append(topk_idxs_per_level + start_idx)
start_idx = end_idx
candidate_idxs_t = np.concatenate(candidate_idxs_t, axis=0)
candidate_idxs = np.concatenate(candidate_idxs, axis=0)
# get corresponding iou for the these candidates, and compute the
# mean and std, set mean + std as the iou threshold
candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]
# compute tdiou
t_diou = diou[candidate_idxs, np.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps_t.mean(0)
overlaps_std_per_gt = candidate_overlaps_t.std(
0, ddof=1) # NOTE: use Bessel correction
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
# compute region
is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
t_diou >= 0.25 * overlaps_thr_per_gt[None, :])
# limit the positive sample's center in gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
candidate_idxs = candidate_idxs.reshape(-1)
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
overlaps_inf[index] = overlaps.T.reshape(-1)[index]
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
max_overlaps = overlaps_inf.max(axis=1)
argmax_overlaps = overlaps_inf.argmax(axis=1)
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
assigned_gt_inds[max_overlaps !=
-np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
vlr_region_iou[max_overlaps !=
-np.inf] = max_overlaps[max_overlaps != -np.inf] + 0
return vlr_region_iou

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,366 @@
import numpy as np
import imgaug.augmenters as iaa
from .operators import BaseOperator, register_op
from ppdet.utils.logger import setup_logger
from ppdet.data.culane_utils import linestrings_to_lanes, transform_annotation
logger = setup_logger(__name__)
__all__ = [
"CULaneTrainProcess", "CULaneDataProcess", "HorizontalFlip",
"ChannelShuffle", "CULaneAffine", "CULaneResize", "OneOfBlur",
"MultiplyAndAddToBrightness", "AddToHueAndSaturation"
]
def trainTransforms(img_h, img_w):
transforms = [{
'name': 'Resize',
'parameters': dict(size=dict(
height=img_h, width=img_w)),
'p': 1.0
}, {
'name': 'HorizontalFlip',
'parameters': dict(p=1.0),
'p': 0.5
}, {
'name': 'ChannelShuffle',
'parameters': dict(p=1.0),
'p': 0.1
}, {
'name': 'MultiplyAndAddToBrightness',
'parameters': dict(
mul=(0.85, 1.15), add=(-10, 10)),
'p': 0.6
}, {
'name': 'AddToHueAndSaturation',
'parameters': dict(value=(-10, 10)),
'p': 0.7
}, {
'name': 'OneOf',
'transforms': [
dict(
name='MotionBlur', parameters=dict(k=(3, 5))), dict(
name='MedianBlur', parameters=dict(k=(3, 5)))
],
'p': 0.2
}, {
'name': 'Affine',
'parameters': dict(
translate_percent=dict(
x=(-0.1, 0.1), y=(-0.1, 0.1)),
rotate=(-10, 10),
scale=(0.8, 1.2)),
'p': 0.7
}, {
'name': 'Resize',
'parameters': dict(size=dict(
height=img_h, width=img_w)),
'p': 1.0
}]
return transforms
@register_op
class CULaneTrainProcess(BaseOperator):
def __init__(self, img_w, img_h):
super(CULaneTrainProcess, self).__init__()
self.img_w = img_w
self.img_h = img_h
self.transforms = trainTransforms(self.img_h, self.img_w)
if self.transforms is not None:
img_transforms = []
for aug in self.transforms:
p = aug['p']
if aug['name'] != 'OneOf':
img_transforms.append(
iaa.Sometimes(
p=p,
then_list=getattr(iaa, aug['name'])(**aug[
'parameters'])))
else:
img_transforms.append(
iaa.Sometimes(
p=p,
then_list=iaa.OneOf([
getattr(iaa, aug_['name'])(**aug_['parameters'])
for aug_ in aug['transforms']
])))
else:
img_transforms = []
self.iaa_transform = iaa.Sequential(img_transforms)
def apply(self, sample, context=None):
img, line_strings, seg = self.iaa_transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
return sample
@register_op
class CULaneDataProcess(BaseOperator):
def __init__(self, img_w, img_h, num_points, max_lanes):
super(CULaneDataProcess, self).__init__()
self.img_w = img_w
self.img_h = img_h
self.num_points = num_points
self.n_offsets = num_points
self.n_strips = num_points - 1
self.strip_size = self.img_h / self.n_strips
self.max_lanes = max_lanes
self.offsets_ys = np.arange(self.img_h, -1, -self.strip_size)
def apply(self, sample, context=None):
data = {}
line_strings = sample['lanes']
line_strings.clip_out_of_image_()
new_anno = {'lanes': linestrings_to_lanes(line_strings)}
for i in range(30):
try:
annos = transform_annotation(
self.img_w, self.img_h, self.max_lanes, self.n_offsets,
self.offsets_ys, self.n_strips, self.strip_size, new_anno)
label = annos['label']
lane_endpoints = annos['lane_endpoints']
break
except:
if (i + 1) == 30:
logger.critical('Transform annotation failed 30 times :(')
exit()
sample['image'] = sample['image'].astype(np.float32) / 255.
data['image'] = sample['image'].transpose(2, 0, 1)
data['lane_line'] = label
data['seg'] = sample['seg']
data['full_img_path'] = sample['full_img_path']
data['img_name'] = sample['img_name']
data['im_id'] = sample['im_id']
if 'mask' in sample.keys():
data['seg'] = sample['mask'].get_arr()
data['im_shape'] = np.array([self.img_w, self.img_h], dtype=np.float32)
data['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return data
@register_op
class CULaneResize(BaseOperator):
def __init__(self, img_h, img_w, prob=0.5):
super(CULaneResize, self).__init__()
self.img_h = img_h
self.img_w = img_w
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob,
iaa.Resize({
"height": self.img_h,
"width": self.img_w
}))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'].copy().astype(np.uint8),
line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class HorizontalFlip(BaseOperator):
def __init__(self, prob=0.5):
super(HorizontalFlip, self).__init__()
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob, iaa.HorizontalFlip(1.0))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class ChannelShuffle(BaseOperator):
def __init__(self, prob=0.1):
super(ChannelShuffle, self).__init__()
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob, iaa.ChannelShuffle(1.0))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class MultiplyAndAddToBrightness(BaseOperator):
def __init__(self, mul=(0.85, 1.15), add=(-10, 10), prob=0.5):
super(MultiplyAndAddToBrightness, self).__init__()
self.mul = tuple(mul)
self.add = tuple(add)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.MultiplyAndAddToBrightness(
mul=self.mul, add=self.add))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class AddToHueAndSaturation(BaseOperator):
def __init__(self, value=(-10, 10), prob=0.5):
super(AddToHueAndSaturation, self).__init__()
self.value = tuple(value)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob, iaa.AddToHueAndSaturation(value=self.value))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class OneOfBlur(BaseOperator):
def __init__(self, MotionBlur_k=(3, 5), MedianBlur_k=(3, 5), prob=0.5):
super(OneOfBlur, self).__init__()
self.MotionBlur_k = tuple(MotionBlur_k)
self.MedianBlur_k = tuple(MedianBlur_k)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.OneOf([
iaa.MotionBlur(k=self.MotionBlur_k),
iaa.MedianBlur(k=self.MedianBlur_k)
]))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class CULaneAffine(BaseOperator):
def __init__(self,
translate_percent_x=(-0.1, 0.1),
translate_percent_y=(-0.1, 0.1),
rotate=(3, 5),
scale=(0.8, 1.2),
prob=0.5):
super(CULaneAffine, self).__init__()
self.translate_percent = {
'x': tuple(translate_percent_x),
'y': tuple(translate_percent_y)
}
self.rotate = tuple(rotate)
self.scale = tuple(scale)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.Affine(
translate_percent=self.translate_percent,
rotate=self.rotate,
scale=self.scale))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample

View File

@@ -0,0 +1,86 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import numpy as np
from PIL import Image
class Gridmask(object):
def __init__(self,
use_h=True,
use_w=True,
rotate=1,
offset=False,
ratio=0.5,
mode=1,
prob=0.7,
upper_iter=360000):
super(Gridmask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.prob = prob
self.st_prob = prob
self.upper_iter = upper_iter
def __call__(self, x, curr_iter):
self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
if np.random.rand() > self.prob:
return x
h, w, _ = x.shape
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(2, h)
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2
+ w].astype(np.float32)
if self.mode == 1:
mask = 1 - mask
mask = np.expand_dims(mask, axis=-1)
if self.offset:
offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
x = (x * mask + offset * (1 - mask)).astype(x.dtype)
else:
x = (x * mask).astype(x.dtype)
return x

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,296 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import cv2
import numpy as np
import math
import copy
import random
import uuid
from numbers import Number, Integral
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
from ppdet.core.workspace import serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
registered_ops = []
__all__ = [
'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
]
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from mpl_toolkits.mplot3d import Axes3D
def register_keypointop(cls):
return serializable(cls)
def register_op(cls):
registered_ops.append(cls.__name__)
if not hasattr(BaseOperator, cls.__name__):
setattr(BaseOperator, cls.__name__, cls)
else:
raise KeyError("The {} class has been registered.".format(cls.__name__))
return serializable(cls)
class BaseOperator(object):
def __init__(self, name=None):
if name is None:
name = self.__class__.__name__
self._id = name + '_' + str(uuid.uuid4())[-6:]
def apply(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
return sample
def __call__(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
if isinstance(sample, Sequence): # for batch_size
for i in range(len(sample)):
sample[i] = self.apply(sample[i], context)
else:
# image.shape changed
sample = self.apply(sample, context)
return sample
def __str__(self):
return str(self._id)
@register_keypointop
class CropAndFlipImages(object):
"""Crop all images"""
def __init__(self, crop_range, flip_pairs=None):
super(CropAndFlipImages, self).__init__()
self.crop_range = crop_range
self.flip_pairs = flip_pairs
def __call__(self, records): # tuple
images = records["image"]
images = images[:, :, ::-1, :]
images = images[:, :, self.crop_range[0]:self.crop_range[1]]
records["image"] = images
if "kps2d" in records.keys():
kps2d = records["kps2d"]
width, height = images.shape[2], images.shape[1]
kps2d = np.array(kps2d)
kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]
for pair in self.flip_pairs:
kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
records["kps2d"] = kps2d
return records
@register_op
class PermuteImages(BaseOperator):
def __init__(self):
"""
Change the channel to be (batch_size, C, H, W) #(6, 3, 1080, 1920)
"""
super(PermuteImages, self).__init__()
def apply(self, sample, context=None):
images = sample["image"]
images = images.transpose((0, 3, 1, 2))
sample["image"] = images
return sample
@register_keypointop
class RandomFlipHalfBody3DTransformImages(object):
"""apply data augment to images and coords
to achieve the flip, scale, rotate and half body transform effect for training image
Args:
trainsize (list):[w, h], Image target size
upper_body_ids (list): The upper body joint ids
flip_pairs (list): The left-right joints exchange order list
pixel_std (int): The pixel std of the scale
scale (float): The scale factor to transform the image
rot (int): The rotate factor to transform the image
num_joints_half_body (int): The joints threshold of the half body transform
prob_half_body (float): The threshold of the half body transform
flip (bool): Whether to flip the image
Returns:
records(dict): contain the image and coords after tranformed
"""
def __init__(self,
trainsize,
upper_body_ids,
flip_pairs,
pixel_std,
scale=0.35,
rot=40,
num_joints_half_body=8,
prob_half_body=0.3,
flip=True,
rot_prob=0.6,
do_occlusion=False):
super(RandomFlipHalfBody3DTransformImages, self).__init__()
self.trainsize = trainsize
self.upper_body_ids = upper_body_ids
self.flip_pairs = flip_pairs
self.pixel_std = pixel_std
self.scale = scale
self.rot = rot
self.num_joints_half_body = num_joints_half_body
self.prob_half_body = prob_half_body
self.flip = flip
self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
self.rot_prob = rot_prob
self.do_occlusion = do_occlusion
def halfbody_transform(self, joints, joints_vis):
upper_joints = []
lower_joints = []
for joint_id in range(joints.shape[0]):
if joints_vis[joint_id][0] > 0:
if joint_id in self.upper_body_ids:
upper_joints.append(joints[joint_id])
else:
lower_joints.append(joints[joint_id])
if np.random.randn() < 0.5 and len(upper_joints) > 2:
selected_joints = upper_joints
else:
selected_joints = lower_joints if len(
lower_joints) > 2 else upper_joints
if len(selected_joints) < 2:
return None, None
selected_joints = np.array(selected_joints, dtype=np.float32)
center = selected_joints.mean(axis=0)[:2]
left_top = np.amin(selected_joints, axis=0)
right_bottom = np.amax(selected_joints, axis=0)
w = right_bottom[0] - left_top[0]
h = right_bottom[1] - left_top[1]
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
scale = scale * 1.5
return center, scale
def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
# joints: (6, 24, 3),(num_frames, num_joints, 3)
joints[:, :, 0] = width - joints[:, :, 0] - 1 # x
if kps2d is not None:
kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1
for pair in matched_parts:
joints[:, pair[0], :], joints[:,pair[1], :] = \
joints[:,pair[1], :], joints[:,pair[0], :].copy()
joints_vis[:,pair[0], :], joints_vis[:,pair[1], :] = \
joints_vis[:,pair[1], :], joints_vis[:,pair[0], :].copy()
if kps2d is not None:
kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
# move to zero
joints -= joints[:, [0], :] # (batch_size, 24, 3),numpy.ndarray
return joints, joints_vis, kps2d
def __call__(self, records):
images = records[
'image'] #kps3d, kps3d_vis, images. images.shape(num_frames, width, height, 3)
joints = records['kps3d']
joints_vis = records['kps3d_vis']
kps2d = None
if 'kps2d' in records.keys():
kps2d = records['kps2d']
if self.flip and np.random.random() <= 0.5:
images = images[:, :, ::-1, :] # 图像水平翻转 (6, 1080, 810, 3)
joints, joints_vis, kps2d = self.flip_joints(
joints, joints_vis, images.shape[2], self.flip_pairs,
kps2d) # 关键点左右对称翻转
occlusion = False
if self.do_occlusion and random.random() <= 0.5: # 随机遮挡
height = images[0].shape[0]
width = images[0].shape[1]
occlusion = True
while True:
area_min = 0.0
area_max = 0.2
synth_area = (random.random() *
(area_max - area_min) + area_min) * width * height
ratio_min = 0.3
ratio_max = 1 / 0.3
synth_ratio = (random.random() *
(ratio_max - ratio_min) + ratio_min)
synth_h = math.sqrt(synth_area * synth_ratio)
synth_w = math.sqrt(synth_area / synth_ratio)
synth_xmin = random.random() * (width - synth_w - 1)
synth_ymin = random.random() * (height - synth_h - 1)
if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
xmin = int(synth_xmin)
ymin = int(synth_ymin)
w = int(synth_w)
h = int(synth_h)
mask = np.random.rand(h, w, 3) * 255
images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
None, :, :, :]
break
records['image'] = images
records['kps3d'] = joints
records['kps3d_vis'] = joints_vis
if kps2d is not None:
records['kps2d'] = kps2d
return records

View File

@@ -0,0 +1,627 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Integral
import cv2
import copy
import numpy as np
import random
import math
from .operators import BaseOperator, register_op
from .batch_operators import Gt2TTFTarget
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
from ppdet.utils.logger import setup_logger
from .op_helper import gaussian_radius
logger = setup_logger(__name__)
__all__ = [
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
'Gt2JDETargetMax', 'Gt2FairMOTTarget'
]
@register_op
class RGBReverse(BaseOperator):
"""RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
"""
def __init__(self):
super(RGBReverse, self).__init__()
def apply(self, sample, context=None):
im = sample['image']
sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
return sample
@register_op
class LetterBoxResize(BaseOperator):
def __init__(self, target_size):
"""
Resize image to target size, convert normalized xywh to pixel xyxy
format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
bboxes = bbox0.copy()
bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
return bboxes
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
h, w = sample['im_shape']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
from PIL import UnidentifiedImageError
raise UnidentifiedImageError(
'{}: image is not 3-dimensional.'.format(self))
# apply image
height, width = self.target_size
img, ratio, padw, padh = self.apply_image(
im, height=height, width=width)
sample['image'] = img
new_shape = (round(h * ratio), round(w * ratio))
sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
padw, padh)
return sample
@register_op
class MOTRandomAffine(BaseOperator):
"""
Affine transform to image and coords to achieve the rotate, scale and
shift effect for training image.
Args:
degrees (list[2]): the rotate range to apply, transform range is [min, max]
translate (list[2]): the translate range to apply, transform range is [min, max]
scale (list[2]): the scale range to apply, transform range is [min, max]
shear (list[2]): the shear range to apply, transform range is [min, max]
borderValue (list[3]): value used in case of a constant border when appling
the perspective transformation
reject_outside (bool): reject warped bounding bboxes outside of image
Returns:
records(dict): contain the image and coords after tranformed
"""
def __init__(self,
degrees=(-5, 5),
translate=(0.10, 0.10),
scale=(0.50, 1.20),
shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5),
reject_outside=True):
super(MOTRandomAffine, self).__init__()
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.borderValue = borderValue
self.reject_outside = reject_outside
def apply(self, sample, context=None):
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
border = 0 # width of added border (optional)
img = sample['image']
height, width = img.shape[0], img.shape[1]
# Rotation and Scale
R = np.eye(3)
a = random.random() * (self.degrees[1] - self.degrees[0]
) + self.degrees[0]
s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
R[:2] = cv2.getRotationMatrix2D(
angle=a, center=(width / 2, height / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (
random.random() * 2 - 1
) * self.translate[0] * height + border # x translation (pixels)
T[1, 2] = (
random.random() * 2 - 1
) * self.translate[1] * width + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # y shear (deg)
M = S @T @R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(
img,
M,
dsize=(width, height),
flags=cv2.INTER_LINEAR,
borderValue=self.borderValue) # BGR order borderValue
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
targets = sample['gt_bbox']
n = targets.shape[0]
points = targets.copy()
area0 = (points[:, 2] - points[:, 0]) * (
points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate(
(x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
if self.reject_outside:
np.clip(xy[:, 0], 0, width, out=xy[:, 0])
np.clip(xy[:, 2], 0, width, out=xy[:, 2])
np.clip(xy[:, 1], 0, height, out=xy[:, 1])
np.clip(xy[:, 3], 0, height, out=xy[:, 3])
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
if sum(i) > 0:
sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
sample['gt_class'] = sample['gt_class'][i]
if 'difficult' in sample:
sample['difficult'] = sample['difficult'][i]
if 'gt_ide' in sample:
sample['gt_ide'] = sample['gt_ide'][i]
if 'is_crowd' in sample:
sample['is_crowd'] = sample['is_crowd'][i]
sample['image'] = imw
return sample
else:
return sample
@register_op
class Gt2JDETargetThres(BaseOperator):
__shared__ = ['num_classes']
"""
Generate JDE targets by groud truth data when training
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
ide_thresh (float): thresh of identity, higher is groud truth
fg_thresh (float): thresh of foreground, higher is foreground
bg_thresh (float): thresh of background, lower is background
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
ide_thresh=0.5,
fg_thresh=0.5,
bg_thresh=0.4,
num_classes=1):
super(Gt2JDETargetThres, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.ide_thresh = ide_thresh
self.fg_thresh = fg_thresh
self.bg_thresh = bg_thresh
self.num_classes = num_classes
def generate_anchor(self, nGh, nGw, anchor_hw):
nA = len(anchor_hw)
yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
mesh = np.stack([xx.T, yy.T], axis=0) # [2, nGh, nGw]
mesh = np.repeat(mesh[None, :], nA, axis=0) # [nA, 2, nGh, nGw]
anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
anchor_mesh = np.concatenate(
[mesh, anchor_offset_mesh], axis=1) # [nA, 4, nGh, nGw]
return anchor_mesh
def encode_delta(self, gt_box_list, fg_anchor_list):
px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
fg_anchor_list[:, 2], fg_anchor_list[:,3]
gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
gt_box_list[:, 2], gt_box_list[:, 3]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = np.log(gw / pw)
dh = np.log(gh / ph)
return np.stack([dx, dy, dw, dh], axis=1)
def pad_box(self, sample, num_max):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = len(bbox)
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_score' in sample:
pad_score = np.zeros((num_max, ), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
if 'difficult' in sample:
pad_diff = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
if 'is_crowd' in sample:
pad_crowd = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
sample['is_crowd'] = pad_crowd
if 'gt_ide' in sample:
pad_ide = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
sample['gt_ide'] = pad_ide
return sample
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
h, w = samples[0]['image'].shape[1:3]
num_max = 0
for sample in samples:
num_max = max(num_max, len(sample['gt_bbox']))
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
tboxes = np.concatenate([gxy, gwh], axis=1)
anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
anchor_list = np.transpose(anchor_mesh,
(0, 2, 3, 1)).reshape(-1, 4)
iou_pdist = bbox_iou_np_expand(
anchor_list, tboxes, x1y1x2y2=False)
iou_max = np.max(iou_pdist, axis=1)
max_gt_index = np.argmax(iou_pdist, axis=1)
iou_map = iou_max.reshape(nA, nGh, nGw)
gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
id_index = iou_map > self.ide_thresh
fg_index = iou_map > self.fg_thresh
bg_index = iou_map < self.bg_thresh
ign_index = (iou_map < self.fg_thresh) * (
iou_map > self.bg_thresh)
tconf[fg_index] = 1
tconf[bg_index] = 0
tconf[ign_index] = -1
gt_index = gt_index_map[fg_index]
gt_box_list = tboxes[gt_index]
gt_id_list = gt_ide[gt_index_map[id_index]]
if np.sum(fg_index) > 0:
tid[id_index] = gt_id_list
fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
4)[fg_index]
delta_target = self.encode_delta(gt_box_list,
fg_anchor_list)
tbox[fg_index] = delta_target
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
sample.pop('gt_class')
sample = self.pad_box(sample, num_max)
return samples
@register_op
class Gt2JDETargetMax(BaseOperator):
__shared__ = ['num_classes']
"""
Generate JDE targets by groud truth data when evaluating
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
max_iou_thresh (float): iou thresh for high quality anchor
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
max_iou_thresh=0.60,
num_classes=1):
super(Gt2JDETargetMax, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.max_iou_thresh = max_iou_thresh
self.num_classes = num_classes
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
h, w = samples[0]['image'].shape[1:3]
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
# iou of targets-anchors (using wh only)
box1 = gwh
box2 = anchor_hw[:, None, :]
inter_area = np.minimum(box1, box2).prod(2)
iou = inter_area / (
box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
# Select best iou_pred and anchor
iou_best = iou.max(0) # best anchor [0-2] for each target
a = np.argmax(iou, axis=0)
# Select best unique target-anchor combinations
iou_order = np.argsort(-iou_best) # best to worst
# Unique anchor selection
u = np.stack((gi, gj, a), 0)[:, iou_order]
_, first_unique = np.unique(u, axis=1, return_index=True)
mask = iou_order[first_unique]
# best anchor must share significant commonality (iou) with target
# TODO: examine arbitrary threshold
idx = mask[iou_best[mask] > self.max_iou_thresh]
if len(idx) > 0:
a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
t_box = gt_bbox[idx]
t_id = gt_ide[idx]
if len(t_box.shape) == 1:
t_box = t_box.reshape(1, 4)
gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
# XY coordinates
tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
# Width and height in yolo method
tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
anchor_hw[a_i])
tconf[a_i, gj_i, gi_i] = 1
tid[a_i, gj_i, gi_i] = t_id
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
class Gt2FairMOTTarget(Gt2TTFTarget):
__shared__ = ['num_classes']
"""
Generate FairMOT targets by ground truth data.
Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
1. the gaussian kernal radius to generate a heatmap.
2. the targets needed during training.
Args:
num_classes(int): the number of classes.
down_ratio(int): the down ratio from images to heatmap, 4 by default.
max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
"""
def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
super(Gt2TTFTarget, self).__init__()
self.down_ratio = down_ratio
self.num_classes = num_classes
self.max_objs = max_objs
def __call__(self, samples, context=None):
for b_id, sample in enumerate(samples):
output_h = sample['image'].shape[1] // self.down_ratio
output_w = sample['image'].shape[2] // self.down_ratio
heatmap = np.zeros(
(self.num_classes, output_h, output_w), dtype='float32')
bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
index = np.zeros((self.max_objs, ), dtype=np.int64)
index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
reid = np.zeros((self.max_objs, ), dtype=np.int64)
bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
if self.num_classes > 1:
# each category corresponds to a set of track ids
cls_tr_ids = np.zeros(
(self.num_classes, output_h, output_w), dtype=np.int64)
cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
gt_ide = sample['gt_ide']
for k in range(len(gt_bbox)):
cls_id = gt_class[k][0]
bbox = gt_bbox[k]
ide = gt_ide[k][0]
bbox[[0, 2]] = bbox[[0, 2]] * output_w
bbox[[1, 3]] = bbox[[1, 3]] * output_h
bbox_amodal = copy.deepcopy(bbox)
bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
bbox[0] = np.clip(bbox[0], 0, output_w - 1)
bbox[1] = np.clip(bbox[1], 0, output_h - 1)
h = bbox[3]
w = bbox[2]
bbox_xy = copy.deepcopy(bbox)
bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
radius = max(0, int(radius))
ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
ct_int = ct.astype(np.int32)
self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
radius)
bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
index[k] = ct_int[1] * output_w + ct_int[0]
center_offset[k] = ct - ct_int
index_mask[k] = 1
reid[k] = ide
bbox_xys[k] = bbox_xy
if self.num_classes > 1:
cls_id_map[ct_int[1], ct_int[0]] = cls_id
cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
# track id start from 0
sample['heatmap'] = heatmap
sample['index'] = index
sample['offset'] = center_offset
sample['size'] = bbox_size
sample['index_mask'] = index_mask
sample['reid'] = reid
if self.num_classes > 1:
sample['cls_id_map'] = cls_id_map
sample['cls_tr_ids'] = cls_tr_ids
sample['bbox_xys'] = bbox_xys
sample.pop('is_crowd', None)
sample.pop('difficult', None)
sample.pop('gt_class', None)
sample.pop('gt_bbox', None)
sample.pop('gt_score', None)
sample.pop('gt_ide', None)
return samples

View File

@@ -0,0 +1,494 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains helper methods for BBOX processing
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import random
import math
import cv2
def meet_emit_constraint(src_bbox, sample_bbox):
center_x = (src_bbox[2] + src_bbox[0]) / 2
center_y = (src_bbox[3] + src_bbox[1]) / 2
if center_x >= sample_bbox[0] and \
center_x <= sample_bbox[2] and \
center_y >= sample_bbox[1] and \
center_y <= sample_bbox[3]:
return True
return False
def clip_bbox(src_bbox):
src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
return src_bbox
def bbox_area(src_bbox):
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def is_overlap(object_bbox, sample_bbox):
if object_bbox[0] >= sample_bbox[2] or \
object_bbox[2] <= sample_bbox[0] or \
object_bbox[1] >= sample_bbox[3] or \
object_bbox[3] <= sample_bbox[1]:
return False
else:
return True
def filter_and_process(sample_bbox, bboxes, labels, scores=None,
keypoints=None):
new_bboxes = []
new_labels = []
new_scores = []
new_keypoints = []
new_kp_ignore = []
for i in range(len(bboxes)):
new_bbox = [0, 0, 0, 0]
obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
if not meet_emit_constraint(obj_bbox, sample_bbox):
continue
if not is_overlap(obj_bbox, sample_bbox):
continue
sample_width = sample_bbox[2] - sample_bbox[0]
sample_height = sample_bbox[3] - sample_bbox[1]
new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
new_bbox = clip_bbox(new_bbox)
if bbox_area(new_bbox) > 0:
new_bboxes.append(new_bbox)
new_labels.append([labels[i][0]])
if scores is not None:
new_scores.append([scores[i][0]])
if keypoints is not None:
sample_keypoint = keypoints[0][i]
for j in range(len(sample_keypoint)):
kp_len = sample_height if j % 2 else sample_width
sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
sample_keypoint[j] = (
sample_keypoint[j] - sample_coord) / kp_len
sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
new_keypoints.append(sample_keypoint)
new_kp_ignore.append(keypoints[1][i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
if keypoints is not None:
keypoints = np.array(new_keypoints)
new_kp_ignore = np.array(new_kp_ignore)
return bboxes, labels, scores, (keypoints, new_kp_ignore)
return bboxes, labels, scores
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
new_bboxes = []
new_labels = []
new_scores = []
for i, bbox in enumerate(bboxes):
w = float((bbox[2] - bbox[0]) * target_size)
h = float((bbox[3] - bbox[1]) * target_size)
if w * h < float(min_size * min_size):
continue
else:
new_bboxes.append(bbox)
new_labels.append(labels[i])
if scores is not None and scores.size != 0:
new_scores.append(scores[i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
return bboxes, labels, scores
def generate_sample_bbox(sampler):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def generate_sample_bbox_square(sampler, image_width, image_height):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
if image_height < image_width:
bbox_width = bbox_height * image_height / image_width
else:
bbox_height = bbox_width * image_width / image_height
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
resize_width):
num_gt = len(bbox_labels)
# np.random.randint range: [low, high)
rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
if num_gt != 0:
norm_xmin = bbox_labels[rand_idx][0]
norm_ymin = bbox_labels[rand_idx][1]
norm_xmax = bbox_labels[rand_idx][2]
norm_ymax = bbox_labels[rand_idx][3]
xmin = norm_xmin * image_width
ymin = norm_ymin * image_height
wid = image_width * (norm_xmax - norm_xmin)
hei = image_height * (norm_ymax - norm_ymin)
range_size = 0
area = wid * hei
for scale_ind in range(0, len(scale_array) - 1):
if area > scale_array[scale_ind] ** 2 and area < \
scale_array[scale_ind + 1] ** 2:
range_size = scale_ind + 1
break
if area > scale_array[len(scale_array) - 2]**2:
range_size = len(scale_array) - 2
scale_choose = 0.0
if range_size == 0:
rand_idx_size = 0
else:
# np.random.randint range: [low, high)
rng_rand_size = np.random.randint(0, range_size + 1)
rand_idx_size = rng_rand_size % (range_size + 1)
if rand_idx_size == range_size:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = min(2.0 * scale_array[rand_idx_size],
2 * math.sqrt(wid * hei))
scale_choose = random.uniform(min_resize_val, max_resize_val)
else:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = 2.0 * scale_array[rand_idx_size]
scale_choose = random.uniform(min_resize_val, max_resize_val)
sample_bbox_size = wid * resize_width / scale_choose
w_off_orig = 0.0
h_off_orig = 0.0
if sample_bbox_size < max(image_height, image_width):
if wid <= sample_bbox_size:
w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
xmin)
else:
w_off_orig = np.random.uniform(xmin,
xmin + wid - sample_bbox_size)
if hei <= sample_bbox_size:
h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
ymin)
else:
h_off_orig = np.random.uniform(ymin,
ymin + hei - sample_bbox_size)
else:
w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)
w_off_orig = math.floor(w_off_orig)
h_off_orig = math.floor(h_off_orig)
# Figure out top left coordinates.
w_off = float(w_off_orig / image_width)
h_off = float(h_off_orig / image_height)
sampled_bbox = [
w_off, h_off, w_off + float(sample_bbox_size / image_width),
h_off + float(sample_bbox_size / image_height)
]
return sampled_bbox
else:
return 0
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
sample_bbox[1] >= object_bbox[3] or \
sample_bbox[3] <= object_bbox[1]:
return 0
intersect_xmin = max(sample_bbox[0], object_bbox[0])
intersect_ymin = max(sample_bbox[1], object_bbox[1])
intersect_xmax = min(sample_bbox[2], object_bbox[2])
intersect_ymax = min(sample_bbox[3], object_bbox[3])
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
def intersect_bbox(bbox1, bbox2):
if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
intersection_box = [0.0, 0.0, 0.0, 0.0]
else:
intersection_box = [
max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
]
return intersection_box
def bbox_coverage(bbox1, bbox2):
inter_box = intersect_bbox(bbox1, bbox2)
intersect_size = bbox_area(inter_box)
if intersect_size > 0:
bbox1_size = bbox_area(bbox1)
return intersect_size / bbox1_size
else:
return 0.
def satisfy_sample_constraint(sampler,
sample_bbox,
gt_bboxes,
satisfy_all=False):
if sampler[6] == 0 and sampler[7] == 0:
return True
satisfied = []
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
satisfied.append(False)
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
satisfied.append(False)
continue
satisfied.append(True)
if not satisfy_all:
return True
if satisfy_all:
return np.all(satisfied)
else:
return False
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
if sampler[6] == 0 and sampler[7] == 0:
has_jaccard_overlap = False
else:
has_jaccard_overlap = True
if sampler[8] == 0 and sampler[9] == 0:
has_object_coverage = False
else:
has_object_coverage = True
if not has_jaccard_overlap and not has_object_coverage:
return True
found = False
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
if has_jaccard_overlap:
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
continue
found = True
if has_object_coverage:
object_coverage = bbox_coverage(object_bbox, sample_bbox)
if sampler[8] != 0 and \
object_coverage < sampler[8]:
continue
if sampler[9] != 0 and \
object_coverage > sampler[9]:
continue
found = True
if found:
return True
return found
def crop_image_sampling(img, sample_bbox, image_width, image_height,
target_size):
# no clipping here
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
w_off = xmin
h_off = ymin
width = xmax - xmin
height = ymax - ymin
cross_xmin = max(0.0, float(w_off))
cross_ymin = max(0.0, float(h_off))
cross_xmax = min(float(w_off + width - 1.0), float(image_width))
cross_ymax = min(float(h_off + height - 1.0), float(image_height))
cross_width = cross_xmax - cross_xmin
cross_height = cross_ymax - cross_ymin
roi_xmin = 0 if w_off >= 0 else abs(w_off)
roi_ymin = 0 if h_off >= 0 else abs(h_off)
roi_width = cross_width
roi_height = cross_height
roi_y1 = int(roi_ymin)
roi_y2 = int(roi_ymin + roi_height)
roi_x1 = int(roi_xmin)
roi_x2 = int(roi_xmin + roi_width)
cross_y1 = int(cross_ymin)
cross_y2 = int(cross_ymin + cross_height)
cross_x1 = int(cross_xmin)
cross_x2 = int(cross_xmin + cross_width)
sample_img = np.zeros((height, width, 3))
sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
img[cross_y1: cross_y2, cross_x1: cross_x2]
sample_img = cv2.resize(
sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
return sample_img
def is_poly(segm):
assert isinstance(segm, (list, dict)), \
"Invalid segm type: {}".format(type(segm))
return isinstance(segm, list)
def gaussian_radius(bbox_size, min_overlap):
height, width = bbox_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
radius1 = (b1 + sq1) / (2 * a1)
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
radius2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
radius3 = (b3 + sq3) / 2
return min(radius1, radius2, radius3)
def draw_gaussian(heatmap, center, radius, k=1, delte=6):
diameter = 2 * radius + 1
sigma = diameter / delte
gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
x, y = center
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
def gaussian2D(shape, sigma_x=1, sigma_y=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
sigma_y)))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_umich_gaussian(heatmap, center, radius, k=1):
"""
draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
"""
diameter = 2 * radius + 1
gaussian = gaussian2D(
(diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def get_border(border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,480 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Number, Integral
import cv2
import numpy as np
import math
import copy
from .operators import register_op, BaseOperator
from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
from ppdet.utils.logger import setup_logger
from ppdet.utils.compact import imagedraw_textsize_c
logger = setup_logger(__name__)
@register_op
class RRotate(BaseOperator):
""" Rotate Image, Polygon, Box
Args:
scale (float): rotate scale
angle (float): rotate angle
fill_value (int, tuple): fill color
auto_bound (bool): whether auto bound or not
"""
def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
super(RRotate, self).__init__()
self.scale = scale
self.angle = angle
self.fill_value = fill_value
self.auto_bound = auto_bound
def get_rotated_matrix(self, angle, scale, h, w):
center = ((w - 1) * 0.5, (h - 1) * 0.5)
matrix = cv2.getRotationMatrix2D(center, -angle, scale)
# calculate the new size
cos = np.abs(matrix[0, 0])
sin = np.abs(matrix[0, 1])
new_w = h * sin + w * cos
new_h = h * cos + w * sin
# calculate offset
n_w = int(np.round(new_w))
n_h = int(np.round(new_h))
if self.auto_bound:
ratio = min(w / n_w, h / n_h)
matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
else:
matrix[0, 2] += (new_w - w) * 0.5
matrix[1, 2] += (new_h - h) * 0.5
w = n_w
h = n_h
return matrix, h, w
def get_rect_from_pts(self, pts, h, w):
""" get minimum rectangle of points
"""
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
axis=1)
max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
axis=1)
min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
return boxes
def apply_image(self, image, matrix, h, w):
return cv2.warpAffine(
image, matrix, (w, h), borderValue=self.fill_value)
def apply_pts(self, pts, matrix, h, w):
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
# n is number of samples and m is two times the number of points due to (x, y)
_, m = pts.shape
# transpose points
pts_ = pts.reshape(-1, 2).T
# pad 1 to convert the points to homogeneous coordinates
padding = np.ones((1, pts_.shape[1]), pts.dtype)
rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
return rotated_pts[:2, :].T.reshape(-1, m)
def apply(self, sample, context=None):
image = sample['image']
h, w = image.shape[:2]
matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
sample['image'] = self.apply_image(image, matrix, h, w)
polys = sample['gt_poly']
# TODO: segment or keypoint to be processed
if len(polys) > 0:
pts = self.apply_pts(polys, matrix, h, w)
sample['gt_poly'] = pts
sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
return sample
@register_op
class RandomRRotate(BaseOperator):
""" Random Rotate Image
Args:
scale (float, tuple, list): rotate scale
scale_mode (str): mode of scale, [range, value, None]
angle (float, tuple, list): rotate angle
angle_mode (str): mode of angle, [range, value, None]
fill_value (float, tuple, list): fill value
rotate_prob (float): probability of rotation
auto_bound (bool): whether auto bound or not
"""
def __init__(self,
scale=1.0,
scale_mode=None,
angle=0.,
angle_mode=None,
fill_value=0.,
rotate_prob=1.0,
auto_bound=True):
super(RandomRRotate, self).__init__()
self.scale = scale
self.scale_mode = scale_mode
self.angle = angle
self.angle_mode = angle_mode
self.fill_value = fill_value
self.rotate_prob = rotate_prob
self.auto_bound = auto_bound
def get_angle(self, angle, angle_mode):
assert not angle_mode or angle_mode in [
'range', 'value'
], 'angle mode should be in [range, value, None]'
if not angle_mode:
return angle
elif angle_mode == 'range':
low, high = angle
return np.random.rand() * (high - low) + low
elif angle_mode == 'value':
return np.random.choice(angle)
def get_scale(self, scale, scale_mode):
assert not scale_mode or scale_mode in [
'range', 'value'
], 'scale mode should be in [range, value, None]'
if not scale_mode:
return scale
elif scale_mode == 'range':
low, high = scale
return np.random.rand() * (high - low) + low
elif scale_mode == 'value':
return np.random.choice(scale)
def apply(self, sample, context=None):
if np.random.rand() > self.rotate_prob:
return sample
angle = self.get_angle(self.angle, self.angle_mode)
scale = self.get_scale(self.scale, self.scale_mode)
rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
return rotator(sample)
@register_op
class Poly2RBox(BaseOperator):
""" Polygon to Rotated Box, using new OpenCV definition since 4.5.1
Args:
filter_threshold (int, float): threshold to filter annotations
filter_mode (str): filter mode, ['area', 'edge']
rbox_type (str): rbox type, ['le135', 'oc']
"""
def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
super(Poly2RBox, self).__init__()
self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np
def filter(self, size, threshold, mode):
if mode == 'area':
if size[0] * size[1] < threshold:
return True
elif mode == 'edge':
if min(size) < threshold:
return True
return False
def get_rbox(self, polys):
valid_ids, rboxes, bboxes = [], [], []
for i, poly in enumerate(polys):
cx, cy, w, h, angle = self.rbox_fn(poly)
if self.filter_fn((w, h)):
continue
rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
valid_ids.append(i)
xmin, ymin = min(poly[0::2]), min(poly[1::2])
xmax, ymax = max(poly[0::2]), max(poly[1::2])
bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
if len(valid_ids) == 0:
rboxes = np.zeros((0, 5), dtype=np.float32)
bboxes = np.zeros((0, 4), dtype=np.float32)
else:
rboxes = np.stack(rboxes)
bboxes = np.stack(bboxes)
return rboxes, bboxes, valid_ids
def apply(self, sample, context=None):
rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
sample['gt_rbox'] = rboxes
sample['gt_bbox'] = bboxes
for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
if k in sample:
sample[k] = sample[k][valid_ids]
return sample
@register_op
class Poly2Array(BaseOperator):
""" convert gt_poly to np.array for rotated bboxes
"""
def __init__(self):
super(Poly2Array, self).__init__()
def apply(self, sample, context=None):
if 'gt_poly' in sample:
sample['gt_poly'] = np.array(
sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
return sample
@register_op
class RResize(BaseOperator):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
if keep_ratio is False, resize the image to target size(h, w)
Args:
target_size (int|list): image target size
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): the interpolation method
"""
super(RResize, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, image, scale):
im_scale_x, im_scale_y = scale
return cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
def apply_pts(self, pts, scale, size):
im_scale_x, im_scale_y = scale
resize_w, resize_h = size
pts[:, 0::2] *= im_scale_x
pts[:, 1::2] *= im_scale_y
pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
return pts
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
# apply image
im_shape = im.shape
if self.keep_ratio:
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max)
resize_h = im_scale * float(im_shape[0])
resize_w = im_scale * float(im_shape[1])
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / im_shape[0]
im_scale_x = resize_w / im_shape[1]
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
sample['image'] = im.astype(np.float32)
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample:
scale_factor = sample['scale_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
dtype=np.float32)
else:
sample['scale_factor'] = np.asarray(
[im_scale_y, im_scale_x], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
return sample
@register_op
class RandomRFlip(BaseOperator):
def __init__(self, prob=0.5):
"""
Args:
prob (float): the probability of flipping image
"""
super(RandomRFlip, self).__init__()
self.prob = prob
if not (isinstance(self.prob, float)):
raise TypeError("{}: input type is invalid.".format(self))
def apply_image(self, image):
return image[:, ::-1, :]
def apply_pts(self, pts, width):
oldx = pts[:, 0::2].copy()
pts[:, 0::2] = width - oldx - 1
return pts
def apply(self, sample, context=None):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
if np.random.uniform(0, 1) < self.prob:
im = sample['image']
height, width = im.shape[:2]
im = self.apply_image(im)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
sample['flipped'] = True
sample['image'] = im
return sample
@register_op
class VisibleRBox(BaseOperator):
"""
In debug mode, visualize images according to `gt_box`.
(Currently only supported when not cropping and flipping image.)
"""
def __init__(self, output_dir='debug'):
super(VisibleRBox, self).__init__()
self.output_dir = output_dir
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
def apply(self, sample, context=None):
image = Image.fromarray(sample['image'].astype(np.uint8))
out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
width = sample['w']
height = sample['h']
# gt_poly = sample['gt_rbox']
gt_poly = sample['gt_poly']
gt_class = sample['gt_class']
draw = ImageDraw.Draw(image)
for i in range(gt_poly.shape[0]):
x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill='green')
# draw label
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
text = str(gt_class[i][0])
tw, th = imagedraw_textsize_c(draw, text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
if self.is_normalized:
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] * height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] * width
for i in range(gt_keypoint.shape[0]):
keypoint = gt_keypoint[i]
for j in range(int(keypoint.shape[0] / 2)):
x1 = round(keypoint[2 * j]).astype(np.int32)
y1 = round(keypoint[2 * j + 1]).astype(np.int32)
draw.ellipse(
(x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
save_path = os.path.join(self.output_dir, out_file_name)
image.save(save_path, quality=95)
return sample
@register_op
class Rbox2Poly(BaseOperator):
"""
Convert rbbox format to poly format.
"""
def __init__(self):
super(Rbox2Poly, self).__init__()
def apply(self, sample, context=None):
assert 'gt_rbox' in sample
assert sample['gt_rbox'].shape[1] == 5
rboxes = sample['gt_rbox']
polys = rbox2poly_np(rboxes)
sample['gt_poly'] = polys
xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
sample['gt_bbox'] = np.stack([xmin, ymin, xmin, ymin], axis=1)
return sample