Replace the document detection model
35  paddle_detection/ppdet/data/transform/__init__.py  Normal file
@@ -0,0 +1,35 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
from . import rotated_operators
from . import keypoints_3d_operators
from . import culane_operators

from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
from .rotated_operators import *
from .keypoints_3d_operators import *
from .culane_operators import *

__all__ = []
__all__ += registered_ops
__all__ += keypoint_operators.__all__
__all__ += mot_operators.__all__
__all__ += culane_operators.__all__
421  paddle_detection/ppdet/data/transform/atss_assigner.py  Normal file
@@ -0,0 +1,421 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
            B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or "iof" (intersection over
            foreground).
        is_aligned (bool, optional): If True, then m and n must be equal.
            Default False.
        eps (float, optional): A value added to the denominator for numerical
            stability. Default 1e-6.
    Returns:
        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
    """
    assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
        mode)
    # Either the boxes are empty or the length of boxes's last dimension is 4
    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        if is_aligned:
            return np.random.random(batch_shape + (rows, ))
        else:
            return np.random.random(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
        bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
        bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1 + area2 - overlap
        else:
            union = area1
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
        if mode == 'diou':
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
            b1_x1, b1_y1 = bboxes1[..., 0], bboxes1[..., 1]
            b1_x2, b1_y2 = bboxes1[..., 2], bboxes1[..., 3]
            b2_x1, b2_y1 = bboxes2[..., 0], bboxes2[..., 1]
            b2_x2, b2_y2 = bboxes2[..., 2], bboxes2[..., 3]
    else:
        lt = np.maximum(bboxes1[..., :, None, :2],
                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
        rb = np.minimum(bboxes1[..., :, None, 2:],
                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1[..., None] + area2[..., None, :] - overlap
        else:
            union = area1[..., None]
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
                                     bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                     bboxes2[..., None, :, 2:])
        if mode == 'diou':
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
                                     bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                     bboxes2[..., None, :, 2:])
            b1_x1, b1_y1 = bboxes1[..., :, None, 0], bboxes1[..., :, None, 1]
            b1_x2, b1_y2 = bboxes1[..., :, None, 2], bboxes1[..., :, None, 3]
            b2_x1, b2_y1 = bboxes2[..., None, :, 0], bboxes2[..., None, :, 1]
            b2_x2, b2_y2 = bboxes2[..., None, :, 2], bboxes2[..., None, :, 3]

    eps = np.array([eps])
    union = np.maximum(union, eps)
    ious = overlap / union
    if mode in ['iou', 'iof']:
        return ious
    # calculate gious
    if mode in ['giou']:
        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
        enclose_area = np.maximum(enclose_area, eps)
        gious = ious - (enclose_area - union) / enclose_area
        return gious
    if mode in ['diou']:
        left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
        right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
        rho2 = left + right
        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
        enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
        enclose_c = np.maximum(enclose_c, eps)
        dious = ious - rho2 / enclose_c
        return dious


def topk_(input, k, axis=1, largest=True):
    x = -input if largest else input
    if axis == 0:
        row_index = np.arange(input.shape[1 - axis])
        if k == x.shape[0]:  # argpartition requires index < len(input)
            topk_index = np.argpartition(x, k - 1, axis=axis)[0:k, :]
        else:
            topk_index = np.argpartition(x, k, axis=axis)[0:k, :]

        topk_data = x[topk_index, row_index]

        topk_index_sort = np.argsort(topk_data, axis=axis)
        topk_data_sort = topk_data[topk_index_sort, row_index]
        topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
    else:
        column_index = np.arange(x.shape[1 - axis])[:, None]
        topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
        topk_data = x[column_index, topk_index]
        topk_data = -topk_data if largest else topk_data
        topk_index_sort = np.argsort(topk_data, axis=axis)
        topk_data_sort = topk_data[column_index, topk_index_sort]
        topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]

    return topk_data_sort, topk_index_sort


class ATSSAssigner(object):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `0` or a positive integer
    indicating the ground truth index.

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        topk (int): number of bboxes selected on each level
    """

    def __init__(self, topk=9):
        self.topk = topk

    def __call__(self,
                 bboxes,
                 num_level_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 gt_labels=None):
        """Assign gt to bboxes.

        The assignment is done in the following steps:
        1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
        2. compute center distance between all bboxes and gt
        3. on each pyramid level, for each gt, select k bboxes whose centers
           are closest to the gt center, so we select k*l bboxes in total as
           candidates for each gt
        4. get the corresponding iou for these candidates, compute the
           mean and std, and set mean + std as the iou threshold
        5. select candidates whose iou is greater than or equal to
           the threshold as positive
        6. limit the positive samples' centers to lie inside the gt

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape (n, 4).
            num_level_bboxes (List): num of bboxes in each level
            gt_bboxes (np.array): Ground truth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
        """
        bboxes = bboxes[:, :4]
        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # assign 0 by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = np.zeros((num_bboxes, ))
            if num_gt == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if not np.any(gt_labels):
                assigned_labels = None
            else:
                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
            return assigned_gt_inds, max_overlaps

        # compute iou between all bboxes and gt
        overlaps = bbox_overlaps(bboxes, gt_bboxes)
        # compute center distance between all bboxes and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # Selecting candidates based on the center distance
        candidate_idxs = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_k = min(self.topk, bboxes_per_level)
            _, topk_idxs_per_level = topk_(
                distances_per_level, selectable_k, axis=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = np.concatenate(candidate_idxs, axis=0)

        # get the corresponding iou for these candidates, compute the
        # mean and std, and set mean + std as the iou threshold
        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
        overlaps_mean_per_gt = candidate_overlaps.mean(0)
        overlaps_std_per_gt = candidate_overlaps.std(0)
        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt

        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]

        # limit the positive samples' centers to lie inside the gt
        for gt_idx in range(num_gt):
            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
        ep_bboxes_cx = np.broadcast_to(
            bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
        ep_bboxes_cy = np.broadcast_to(
            bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
        candidate_idxs = candidate_idxs.reshape(-1)

        # calculate the left, top, right, bottom distance between positive
        # bbox center and gt side
        l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
        t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)
        assigned_gt_inds[max_overlaps !=
                         -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1

        return assigned_gt_inds, max_overlaps

    def get_vlr_region(self,
                       bboxes,
                       num_level_bboxes,
                       gt_bboxes,
                       gt_bboxes_ignore=None,
                       gt_labels=None):
        """get vlr region for ld distillation.

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape (n, 4).
            num_level_bboxes (List): num of bboxes in each level
            gt_bboxes (np.array): Ground truth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
        """
        bboxes = bboxes[:, :4]

        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # compute iou between all bboxes and gt
        overlaps = bbox_overlaps(bboxes, gt_bboxes)

        # compute diou between all bboxes and gt
        diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')

        # assign 0 by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        vlr_region_iou = (assigned_gt_inds + 0).astype(np.float32)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = np.zeros((num_bboxes, ))
            if num_gt == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if not np.any(gt_labels):
                assigned_labels = None
            else:
                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
            return assigned_gt_inds, max_overlaps

        # compute center distance between all bboxes and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # Selecting candidates based on the center distance
        candidate_idxs = []
        candidate_idxs_t = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_t = min(self.topk, bboxes_per_level)
            selectable_k = bboxes_per_level  # k for all
            _, topt_idxs_per_level = topk_(
                distances_per_level, selectable_t, axis=0, largest=False)
            _, topk_idxs_per_level = topk_(
                distances_per_level, selectable_k, axis=0, largest=False)
            candidate_idxs_t.append(topt_idxs_per_level + start_idx)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx

        candidate_idxs_t = np.concatenate(candidate_idxs_t, axis=0)
        candidate_idxs = np.concatenate(candidate_idxs, axis=0)

        # get the corresponding iou for these candidates, compute the
        # mean and std, and set mean + std as the iou threshold
        candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]

        # compute tdiou
        t_diou = diou[candidate_idxs, np.arange(num_gt)]

        overlaps_mean_per_gt = candidate_overlaps_t.mean(0)
        overlaps_std_per_gt = candidate_overlaps_t.std(
            0, ddof=1)  # NOTE: use Bessel correction
        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt

        # compute region
        is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
            t_diou >= 0.25 * overlaps_thr_per_gt[None, :])

        # limit the positive samples' centers to lie inside the gt
        for gt_idx in range(num_gt):
            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes

        candidate_idxs = candidate_idxs.reshape(-1)

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]

        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)

        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        assigned_gt_inds[max_overlaps !=
                         -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1

        vlr_region_iou[max_overlaps !=
                       -np.inf] = max_overlaps[max_overlaps != -np.inf] + 0

        return vlr_region_iou
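The assigner and its helpers above are plain NumPy, so they can be exercised in isolation. A minimal usage sketch, with a made-up anchor layout (two anchors on a single pyramid level, one ground-truth box); only the functions defined in this file are used:

import numpy as np

# two anchors and one ground-truth box, all in <x1, y1, x2, y2> format
anchors = np.array([[0., 0., 10., 10.], [20., 20., 30., 30.]])
gt = np.array([[1., 1., 9., 9.]])

ious = bbox_overlaps(anchors, gt)  # shape (2, 1)

assigner = ATSSAssigner(topk=9)
# all anchors sit on a single pyramid level in this toy setup
assigned_inds, max_ious = assigner(anchors, [2], gt)
# assigned_inds[i] is 0 for background, else the 1-based gt index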
1586  paddle_detection/ppdet/data/transform/autoaugment_utils.py  Normal file
File diff suppressed because it is too large
1532  paddle_detection/ppdet/data/transform/batch_operators.py  Normal file
File diff suppressed because it is too large
366  paddle_detection/ppdet/data/transform/culane_operators.py  Normal file
@@ -0,0 +1,366 @@
import numpy as np
import imgaug.augmenters as iaa
from .operators import BaseOperator, register_op
from ppdet.utils.logger import setup_logger
from ppdet.data.culane_utils import linestrings_to_lanes, transform_annotation

logger = setup_logger(__name__)

__all__ = [
    "CULaneTrainProcess", "CULaneDataProcess", "HorizontalFlip",
    "ChannelShuffle", "CULaneAffine", "CULaneResize", "OneOfBlur",
    "MultiplyAndAddToBrightness", "AddToHueAndSaturation"
]


def trainTransforms(img_h, img_w):
    transforms = [{
        'name': 'Resize',
        'parameters': dict(size=dict(
            height=img_h, width=img_w)),
        'p': 1.0
    }, {
        'name': 'HorizontalFlip',
        'parameters': dict(p=1.0),
        'p': 0.5
    }, {
        'name': 'ChannelShuffle',
        'parameters': dict(p=1.0),
        'p': 0.1
    }, {
        'name': 'MultiplyAndAddToBrightness',
        'parameters': dict(
            mul=(0.85, 1.15), add=(-10, 10)),
        'p': 0.6
    }, {
        'name': 'AddToHueAndSaturation',
        'parameters': dict(value=(-10, 10)),
        'p': 0.7
    }, {
        'name': 'OneOf',
        'transforms': [
            dict(
                name='MotionBlur', parameters=dict(k=(3, 5))), dict(
                    name='MedianBlur', parameters=dict(k=(3, 5)))
        ],
        'p': 0.2
    }, {
        'name': 'Affine',
        'parameters': dict(
            translate_percent=dict(
                x=(-0.1, 0.1), y=(-0.1, 0.1)),
            rotate=(-10, 10),
            scale=(0.8, 1.2)),
        'p': 0.7
    }, {
        'name': 'Resize',
        'parameters': dict(size=dict(
            height=img_h, width=img_w)),
        'p': 1.0
    }]
    return transforms


@register_op
class CULaneTrainProcess(BaseOperator):
    def __init__(self, img_w, img_h):
        super(CULaneTrainProcess, self).__init__()
        self.img_w = img_w
        self.img_h = img_h
        self.transforms = trainTransforms(self.img_h, self.img_w)

        if self.transforms is not None:
            img_transforms = []
            for aug in self.transforms:
                p = aug['p']
                if aug['name'] != 'OneOf':
                    img_transforms.append(
                        iaa.Sometimes(
                            p=p,
                            then_list=getattr(iaa, aug['name'])(**aug[
                                'parameters'])))
                else:
                    img_transforms.append(
                        iaa.Sometimes(
                            p=p,
                            then_list=iaa.OneOf([
                                getattr(iaa, aug_['name'])(**aug_['parameters'])
                                for aug_ in aug['transforms']
                            ])))
        else:
            img_transforms = []
        self.iaa_transform = iaa.Sequential(img_transforms)

    def apply(self, sample, context=None):
        img, line_strings, seg = self.iaa_transform(
            image=sample['image'],
            line_strings=sample['lanes'],
            segmentation_maps=sample['mask'])
        sample['image'] = img
        sample['lanes'] = line_strings
        sample['mask'] = seg
        return sample


@register_op
class CULaneDataProcess(BaseOperator):
    def __init__(self, img_w, img_h, num_points, max_lanes):
        super(CULaneDataProcess, self).__init__()
        self.img_w = img_w
        self.img_h = img_h
        self.num_points = num_points
        self.n_offsets = num_points
        self.n_strips = num_points - 1
        self.strip_size = self.img_h / self.n_strips

        self.max_lanes = max_lanes
        self.offsets_ys = np.arange(self.img_h, -1, -self.strip_size)

    def apply(self, sample, context=None):
        data = {}
        line_strings = sample['lanes']
        line_strings.clip_out_of_image_()
        new_anno = {'lanes': linestrings_to_lanes(line_strings)}

        for i in range(30):
            try:
                annos = transform_annotation(
                    self.img_w, self.img_h, self.max_lanes, self.n_offsets,
                    self.offsets_ys, self.n_strips, self.strip_size, new_anno)
                label = annos['label']
                lane_endpoints = annos['lane_endpoints']
                break
            except:
                if (i + 1) == 30:
                    logger.critical('Transform annotation failed 30 times :(')
                    exit()

        sample['image'] = sample['image'].astype(np.float32) / 255.
        data['image'] = sample['image'].transpose(2, 0, 1)
        data['lane_line'] = label
        data['seg'] = sample['seg']
        data['full_img_path'] = sample['full_img_path']
        data['img_name'] = sample['img_name']
        data['im_id'] = sample['im_id']

        if 'mask' in sample.keys():
            data['seg'] = sample['mask'].get_arr()

        data['im_shape'] = np.array([self.img_w, self.img_h], dtype=np.float32)
        data['scale_factor'] = np.array([1., 1.], dtype=np.float32)

        return data


@register_op
class CULaneResize(BaseOperator):
    def __init__(self, img_h, img_w, prob=0.5):
        super(CULaneResize, self).__init__()
        self.img_h = img_h
        self.img_w = img_w
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob,
                                  iaa.Resize({
                                      "height": self.img_h,
                                      "width": self.img_w
                                  }))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'].copy().astype(np.uint8),
                line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class HorizontalFlip(BaseOperator):
    def __init__(self, prob=0.5):
        super(HorizontalFlip, self).__init__()
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob, iaa.HorizontalFlip(1.0))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class ChannelShuffle(BaseOperator):
    def __init__(self, prob=0.1):
        super(ChannelShuffle, self).__init__()
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob, iaa.ChannelShuffle(1.0))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class MultiplyAndAddToBrightness(BaseOperator):
    def __init__(self, mul=(0.85, 1.15), add=(-10, 10), prob=0.5):
        super(MultiplyAndAddToBrightness, self).__init__()
        self.mul = tuple(mul)
        self.add = tuple(add)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.MultiplyAndAddToBrightness(
                mul=self.mul, add=self.add))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class AddToHueAndSaturation(BaseOperator):
    def __init__(self, value=(-10, 10), prob=0.5):
        super(AddToHueAndSaturation, self).__init__()
        self.value = tuple(value)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob, iaa.AddToHueAndSaturation(value=self.value))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class OneOfBlur(BaseOperator):
    def __init__(self, MotionBlur_k=(3, 5), MedianBlur_k=(3, 5), prob=0.5):
        super(OneOfBlur, self).__init__()
        self.MotionBlur_k = tuple(MotionBlur_k)
        self.MedianBlur_k = tuple(MedianBlur_k)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.OneOf([
                iaa.MotionBlur(k=self.MotionBlur_k),
                iaa.MedianBlur(k=self.MedianBlur_k)
            ]))

        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class CULaneAffine(BaseOperator):
    def __init__(self,
                 translate_percent_x=(-0.1, 0.1),
                 translate_percent_y=(-0.1, 0.1),
                 rotate=(3, 5),
                 scale=(0.8, 1.2),
                 prob=0.5):
        super(CULaneAffine, self).__init__()
        self.translate_percent = {
            'x': tuple(translate_percent_x),
            'y': tuple(translate_percent_y)
        }
        self.rotate = tuple(rotate)
        self.scale = tuple(scale)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.Affine(
                translate_percent=self.translate_percent,
                rotate=self.rotate,
                scale=self.scale))

        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample
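Every operator in this file follows one pattern: wrap an imgaug augmenter in iaa.Sometimes so it fires with probability prob, then apply it jointly to the image, the lane LineStrings, and (when present) the segmentation mask, so the geometry stays in sync with the pixels. A minimal sketch of that pattern outside the ppdet pipeline; the image size and lane coordinates below are invented for illustration:

import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.lines import LineString, LineStringsOnImage

image = np.zeros((590, 1640, 3), dtype=np.uint8)
lanes = LineStringsOnImage(
    [LineString([(100, 580), (300, 300)])], shape=image.shape)

# fire the resize with probability 0.5; lanes are warped together with pixels
transform = iaa.Sometimes(0.5, iaa.Resize({"height": 320, "width": 800}))
image_aug, lanes_aug = transform(image=image, line_strings=lanes)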
86  paddle_detection/ppdet/data/transform/gridmask_utils.py  Normal file
@@ -0,0 +1,86 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The code is based on:
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
from PIL import Image


class Gridmask(object):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        super(Gridmask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.st_prob = prob
        self.upper_iter = upper_iter

    def __call__(self, x, curr_iter):
        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
        if np.random.rand() > self.prob:
            return x
        h, w, _ = x.shape
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2
                    + w].astype(np.float32)

        if self.mode == 1:
            mask = 1 - mask
        mask = np.expand_dims(mask, axis=-1)
        if self.offset:
            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
        else:
            x = (x * mask).astype(x.dtype)

        return x
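GridMask zeroes out periodic stripes of a grid with random period d, and ramps the application probability linearly from 0 to prob over the first upper_iter iterations. A minimal usage sketch with a dummy image; the sizes and iteration count are illustrative only:

import numpy as np

gridmask = Gridmask(use_h=True, use_w=True, rotate=1, ratio=0.5,
                    mode=1, prob=0.7, upper_iter=360000)

image = (np.random.rand(608, 608, 3) * 255).astype(np.uint8)
# halfway through the ramp the effective probability is 0.5 * prob = 0.35
augmented = gridmask(image, curr_iter=180000)
assert augmented.shape == image.shape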
1742  paddle_detection/ppdet/data/transform/keypoint_operators.py  Normal file
File diff suppressed because it is too large
296  paddle_detection/ppdet/data/transform/keypoints_3d_operators.py  Normal file
@@ -0,0 +1,296 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
import cv2
import numpy as np
import math
import copy
import random
import uuid
from numbers import Number, Integral

from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
from ppdet.core.workspace import serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

registered_ops = []

__all__ = [
    'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
]

import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from mpl_toolkits.mplot3d import Axes3D


def register_keypointop(cls):
    return serializable(cls)


def register_op(cls):
    registered_ops.append(cls.__name__)
    if not hasattr(BaseOperator, cls.__name__):
        setattr(BaseOperator, cls.__name__, cls)
    else:
        raise KeyError("The {} class has been registered.".format(cls.__name__))
    return serializable(cls)


class BaseOperator(object):
    def __init__(self, name=None):
        if name is None:
            name = self.__class__.__name__
        self._id = name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        return sample

    def __call__(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        if isinstance(sample, Sequence):  # for batch_size
            for i in range(len(sample)):
                sample[i] = self.apply(sample[i], context)
        else:
            # image.shape changed
            sample = self.apply(sample, context)
        return sample

    def __str__(self):
        return str(self._id)


@register_keypointop
class CropAndFlipImages(object):
    """Crop and flip all images"""

    def __init__(self, crop_range, flip_pairs=None):
        super(CropAndFlipImages, self).__init__()
        self.crop_range = crop_range
        self.flip_pairs = flip_pairs

    def __call__(self, records):  # tuple
        images = records["image"]
        images = images[:, :, ::-1, :]
        images = images[:, :, self.crop_range[0]:self.crop_range[1]]
        records["image"] = images

        if "kps2d" in records.keys():
            kps2d = records["kps2d"]

            width, height = images.shape[2], images.shape[1]
            kps2d = np.array(kps2d)
            kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]

            for pair in self.flip_pairs:
                kps2d[:, pair[0], :], kps2d[:, pair[1], :] = \
                    kps2d[:, pair[1], :], kps2d[:, pair[0], :].copy()

            records["kps2d"] = kps2d

        return records


@register_op
class PermuteImages(BaseOperator):
    def __init__(self):
        """
        Change the channel layout to (batch_size, C, H, W), e.g. (6, 3, 1080, 1920)
        """
        super(PermuteImages, self).__init__()

    def apply(self, sample, context=None):
        images = sample["image"]
        images = images.transpose((0, 3, 1, 2))

        sample["image"] = images

        return sample


@register_keypointop
class RandomFlipHalfBody3DTransformImages(object):
    """apply data augmentation to images and coords
    to achieve the flip, scale, rotate and half body transform effect for training images

    Args:
        trainsize (list): [w, h], Image target size
        upper_body_ids (list): The upper body joint ids
        flip_pairs (list): The left-right joints exchange order list
        pixel_std (int): The pixel std of the scale
        scale (float): The scale factor to transform the image
        rot (int): The rotate factor to transform the image
        num_joints_half_body (int): The joints threshold of the half body transform
        prob_half_body (float): The threshold of the half body transform
        flip (bool): Whether to flip the image

    Returns:
        records (dict): contain the image and coords after transformation
    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6,
                 do_occlusion=False):
        super(RandomFlipHalfBody3DTransformImages, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob
        self.do_occlusion = do_occlusion

    def halfbody_transform(self, joints, joints_vis):
        upper_joints = []
        lower_joints = []
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        scale = scale * 1.5

        return center, scale

    def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
        # joints: (6, 24, 3), (num_frames, num_joints, 3)

        joints[:, :, 0] = width - joints[:, :, 0] - 1  # x
        if kps2d is not None:
            kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1

        for pair in matched_parts:
            joints[:, pair[0], :], joints[:, pair[1], :] = \
                joints[:, pair[1], :], joints[:, pair[0], :].copy()

            joints_vis[:, pair[0], :], joints_vis[:, pair[1], :] = \
                joints_vis[:, pair[1], :], joints_vis[:, pair[0], :].copy()

            if kps2d is not None:
                kps2d[:, pair[0], :], kps2d[:, pair[1], :] = \
                    kps2d[:, pair[1], :], kps2d[:, pair[0], :].copy()

        # move to zero
        joints -= joints[:, [0], :]  # (batch_size, 24, 3), numpy.ndarray

        return joints, joints_vis, kps2d

    def __call__(self, records):
        # kps3d, kps3d_vis, images; images.shape is (num_frames, height, width, 3)
        images = records['image']

        joints = records['kps3d']
        joints_vis = records['kps3d_vis']

        kps2d = None
        if 'kps2d' in records.keys():
            kps2d = records['kps2d']

        if self.flip and np.random.random() <= 0.5:
            images = images[:, :, ::-1, :]  # horizontal image flip, e.g. (6, 1080, 810, 3)
            joints, joints_vis, kps2d = self.flip_joints(
                joints, joints_vis, images.shape[2], self.flip_pairs,
                kps2d)  # mirror the left/right keypoint pairs
        occlusion = False
        if self.do_occlusion and random.random() <= 0.5:  # random occlusion
            height = images[0].shape[0]
            width = images[0].shape[1]
            occlusion = True
            while True:
                area_min = 0.0
                area_max = 0.2
                synth_area = (random.random() *
                              (area_max - area_min) + area_min) * width * height

                ratio_min = 0.3
                ratio_max = 1 / 0.3
                synth_ratio = (random.random() *
                               (ratio_max - ratio_min) + ratio_min)

                synth_h = math.sqrt(synth_area * synth_ratio)
                synth_w = math.sqrt(synth_area / synth_ratio)
                synth_xmin = random.random() * (width - synth_w - 1)
                synth_ymin = random.random() * (height - synth_h - 1)

                if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
                    xmin = int(synth_xmin)
                    ymin = int(synth_ymin)
                    w = int(synth_w)
                    h = int(synth_h)

                    mask = np.random.rand(h, w, 3) * 255
                    images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
                        None, :, :, :]
                    break

        records['image'] = images
        records['kps3d'] = joints
        records['kps3d_vis'] = joints_vis
        if kps2d is not None:
            records['kps2d'] = kps2d

        return records
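A rough sketch of chaining these clip-level ops on a dummy 6-frame clip; the crop range and the toy two-joint flip_pairs are invented for illustration and are not a real 24-joint layout:

import numpy as np

clip = {
    "image": np.zeros((6, 1080, 1920, 3), dtype=np.float32),  # (frames, H, W, C)
    "kps2d": np.zeros((6, 2, 3), dtype=np.float32),           # toy 2-joint skeleton
}

crop = CropAndFlipImages(crop_range=(420, 1500), flip_pairs=[[0, 1]])
clip = crop(clip)    # frames are mirrored, then cropped to 1080 px wide

permute = PermuteImages()
clip = permute(clip)  # image layout becomes (6, 3, 1080, 1080)
print(clip["image"].shape)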
627  paddle_detection/ppdet/data/transform/mot_operators.py  Normal file
@@ -0,0 +1,627 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
try:
|
||||
from collections.abc import Sequence
|
||||
except Exception:
|
||||
from collections import Sequence
|
||||
from numbers import Integral
|
||||
|
||||
import cv2
|
||||
import copy
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
|
||||
from .operators import BaseOperator, register_op
|
||||
from .batch_operators import Gt2TTFTarget
|
||||
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
|
||||
from ppdet.utils.logger import setup_logger
|
||||
from .op_helper import gaussian_radius
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
|
||||
'Gt2JDETargetMax', 'Gt2FairMOTTarget'
|
||||
]
|
||||
|
||||
|
||||
@register_op
|
||||
class RGBReverse(BaseOperator):
|
||||
"""RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(RGBReverse, self).__init__()
|
||||
|
||||
def apply(self, sample, context=None):
|
||||
im = sample['image']
|
||||
sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
|
||||
return sample
|
||||
|
||||
|
||||
@register_op
|
||||
class LetterBoxResize(BaseOperator):
|
||||
def __init__(self, target_size):
|
||||
"""
|
||||
Resize image to target size, convert normalized xywh to pixel xyxy
|
||||
format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
|
||||
Args:
|
||||
target_size (int|list): image target size.
|
||||
"""
|
||||
super(LetterBoxResize, self).__init__()
|
||||
if not isinstance(target_size, (Integral, Sequence)):
|
||||
raise TypeError(
|
||||
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
|
||||
format(type(target_size)))
|
||||
if isinstance(target_size, Integral):
|
||||
target_size = [target_size, target_size]
|
||||
self.target_size = target_size
|
||||
|
||||
def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
|
||||
# letterbox: resize a rectangular image to a padded rectangular
|
||||
shape = img.shape[:2] # [height, width]
|
||||
ratio_h = float(height) / shape[0]
|
||||
ratio_w = float(width) / shape[1]
|
||||
ratio = min(ratio_h, ratio_w)
|
||||
new_shape = (round(shape[1] * ratio),
|
||||
round(shape[0] * ratio)) # [width, height]
|
||||
padw = (width - new_shape[0]) / 2
|
||||
padh = (height - new_shape[1]) / 2
|
||||
top, bottom = round(padh - 0.1), round(padh + 0.1)
|
||||
left, right = round(padw - 0.1), round(padw + 0.1)
|
||||
|
||||
img = cv2.resize(
|
||||
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
|
||||
img = cv2.copyMakeBorder(
|
||||
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
|
||||
value=color) # padded rectangular
|
||||
return img, ratio, padw, padh
|
||||
|
||||
def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
|
||||
bboxes = bbox0.copy()
|
||||
bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
|
||||
bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
|
||||
bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
|
||||
bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
|
||||
return bboxes
|
||||
|
||||
def apply(self, sample, context=None):
|
||||
""" Resize the image numpy.
|
||||
"""
|
||||
im = sample['image']
|
||||
h, w = sample['im_shape']
|
||||
if not isinstance(im, np.ndarray):
|
||||
raise TypeError("{}: image type is not numpy.".format(self))
|
||||
if len(im.shape) != 3:
|
||||
from PIL import UnidentifiedImageError
|
||||
raise UnidentifiedImageError(
|
||||
'{}: image is not 3-dimensional.'.format(self))
|
||||
|
||||
# apply image
|
||||
height, width = self.target_size
|
||||
img, ratio, padw, padh = self.apply_image(
|
||||
im, height=height, width=width)
|
||||
|
||||
sample['image'] = img
|
||||
new_shape = (round(h * ratio), round(w * ratio))
|
||||
sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
|
||||
sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
|
||||
|
||||
# apply bbox
|
||||
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
|
||||
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
|
||||
padw, padh)
|
||||
return sample
|
||||
|
||||
|
||||
@register_op
|
||||
class MOTRandomAffine(BaseOperator):
|
||||
"""
|
||||
Affine transform to image and coords to achieve the rotate, scale and
|
||||
shift effect for training image.
|
||||
|
||||
Args:
|
||||
degrees (list[2]): the rotate range to apply, transform range is [min, max]
|
||||
translate (list[2]): the translate range to apply, transform range is [min, max]
|
||||
scale (list[2]): the scale range to apply, transform range is [min, max]
|
||||
shear (list[2]): the shear range to apply, transform range is [min, max]
|
||||
borderValue (list[3]): value used in case of a constant border when appling
|
||||
the perspective transformation
|
||||
reject_outside (bool): reject warped bounding bboxes outside of image
|
||||
|
||||
Returns:
|
||||
records(dict): contain the image and coords after tranformed
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
degrees=(-5, 5),
|
||||
translate=(0.10, 0.10),
|
||||
scale=(0.50, 1.20),
|
||||
shear=(-2, 2),
|
||||
borderValue=(127.5, 127.5, 127.5),
|
||||
reject_outside=True):
|
||||
super(MOTRandomAffine, self).__init__()
|
||||
self.degrees = degrees
|
||||
self.translate = translate
|
||||
self.scale = scale
|
||||
self.shear = shear
|
||||
self.borderValue = borderValue
|
||||
self.reject_outside = reject_outside
|
||||
|
||||
def apply(self, sample, context=None):
|
||||
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
|
||||
border = 0 # width of added border (optional)
|
||||
|
||||
img = sample['image']
|
||||
height, width = img.shape[0], img.shape[1]
|
||||
|
||||
# Rotation and Scale
|
||||
R = np.eye(3)
|
||||
a = random.random() * (self.degrees[1] - self.degrees[0]
|
||||
) + self.degrees[0]
|
||||
s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
|
||||
R[:2] = cv2.getRotationMatrix2D(
|
||||
angle=a, center=(width / 2, height / 2), scale=s)
|
||||
|
||||
# Translation
|
||||
T = np.eye(3)
|
||||
T[0, 2] = (
|
||||
random.random() * 2 - 1
|
||||
) * self.translate[0] * height + border # x translation (pixels)
|
||||
T[1, 2] = (
|
||||
random.random() * 2 - 1
|
||||
) * self.translate[1] * width + border # y translation (pixels)
|
||||
|
||||
# Shear
|
||||
S = np.eye(3)
|
||||
S[0, 1] = math.tan((random.random() *
|
||||
(self.shear[1] - self.shear[0]) + self.shear[0]) *
|
||||
math.pi / 180) # x shear (deg)
|
||||
S[1, 0] = math.tan((random.random() *
|
||||
(self.shear[1] - self.shear[0]) + self.shear[0]) *
|
||||
math.pi / 180) # y shear (deg)
|
||||
|
||||
M = S @T @R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
|
||||
imw = cv2.warpPerspective(
|
||||
img,
|
||||
M,
|
||||
dsize=(width, height),
|
||||
flags=cv2.INTER_LINEAR,
|
||||
borderValue=self.borderValue) # BGR order borderValue
|
||||
|
||||
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
|
||||
targets = sample['gt_bbox']
|
||||
n = targets.shape[0]
|
||||
points = targets.copy()
|
||||
area0 = (points[:, 2] - points[:, 0]) * (
|
||||
points[:, 3] - points[:, 1])
|
||||
|
||||
# warp points
|
||||
xy = np.ones((n * 4, 3))
|
||||
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
|
||||
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
|
||||
xy = (xy @M.T)[:, :2].reshape(n, 8)
|
||||
|
||||
# create new boxes
|
||||
x = xy[:, [0, 2, 4, 6]]
|
||||
y = xy[:, [1, 3, 5, 7]]
|
||||
xy = np.concatenate(
|
||||
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
|
||||
|
||||
# apply angle-based reduction
|
||||
radians = a * math.pi / 180
|
||||
reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
|
||||
x = (xy[:, 2] + xy[:, 0]) / 2
|
||||
y = (xy[:, 3] + xy[:, 1]) / 2
|
||||
w = (xy[:, 2] - xy[:, 0]) * reduction
|
||||
h = (xy[:, 3] - xy[:, 1]) * reduction
|
||||
xy = np.concatenate(
|
||||
(x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
|
||||
|
||||
# reject warped points outside of image
|
||||
if self.reject_outside:
|
||||
np.clip(xy[:, 0], 0, width, out=xy[:, 0])
|
||||
np.clip(xy[:, 2], 0, width, out=xy[:, 2])
|
||||
np.clip(xy[:, 1], 0, height, out=xy[:, 1])
|
||||
np.clip(xy[:, 3], 0, height, out=xy[:, 3])
|
||||
w = xy[:, 2] - xy[:, 0]
|
||||
h = xy[:, 3] - xy[:, 1]
|
||||
area = w * h
|
||||
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
|
||||
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
|
||||
|
||||
if sum(i) > 0:
|
||||
sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
|
||||
sample['gt_class'] = sample['gt_class'][i]
|
||||
if 'difficult' in sample:
|
||||
sample['difficult'] = sample['difficult'][i]
|
||||
if 'gt_ide' in sample:
|
||||
sample['gt_ide'] = sample['gt_ide'][i]
|
||||
if 'is_crowd' in sample:
|
||||
sample['is_crowd'] = sample['is_crowd'][i]
|
||||
sample['image'] = imw
|
||||
return sample
|
||||
else:
|
||||
return sample
|
||||
|
||||
|
||||


@register_op
class Gt2JDETargetThres(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets from ground truth data when training.
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): identity threshold; anchors above it are assigned an identity
        fg_thresh (float): foreground threshold; anchors above it are foreground
        bg_thresh (float): background threshold; anchors below it are background
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes

    def generate_anchor(self, nGh, nGw, anchor_hw):
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))

        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]

        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)

        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh

    def encode_delta(self, gt_box_list, fg_anchor_list):
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
                         fg_anchor_list[:, 2], fg_anchor_list[:, 3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                         gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)

    def pad_box(self, sample, num_max):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]

        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))

        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)

                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)

                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)

                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)

                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)

                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1

                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]

                if np.sum(fg_index) > 0:
                    tid[id_index] = gt_id_list

                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
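
# Illustrative usage sketch (not part of the original file): a plausible
# 3-level configuration; the anchor values below are placeholders, not the
# tuned JDE defaults.
#
#     op = Gt2JDETargetThres(
#         anchors=[[[6, 16], [8, 23], [11, 32]],
#                  [[21, 64], [30, 90], [43, 128]],
#                  [[85, 255], [120, 360], [170, 420]]],
#         anchor_masks=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
#         downsample_ratios=[8, 16, 32])
#     samples = op(samples)  # adds tbox{i}, tconf{i}, tide{i} per level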


@register_op
class Gt2JDETargetMax(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets from ground truth data when evaluating.
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        max_iou_thresh (float): iou threshold for a high-quality anchor
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)

                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

                # select best iou and anchor
                iou_best = iou.max(0)  # best anchor [0-2] for each target
                a = np.argmax(iou, axis=0)

                # select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst

                # unique anchor selection: keep the first (best) occurrence
                # of each (grid x, grid y, anchor) triple
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # the best anchor must share significant commonality (iou)
                # with the target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]

                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)

                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh

                    # xy offsets within the assigned grid cell
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # width and height encoded YOLO-style
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
                                                                 anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
        return samples
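
# Illustrative usage sketch (not part of the original file): the
# evaluation-time counterpart of Gt2JDETargetThres keeps only the single
# best-IoU anchor per target.
#
#     op = Gt2JDETargetMax(anchors=anchors, anchor_masks=anchor_masks,
#                          downsample_ratios=[8, 16, 32],
#                          max_iou_thresh=0.60)
#     samples = op(samples)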


class Gt2FairMOTTarget(Gt2TTFTarget):
    __shared__ = ['num_classes']
    """
    Generate FairMOT targets by ground truth data.
    Differences between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernel radius used to generate the heatmap.
        2. the targets needed during training.

    Args:
        num_classes (int): the number of classes.
        down_ratio (int): the down ratio from images to heatmap, 4 by default.
        max_objs (int): the maximum number of ground truth objects in an image, 500 by default.
    """

    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        # super(Gt2TTFTarget, self) starts the MRO lookup after Gt2TTFTarget,
        # so this deliberately skips Gt2TTFTarget.__init__ and calls
        # BaseOperator.__init__ instead.
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs

    def __call__(self, samples, context=None):
        for b_id, sample in enumerate(samples):
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio

            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
            if self.num_classes > 1:
                # each category corresponds to a set of track ids
                cls_tr_ids = np.zeros(
                    (self.num_classes, output_h, output_w), dtype=np.int64)
                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]

                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

                if h > 0 and w > 0:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
                                                radius)
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                        bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]

                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy
                    if self.num_classes > 1:
                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
                        # track ids start from 0
                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1

            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            if self.num_classes > 1:
                sample['cls_id_map'] = cls_id_map
                sample['cls_tr_ids'] = cls_tr_ids
            sample['bbox_xys'] = bbox_xys
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples
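
# Illustrative usage sketch (not part of the original file): for
# single-class FairMOT, this operator consumes normalized center-size
# gt_bbox plus gt_ide and emits CenterNet-style training targets.
#
#     op = Gt2FairMOTTarget(num_classes=1, down_ratio=4, max_objs=500)
#     samples = op(samples)
#     # samples[0] now holds 'heatmap', 'index', 'offset', 'size',
#     # 'index_mask', 'reid' and 'bbox_xys'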
494
paddle_detection/ppdet/data/transform/op_helper.py
Normal file
@@ -0,0 +1,494 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains helper methods for BBOX processing

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import random
import math
import cv2


def meet_emit_constraint(src_bbox, sample_bbox):
    center_x = (src_bbox[2] + src_bbox[0]) / 2
    center_y = (src_bbox[3] + src_bbox[1]) / 2
    if center_x >= sample_bbox[0] and \
            center_x <= sample_bbox[2] and \
            center_y >= sample_bbox[1] and \
            center_y <= sample_bbox[3]:
        return True
    return False


def clip_bbox(src_bbox):
    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
    return src_bbox


def bbox_area(src_bbox):
    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
        return 0.
    else:
        width = src_bbox[2] - src_bbox[0]
        height = src_bbox[3] - src_bbox[1]
        return width * height


def is_overlap(object_bbox, sample_bbox):
    if object_bbox[0] >= sample_bbox[2] or \
            object_bbox[2] <= sample_bbox[0] or \
            object_bbox[1] >= sample_bbox[3] or \
            object_bbox[3] <= sample_bbox[1]:
        return False
    else:
        return True


def filter_and_process(sample_bbox, bboxes, labels, scores=None,
                       keypoints=None):
    new_bboxes = []
    new_labels = []
    new_scores = []
    new_keypoints = []
    new_kp_ignore = []
    for i in range(len(bboxes)):
        new_bbox = [0, 0, 0, 0]
        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
        if not meet_emit_constraint(obj_bbox, sample_bbox):
            continue
        if not is_overlap(obj_bbox, sample_bbox):
            continue
        sample_width = sample_bbox[2] - sample_bbox[0]
        sample_height = sample_bbox[3] - sample_bbox[1]
        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
        new_bbox = clip_bbox(new_bbox)
        if bbox_area(new_bbox) > 0:
            new_bboxes.append(new_bbox)
            new_labels.append([labels[i][0]])
            if scores is not None:
                new_scores.append([scores[i][0]])
            if keypoints is not None:
                sample_keypoint = keypoints[0][i]
                for j in range(len(sample_keypoint)):
                    kp_len = sample_height if j % 2 else sample_width
                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
                    sample_keypoint[j] = (
                        sample_keypoint[j] - sample_coord) / kp_len
                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
                new_keypoints.append(sample_keypoint)
                new_kp_ignore.append(keypoints[1][i])

    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    if keypoints is not None:
        keypoints = np.array(new_keypoints)
        new_kp_ignore = np.array(new_kp_ignore)
        return bboxes, labels, scores, (keypoints, new_kp_ignore)
    return bboxes, labels, scores


def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
    new_bboxes = []
    new_labels = []
    new_scores = []
    for i, bbox in enumerate(bboxes):
        w = float((bbox[2] - bbox[0]) * target_size)
        h = float((bbox[3] - bbox[1]) * target_size)
        if w * h < float(min_size * min_size):
            continue
        else:
            new_bboxes.append(bbox)
            new_labels.append(labels[i])
            if scores is not None and scores.size != 0:
                new_scores.append(scores[i])
    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    return bboxes, labels, scores


def generate_sample_bbox(sampler):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox


def generate_sample_bbox_square(sampler, image_width, image_height):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    if image_height < image_width:
        bbox_width = bbox_height * image_height / image_width
    else:
        bbox_height = bbox_width * image_width / image_height
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox
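
# Worked example (illustrative): with scale = 0.5, the sampled aspect ratio
# is clamped to [scale**2, 1 / scale**2] = [0.25, 4.0]; for aspect_ratio = 1.0
# this yields bbox_width = bbox_height = 0.5, i.e. a half-size square placed
# uniformly at random inside the unit image.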


def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0

    if num_gt != 0:
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]

        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0

        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break

        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2

        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            rand_idx_size = rng_rand_size % (range_size + 1)

        if rand_idx_size == range_size:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)

        sample_bbox_size = wid * resize_width / scale_choose

        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)

            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)

        else:
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)

        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)

        # Figure out top-left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)

        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        return 0


def jaccard_overlap(sample_bbox, object_bbox):
    if sample_bbox[0] >= object_bbox[2] or \
            sample_bbox[2] <= object_bbox[0] or \
            sample_bbox[1] >= object_bbox[3] or \
            sample_bbox[3] <= object_bbox[1]:
        return 0
    intersect_xmin = max(sample_bbox[0], object_bbox[0])
    intersect_ymin = max(sample_bbox[1], object_bbox[1])
    intersect_xmax = min(sample_bbox[2], object_bbox[2])
    intersect_ymax = min(sample_bbox[3], object_bbox[3])
    intersect_size = (intersect_xmax - intersect_xmin) * (
        intersect_ymax - intersect_ymin)
    sample_bbox_size = bbox_area(sample_bbox)
    object_bbox_size = bbox_area(object_bbox)
    overlap = intersect_size / (
        sample_bbox_size + object_bbox_size - intersect_size)
    return overlap
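
# Worked example (illustrative): for sample_bbox = [0.0, 0.0, 0.5, 0.5] and
# object_bbox = [0.25, 0.25, 0.75, 0.75], the intersection is 0.25 * 0.25 =
# 0.0625 and the union is 0.25 + 0.25 - 0.0625 = 0.4375, so
# jaccard_overlap(...) == 0.0625 / 0.4375 ~= 0.143.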


def intersect_bbox(bbox1, bbox2):
    if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
            bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
        intersection_box = [0.0, 0.0, 0.0, 0.0]
    else:
        intersection_box = [
            max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
            min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
        ]
    return intersection_box


def bbox_coverage(bbox1, bbox2):
    inter_box = intersect_bbox(bbox1, bbox2)
    intersect_size = bbox_area(inter_box)

    if intersect_size > 0:
        bbox1_size = bbox_area(bbox1)
        return intersect_size / bbox1_size
    else:
        return 0.


def satisfy_sample_constraint(sampler,
                              sample_bbox,
                              gt_bboxes,
                              satisfy_all=False):
    if sampler[6] == 0 and sampler[7] == 0:
        return True
    satisfied = []
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        overlap = jaccard_overlap(sample_bbox, object_bbox)
        if sampler[6] != 0 and \
                overlap < sampler[6]:
            satisfied.append(False)
            continue
        if sampler[7] != 0 and \
                overlap > sampler[7]:
            satisfied.append(False)
            continue
        satisfied.append(True)
        if not satisfy_all:
            return True

    if satisfy_all:
        return np.all(satisfied)
    else:
        return False


def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
    if sampler[6] == 0 and sampler[7] == 0:
        has_jaccard_overlap = False
    else:
        has_jaccard_overlap = True
    if sampler[8] == 0 and sampler[9] == 0:
        has_object_coverage = False
    else:
        has_object_coverage = True

    if not has_jaccard_overlap and not has_object_coverage:
        return True
    found = False
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        if has_jaccard_overlap:
            overlap = jaccard_overlap(sample_bbox, object_bbox)
            if sampler[6] != 0 and \
                    overlap < sampler[6]:
                continue
            if sampler[7] != 0 and \
                    overlap > sampler[7]:
                continue
            found = True
        if has_object_coverage:
            object_coverage = bbox_coverage(object_bbox, sample_bbox)
            if sampler[8] != 0 and \
                    object_coverage < sampler[8]:
                continue
            if sampler[9] != 0 and \
                    object_coverage > sampler[9]:
                continue
            found = True
        if found:
            return True
    return found
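
# Note on the `sampler` layout assumed by the helpers above (inferred from
# the indexing in this file, so treat it as a sketch rather than official
# documentation): sampler[2:4] is the (min, max) scale range, sampler[4:6]
# the aspect-ratio range, sampler[6:8] the (min, max) jaccard-overlap
# constraints, and sampler[8:10] the (min, max) object-coverage constraints.
# A zero threshold disables the corresponding constraint.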


def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin

    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height

    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)

    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)

    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]

    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)

    return sample_img


def is_poly(segm):
    assert isinstance(segm, (list, dict)), \
        "Invalid segm type: {}".format(type(segm))
    return isinstance(segm, list)


def gaussian_radius(bbox_size, min_overlap):
    height, width = bbox_size

    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
    radius1 = (b1 + sq1) / (2 * a1)

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
    radius2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
    radius3 = (b3 + sq3) / 2
    return min(radius1, radius2, radius3)
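
# Worked example (illustrative, values rounded): for a 24 x 32 box with
# min_overlap = 0.7, the three roots above evaluate to roughly 53.5, 103.1
# and 7.5, so gaussian_radius((24, 32), 0.7) returns about 7.5; callers such
# as Gt2FairMOTTarget then take max(0, int(radius)) before drawing.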


def draw_gaussian(heatmap, center, radius, k=1, delte=6):
    # `delte` is the divisor used to derive sigma from the kernel diameter
    diameter = 2 * radius + 1
    sigma = diameter / delte
    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)

    x, y = center

    height, width = heatmap.shape[0:2]

    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)


def gaussian2D(shape, sigma_x=1, sigma_y=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]

    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
                                                            sigma_y)))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h


def draw_umich_gaussian(heatmap, center, radius, k=1):
    """
    draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
    """
    diameter = 2 * radius + 1
    gaussian = gaussian2D(
        (diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)

    x, y = int(center[0]), int(center[1])

    height, width = heatmap.shape[0:2]

    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap
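
# Illustrative usage sketch (not part of the original file): splat a single
# object center onto a one-class heatmap.
#
#     heatmap = np.zeros((128, 128), dtype=np.float32)
#     r = max(0, int(gaussian_radius((24, 32), 0.7)))
#     draw_umich_gaussian(heatmap, (64, 64), r)
#     # heatmap[64, 64] == 1.0, decaying smoothly within the radius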


def get_border(border, size):
    # shrink the border by powers of two until it fits within half of `size`
    i = 1
    while size - border // i <= border // i:
        i *= 2
    return border // i
4148
paddle_detection/ppdet/data/transform/operators.py
Normal file
File diff suppressed because it is too large
480
paddle_detection/ppdet/data/transform/rotated_operators.py
Normal file
@@ -0,0 +1,480 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

from numbers import Number, Integral

import cv2
import numpy as np
import math
import copy
import os

# Pillow is needed by VisibleRBox below
from PIL import Image, ImageDraw

from .operators import register_op, BaseOperator
from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
from ppdet.utils.logger import setup_logger
from ppdet.utils.compact import imagedraw_textsize_c
logger = setup_logger(__name__)


@register_op
class RRotate(BaseOperator):
    """ Rotate Image, Polygon, Box

    Args:
        scale (float): rotate scale
        angle (float): rotate angle
        fill_value (int, tuple): fill color
        auto_bound (bool): whether to auto-bound the output size or not
    """

    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
        super(RRotate, self).__init__()
        self.scale = scale
        self.angle = angle
        self.fill_value = fill_value
        self.auto_bound = auto_bound

    def get_rotated_matrix(self, angle, scale, h, w):
        center = ((w - 1) * 0.5, (h - 1) * 0.5)
        matrix = cv2.getRotationMatrix2D(center, -angle, scale)
        # calculate the new size
        cos = np.abs(matrix[0, 0])
        sin = np.abs(matrix[0, 1])
        new_w = h * sin + w * cos
        new_h = h * cos + w * sin
        # calculate offset
        n_w = int(np.round(new_w))
        n_h = int(np.round(new_h))
        if self.auto_bound:
            ratio = min(w / n_w, h / n_h)
            matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
        else:
            matrix[0, 2] += (new_w - w) * 0.5
            matrix[1, 2] += (new_h - h) * 0.5
            w = n_w
            h = n_h
        return matrix, h, w

    def get_rect_from_pts(self, pts, h, w):
        """ get minimum rectangle of points
        """
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
                                                            axis=1)
        max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
                                                            axis=1)
        min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
        max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
        boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
        return boxes

    def apply_image(self, image, matrix, h, w):
        return cv2.warpAffine(
            image, matrix, (w, h), borderValue=self.fill_value)

    def apply_pts(self, pts, matrix, h, w):
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        # m is twice the number of points per sample, since x and y are
        # interleaved
        _, m = pts.shape
        # transpose points
        pts_ = pts.reshape(-1, 2).T
        # pad 1 to convert the points to homogeneous coordinates
        padding = np.ones((1, pts_.shape[1]), pts.dtype)
        rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
        return rotated_pts[:2, :].T.reshape(-1, m)

    def apply(self, sample, context=None):
        image = sample['image']
        h, w = image.shape[:2]
        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
        sample['image'] = self.apply_image(image, matrix, h, w)
        polys = sample['gt_poly']
        # TODO: segment or keypoint to be processed
        if len(polys) > 0:
            pts = self.apply_pts(polys, matrix, h, w)
            sample['gt_poly'] = pts
            sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)

        return sample
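
# Illustrative usage sketch (not part of the original file): rotate one
# sample by 30 degrees, assuming `sample` already holds an HWC image and an
# (N, 8) float gt_poly (see Poly2Array below).
#
#     op = RRotate(scale=1.0, angle=30., fill_value=0., auto_bound=True)
#     sample = op(sample)  # updates image, gt_poly and gt_bbox together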


@register_op
class RandomRRotate(BaseOperator):
    """ Randomly Rotate Image
    Args:
        scale (float, tuple, list): rotate scale
        scale_mode (str): mode of scale, [range, value, None]
        angle (float, tuple, list): rotate angle
        angle_mode (str): mode of angle, [range, value, None]
        fill_value (float, tuple, list): fill value
        rotate_prob (float): probability of rotation
        auto_bound (bool): whether to auto-bound the output size or not
    """

    def __init__(self,
                 scale=1.0,
                 scale_mode=None,
                 angle=0.,
                 angle_mode=None,
                 fill_value=0.,
                 rotate_prob=1.0,
                 auto_bound=True):
        super(RandomRRotate, self).__init__()
        self.scale = scale
        self.scale_mode = scale_mode
        self.angle = angle
        self.angle_mode = angle_mode
        self.fill_value = fill_value
        self.rotate_prob = rotate_prob
        self.auto_bound = auto_bound

    def get_angle(self, angle, angle_mode):
        assert not angle_mode or angle_mode in [
            'range', 'value'
        ], 'angle mode should be in [range, value, None]'
        if not angle_mode:
            return angle
        elif angle_mode == 'range':
            low, high = angle
            return np.random.rand() * (high - low) + low
        elif angle_mode == 'value':
            return np.random.choice(angle)

    def get_scale(self, scale, scale_mode):
        assert not scale_mode or scale_mode in [
            'range', 'value'
        ], 'scale mode should be in [range, value, None]'
        if not scale_mode:
            return scale
        elif scale_mode == 'range':
            low, high = scale
            return np.random.rand() * (high - low) + low
        elif scale_mode == 'value':
            return np.random.choice(scale)

    def apply(self, sample, context=None):
        if np.random.rand() > self.rotate_prob:
            return sample

        angle = self.get_angle(self.angle, self.angle_mode)
        scale = self.get_scale(self.scale, self.scale_mode)
        rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
        return rotator(sample)
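
# Illustrative usage sketch (not part of the original file): as it might
# appear in a reader config, rotating half of the samples by an angle drawn
# from a discrete set.
#
#     - RandomRRotate: {angle: [30, 60, 90], angle_mode: 'value', rotate_prob: 0.5}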


@register_op
class Poly2RBox(BaseOperator):
    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1

    Args:
        filter_threshold (int, float): threshold to filter annotations
        filter_mode (str): filter mode, ['area', 'edge']
        rbox_type (str): rbox type, ['le135', 'oc']
    """

    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
        super(Poly2RBox, self).__init__()
        self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
        self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np

    def filter(self, size, threshold, mode):
        if mode == 'area':
            if size[0] * size[1] < threshold:
                return True
        elif mode == 'edge':
            if min(size) < threshold:
                return True
        return False

    def get_rbox(self, polys):
        valid_ids, rboxes, bboxes = [], [], []
        for i, poly in enumerate(polys):
            cx, cy, w, h, angle = self.rbox_fn(poly)
            if self.filter_fn((w, h)):
                continue
            rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
            valid_ids.append(i)
            xmin, ymin = min(poly[0::2]), min(poly[1::2])
            xmax, ymax = max(poly[0::2]), max(poly[1::2])
            bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))

        if len(valid_ids) == 0:
            rboxes = np.zeros((0, 5), dtype=np.float32)
            bboxes = np.zeros((0, 4), dtype=np.float32)
        else:
            rboxes = np.stack(rboxes)
            bboxes = np.stack(bboxes)

        return rboxes, bboxes, valid_ids

    def apply(self, sample, context=None):
        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
        sample['gt_rbox'] = rboxes
        sample['gt_bbox'] = bboxes
        for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
            if k in sample:
                sample[k] = sample[k][valid_ids]

        return sample
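
# Illustrative usage sketch (not part of the original file): convert (N, 8)
# polygons to 5-parameter rotated boxes in the 'le135' convention, dropping
# boxes whose shorter edge is under 4 pixels.
#
#     op = Poly2RBox(filter_threshold=4, filter_mode='edge', rbox_type='le135')
#     sample = op(sample)  # adds gt_rbox (N, 5) and axis-aligned gt_bbox (N, 4)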


@register_op
class Poly2Array(BaseOperator):
    """ convert gt_poly to np.array for rotated bboxes
    """

    def __init__(self):
        super(Poly2Array, self).__init__()

    def apply(self, sample, context=None):
        if 'gt_poly' in sample:
            sample['gt_poly'] = np.array(
                sample['gt_poly'], dtype=np.float32).reshape((-1, 8))

        return sample


@register_op
class RResize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize the image to target size. If keep_ratio is True, scale so
        that the long side does not exceed the maximum of target_size;
        if keep_ratio is False, resize the image to target_size (h, w).
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether to keep the aspect ratio
            interp (int): the interpolation method
        """
        super(RResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale

        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_pts(self, pts, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        pts[:, 0::2] *= im_scale_x
        pts[:, 1::2] *= im_scale_y
        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
        return pts

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:

            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im.astype(np.float32)
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        return sample
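
# Illustrative usage sketch (not part of the original file): keep-ratio
# resize of one decoded sample; im_shape and scale_factor are updated for
# downstream operators.
#
#     op = RResize(target_size=[1024, 1024], keep_ratio=True)
#     sample = op(sample)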


@register_op
class RandomRFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping the image
        """
        super(RandomRFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_pts(self, pts, width):
        oldx = pts[:, 0::2].copy()
        pts[:, 0::2] = width - oldx - 1
        return pts

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
              (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
              (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation parts
                    in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)

            sample['flipped'] = True
            sample['image'] = im
        return sample


@register_op
class VisibleRBox(BaseOperator):
    """
    In debug mode, visualize images according to `gt_poly`.
    (Currently only supported when not cropping and flipping image.)
    """

    def __init__(self, output_dir='debug'):
        super(VisibleRBox, self).__init__()
        self.output_dir = output_dir
        # keypoints are assumed to be in absolute pixel coordinates unless
        # this flag is set to True
        self.is_normalized = False
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    def apply(self, sample, context=None):
        image = Image.fromarray(sample['image'].astype(np.uint8))
        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
        width = sample['w']
        height = sample['h']
        # gt_poly = sample['gt_rbox']
        gt_poly = sample['gt_poly']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_poly.shape[0]):
            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
            draw.line(
                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
                width=2,
                fill='green')
            # draw label
            xmin = min(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            text = str(gt_class[i][0])
            tw, th = imagedraw_textsize_c(draw, text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            if self.is_normalized:
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    x1 = round(keypoint[2 * j]).astype(np.int32)
                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample


@register_op
class Rbox2Poly(BaseOperator):
    """
    Convert rbox format to poly format.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rboxes = sample['gt_rbox']
        polys = rbox2poly_np(rboxes)
        sample['gt_poly'] = polys
        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
        return sample
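
# Illustrative usage sketch (not part of the original file): recover 4-point
# polygons and axis-aligned boxes from 5-parameter rotated boxes.
#
#     sample = {'gt_rbox': np.array([[50., 50., 20., 10., 0.3]], np.float32)}
#     sample = Rbox2Poly()(sample)
#     # sample['gt_poly'].shape == (1, 8); sample['gt_bbox'].shape == (1, 4)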