更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/ppdet/data/transform/__init__.py
+++ b/paddle_detection/ppdet/data/transform/__init__.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import operators
+from . import batch_operators
+from . import keypoint_operators
+from . import mot_operators
+from . import rotated_operators
+from . import keypoints_3d_operators
+from . import culane_operators
+
+from .operators import *
+from .batch_operators import *
+from .keypoint_operators import *
+from .mot_operators import *
+from .rotated_operators import *
+from .keypoints_3d_operators import *
+from .culane_operators import *
+
+# Public names re-exported by this package. `registered_ops` is not defined
+# in this file; it is expected to arrive through one of the star imports
+# above (NOTE(review): presumably from `.operators` -- confirm).
+__all__ = (list(registered_ops) + list(keypoint_operators.__all__) +
+           list(mot_operators.__all__) + list(culane_operators.__all__))
--- a/paddle_detection/ppdet/data/transform/atss_assigner.py
+++ b/paddle_detection/ppdet/data/transform/atss_assigner.py
@@ -0,0 +1,421 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The code is based on:
+# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
+    """Calculate overlap between two sets of bboxes.
+
+    If ``is_aligned`` is ``False``, the overlap is computed between every
+    bbox of ``bboxes1`` and every bbox of ``bboxes2``; otherwise only between
+    the aligned pairs ``bboxes1[i]`` and ``bboxes2[i]``.
+
+    Args:
+        bboxes1 (np.ndarray): shape (B, m, 4) in <x1, y1, x2, y2> format or
+            empty.
+        bboxes2 (np.ndarray): shape (B, n, 4) in <x1, y1, x2, y2> format or
+            empty. B indicates the batch dim, in shape (B1, B2, ..., Bn).
+            If ``is_aligned`` is ``True``, then m and n must be equal.
+        mode (str): "iou" (intersection over union), "iof" (intersection
+            over foreground, i.e. over the area of ``bboxes1``), "giou"
+            (generalized IoU) or "diou" (distance IoU).
+        is_aligned (bool, optional): If True, then m and n must be equal.
+            Default False.
+        eps (float, optional): Lower bound applied to every denominator for
+            numerical stability. Default 1e-6.
+
+    Returns:
+        np.ndarray: shape (B, m, n) if ``is_aligned`` is False else shape
+            (B, m).
+    """
+    assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
+        mode)
+    # Either the boxes are empty or the length of the last dimension is 4
+    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
+    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
+
+    # Batch dims (B1, B2, ..., Bn) must be the same
+    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
+    batch_shape = bboxes1.shape[:-2]
+
+    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
+    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
+    if is_aligned:
+        assert rows == cols
+
+    if rows * cols == 0:
+        # Nothing to compare: the result holds no elements, so return a
+        # deterministic empty array of the right shape.
+        if is_aligned:
+            return np.zeros(batch_shape + (rows, ))
+        return np.zeros(batch_shape + (rows, cols))
+
+    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
+        bboxes1[..., 3] - bboxes1[..., 1])
+    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
+        bboxes2[..., 3] - bboxes2[..., 1])
+
+    if is_aligned:
+        # Pairwise comparison: operands already line up.  [B, rows, 4]
+        boxes_a, boxes_b = bboxes1, bboxes2
+        area_a, area_b = area1, area2
+    else:
+        # All-vs-all comparison: insert singleton axes so that numpy
+        # broadcasting produces a [B, rows, cols, ...] result.
+        boxes_a = bboxes1[..., :, None, :]  # [B, rows, 1, 4]
+        boxes_b = bboxes2[..., None, :, :]  # [B, 1, cols, 4]
+        area_a = area1[..., None]
+        area_b = area2[..., None, :]
+
+    lt = np.maximum(boxes_a[..., :2], boxes_b[..., :2])
+    rb = np.minimum(boxes_a[..., 2:], boxes_b[..., 2:])
+    wh = (rb - lt).clip(min=0)
+    overlap = wh[..., 0] * wh[..., 1]
+
+    if mode == 'iof':
+        union = area_a
+    else:
+        # 'iou', 'giou' and 'diou' are all built on the true IoU, so they
+        # share the union denominator.
+        union = area_a + area_b - overlap
+
+    eps = np.array([eps])
+    union = np.maximum(union, eps)
+    ious = overlap / union
+    if mode in ['iou', 'iof']:
+        return ious
+
+    # Smallest box enclosing both operands, needed by giou and diou.
+    enclosed_lt = np.minimum(boxes_a[..., :2], boxes_b[..., :2])
+    enclosed_rb = np.maximum(boxes_a[..., 2:], boxes_b[..., 2:])
+    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
+
+    if mode == 'giou':
+        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
+        enclose_area = np.maximum(enclose_area, eps)
+        return ious - (enclose_area - union) / enclose_area
+
+    # mode == 'diou': penalise the squared center distance (rho2) relative
+    # to the squared diagonal of the enclosing box.
+    dx2 = ((boxes_b[..., 0] + boxes_b[..., 2]) -
+           (boxes_a[..., 0] + boxes_a[..., 2]))**2 / 4
+    dy2 = ((boxes_b[..., 1] + boxes_b[..., 3]) -
+           (boxes_a[..., 1] + boxes_a[..., 3]))**2 / 4
+    rho2 = dx2 + dy2
+    enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
+    enclose_c = np.maximum(enclose_c, eps)
+    return ious - rho2 / enclose_c
+
+
+def topk_(input, k, axis=1, largest=True):
+    """Select the ``k`` largest or smallest entries of a 2-D array along an axis.
+
+    Args:
+        input (np.ndarray): 2-D array to select from.
+        k (int): number of entries to keep along ``axis``; may equal the
+            length of that axis.
+        axis (int): 0 or 1, the axis along which to select. Default 1.
+        largest (bool): select the largest entries if True, otherwise the
+            smallest. Default True.
+
+    Returns:
+        tuple(np.ndarray, np.ndarray): the selected values and their indices
+            along ``axis``; both have length ``k`` along ``axis``. For
+            ``axis=0`` the entries are ordered best-first (descending when
+            ``largest`` else ascending); for ``axis=1`` they are always
+            ordered by ascending value, which is the pre-existing behaviour
+            of this function.
+    """
+    # Negating turns "k largest" into "k smallest", which is what
+    # np.argpartition provides.
+    keys = -input if largest else input
+    # np.argpartition requires kth < size, so partition around the last
+    # element when every entry along the axis is requested.
+    kth = k - 1 if k == keys.shape[axis] else k
+    partitioned = np.argpartition(keys, kth, axis=axis)
+    topk_index = partitioned[0:k, :] if axis == 0 else partitioned[:, 0:k]
+    # Read the values from the original array so they keep their true sign.
+    topk_data = np.take_along_axis(input, topk_index, axis=axis)
+
+    if axis == 0:
+        sort_keys = -topk_data if largest else topk_data
+    else:
+        sort_keys = topk_data
+    order = np.argsort(sort_keys, axis=axis)
+    topk_data_sort = np.take_along_axis(topk_data, order, axis=axis)
+    topk_index_sort = np.take_along_axis(topk_index, order, axis=axis)
+
+    return topk_data_sort, topk_index_sort
+
+
+class ATSSAssigner(object):
+    """Assign a corresponding gt bbox or background to each bbox.
+
+    Each proposal will be assigned `0` or a positive integer
+    indicating the ground truth index.
+
+    - 0: negative sample, no assigned gt
+    - positive integer: positive sample, index (1-based) of assigned gt
+
+    Args:
+        topk (int): number of bboxes selected in each level
+    """
+
+    def __init__(self, topk=9):
+        self.topk = topk
+
+    @staticmethod
+    def _center_distances(bboxes, gt_bboxes):
+        """Return bbox center x, center y and bbox-to-gt center distances.
+
+        The distance matrix has shape (num_bboxes, num_gt).
+        """
+        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
+        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
+        gt_points = np.stack((gt_cx, gt_cy), axis=1)
+
+        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
+        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
+        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
+
+        distances = np.sqrt(
+            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
+            .sum(-1))
+        return bboxes_cx, bboxes_cy, distances
+
+    @staticmethod
+    def _closest_candidates(distances, num_level_bboxes, k=None):
+        """For each gt, pick the closest bboxes on every pyramid level.
+
+        Args:
+            distances (np.array): center distances, shape (num_bboxes, num_gt).
+            num_level_bboxes (List): num of bboxes in each level.
+            k (int, optional): bboxes kept per level (capped by the level
+                size). ``None`` keeps every bbox of the level.
+
+        Returns:
+            np.array: bbox indices, shape (num_candidates, num_gt); column
+                ``j`` lists the candidates of gt ``j``.
+        """
+        candidate_idxs = []
+        start_idx = 0
+        for bboxes_per_level in num_level_bboxes:
+            end_idx = start_idx + bboxes_per_level
+            if k is None:
+                selectable = bboxes_per_level
+            else:
+                selectable = min(k, bboxes_per_level)
+            _, idxs_per_level = topk_(
+                distances[start_idx:end_idx, :],
+                selectable,
+                axis=0,
+                largest=False)
+            # topk_ indexes within the level slice; shift to global indices
+            candidate_idxs.append(idxs_per_level + start_idx)
+            start_idx = end_idx
+        return np.concatenate(candidate_idxs, axis=0)
+
+    @staticmethod
+    def _max_overlap_of_selected(overlaps, candidate_idxs, selected):
+        """Find, per bbox, the best IoU among the gts that selected it.
+
+        Args:
+            overlaps (np.array): IoU matrix, shape (num_bboxes, num_gt).
+            candidate_idxs (np.array): bbox indices, shape
+                (num_candidates, num_gt).
+            selected (np.array): bool mask with the shape of
+                ``candidate_idxs`` marking the kept (candidate, gt) pairs.
+
+        Returns:
+            tuple(np.array, np.array): per-bbox maximum IoU (``-inf`` where
+                no gt selected the bbox) and the index of the gt reaching it.
+        """
+        num_bboxes, num_gt = overlaps.shape
+        # Position of pair (bbox i, gt g) in the flattened transposed IoU
+        # matrix is g * num_bboxes + i.
+        flat_idxs = (candidate_idxs + np.arange(num_gt) * num_bboxes
+                     ).reshape(-1)
+        index = flat_idxs[selected.reshape(-1)]
+
+        flat_overlaps = overlaps.T.reshape(-1)
+        overlaps_inf = np.full_like(flat_overlaps, -np.inf)
+        overlaps_inf[index] = flat_overlaps[index]
+        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
+        return overlaps_inf.max(axis=1), overlaps_inf.argmax(axis=1)
+
+    def __call__(self,
+                 bboxes,
+                 num_level_bboxes,
+                 gt_bboxes,
+                 gt_bboxes_ignore=None,
+                 gt_labels=None):
+        """Assign gt to bboxes.
+        The assignment is done in following steps
+        1. compute iou between all bbox (bbox of all pyramid levels) and gt
+        2. compute center distance between all bbox and gt
+        3. on each pyramid level, for each gt, select k bbox whose center
+           are closest to the gt center, so we total select k*l bbox as
+           candidates for each gt
+        4. get corresponding iou for these candidates, and compute the
+           mean and std, set mean + std as the iou threshold
+        5. select the candidates whose iou are greater than or equal to
+           the threshold as positive
+        6. limit the positive sample's center in gt
+        Args:
+            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
+            num_level_bboxes (List): num of bboxes in each level
+            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
+            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
+                labelled as `ignored`, e.g., crowd boxes in COCO. Not used
+                by this method.
+            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
+                Not used by this method.
+        Returns:
+            tuple(np.array, np.array): ``assigned_gt_inds`` (int64, shape
+                (n, ), 0 for background, otherwise 1-based gt index) and
+                ``max_overlaps`` (shape (n, ), IoU with the assigned gt,
+                ``-inf`` for background; all zeros when there is no gt or
+                no bbox).
+        """
+        bboxes = bboxes[:, :4]
+        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
+
+        # assign 0 (background) by default
+        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
+
+        if num_gt == 0 or num_bboxes == 0:
+            # No ground truth or boxes: everything stays background
+            return assigned_gt_inds, np.zeros((num_bboxes, ))
+
+        # compute iou between all bbox and gt
+        overlaps = bbox_overlaps(bboxes, gt_bboxes)
+        # compute center distance between all bbox and gt
+        bboxes_cx, bboxes_cy, distances = self._center_distances(bboxes,
+                                                                 gt_bboxes)
+
+        # on each pyramid level, for each gt,
+        # select k bbox whose center are closest to the gt center
+        candidate_idxs = self._closest_candidates(distances, num_level_bboxes,
+                                                  self.topk)
+
+        # get corresponding iou for these candidates, and compute the
+        # mean and std, set mean + std as the iou threshold
+        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
+        overlaps_thr_per_gt = candidate_overlaps.mean(
+            0) + candidate_overlaps.std(0)
+        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
+
+        # limit the positive sample's center in gt: compute the left, top,
+        # right, bottom distance between candidate center and gt side
+        cand_cx = bboxes_cx[candidate_idxs]
+        cand_cy = bboxes_cy[candidate_idxs]
+        l_ = cand_cx - gt_bboxes[:, 0]
+        t_ = cand_cy - gt_bboxes[:, 1]
+        r_ = gt_bboxes[:, 2] - cand_cx
+        b_ = gt_bboxes[:, 3] - cand_cy
+        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
+        is_pos = is_pos & is_in_gts
+
+        # if an anchor box is assigned to multiple gts,
+        # the one with the highest IoU will be selected.
+        max_overlaps, argmax_overlaps = self._max_overlap_of_selected(
+            overlaps, candidate_idxs, is_pos)
+        matched = max_overlaps != -np.inf
+        assigned_gt_inds[matched] = argmax_overlaps[matched] + 1
+
+        return assigned_gt_inds, max_overlaps
+
+    def get_vlr_region(self,
+                       bboxes,
+                       num_level_bboxes,
+                       gt_bboxes,
+                       gt_bboxes_ignore=None,
+                       gt_labels=None):
+        """get vlr region for ld distillation.
+        Args:
+            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
+            num_level_bboxes (List): num of bboxes in each level
+            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
+            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
+                labelled as `ignored`, e.g., crowd boxes in COCO. Not used
+                by this method.
+            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
+                Not used by this method.
+        Returns:
+            np.array: float32, shape (n, ). For bboxes inside the region the
+                value is their highest IoU among the gts that selected them,
+                otherwise 0. All zeros when there is no gt or no bbox.
+        """
+        bboxes = bboxes[:, :4]
+        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
+
+        # 0 by default, i.e. outside of the region
+        vlr_region_iou = np.zeros((num_bboxes, ), dtype=np.float32)
+
+        if num_gt == 0 or num_bboxes == 0:
+            # No ground truth or boxes: return the same kind of value as the
+            # regular path so callers always receive a single array.
+            return vlr_region_iou
+
+        # compute iou and diou between all bbox and gt
+        overlaps = bbox_overlaps(bboxes, gt_bboxes)
+        diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')
+
+        # compute center distance between all bbox and gt
+        _, _, distances = self._center_distances(bboxes, gt_bboxes)
+
+        # The iou threshold is estimated from the top-k closest bboxes per
+        # level, while every bbox of every level is a region candidate.
+        candidate_idxs_t = self._closest_candidates(
+            distances, num_level_bboxes, self.topk)
+        candidate_idxs = self._closest_candidates(distances, num_level_bboxes)
+
+        # get corresponding iou for the top-k candidates, and compute the
+        # mean and std, set mean + std as the iou threshold
+        candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]
+        overlaps_thr_per_gt = candidate_overlaps_t.mean(
+            0) + candidate_overlaps_t.std(
+                0, ddof=1)  # NOTE: use Bessel correction
+
+        # compute region: diou below the threshold but not below a quarter
+        # of it
+        t_diou = diou[candidate_idxs, np.arange(num_gt)]
+        is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
+            t_diou >= 0.25 * overlaps_thr_per_gt[None, :])
+
+        # if an anchor box is selected by multiple gts,
+        # the one with the highest IoU will be selected.
+        max_overlaps, _ = self._max_overlap_of_selected(overlaps,
+                                                        candidate_idxs, is_pos)
+        in_region = max_overlaps != -np.inf
+        vlr_region_iou[in_region] = max_overlaps[in_region]
+
+        return vlr_region_iou
--- a/paddle_detection/ppdet/data/transform/autoaugment_utils.py
+++ b/paddle_detection/ppdet/data/transform/autoaugment_utils.py
--- a/paddle_detection/ppdet/data/transform/batch_operators.py
+++ b/paddle_detection/ppdet/data/transform/batch_operators.py
--- a/paddle_detection/ppdet/data/transform/culane_operators.py
+++ b/paddle_detection/ppdet/data/transform/culane_operators.py
@@ -0,0 +1,366 @@
+import numpy as np
+import imgaug.augmenters as iaa
+from .operators import BaseOperator, register_op
+from ppdet.utils.logger import setup_logger
+from ppdet.data.culane_utils import linestrings_to_lanes, transform_annotation
+
+logger = setup_logger(__name__)
+
+__all__ = [
+    "CULaneTrainProcess", "CULaneDataProcess", "HorizontalFlip",
+    "ChannelShuffle", "CULaneAffine", "CULaneResize", "OneOfBlur",
+    "MultiplyAndAddToBrightness", "AddToHueAndSaturation"
+]
+
+
+def trainTransforms(img_h, img_w):
+    """Describe the training augmentation pipeline as a list of dicts.
+
+    Every entry carries the augmenter ``name``, its keyword ``parameters``
+    (or, for ``OneOf``, a list of nested ``transforms``) and the probability
+    ``p`` of applying it. The list starts and ends with a resize to
+    ``img_h`` x ``img_w``.
+    """
+
+    def resize_step():
+        # Built on demand so the first and last entries are separate objects.
+        return {
+            'name': 'Resize',
+            'parameters': {
+                'size': {
+                    'height': img_h,
+                    'width': img_w
+                }
+            },
+            'p': 1.0
+        }
+
+    flip = {'name': 'HorizontalFlip', 'parameters': {'p': 1.0}, 'p': 0.5}
+    shuffle = {'name': 'ChannelShuffle', 'parameters': {'p': 1.0}, 'p': 0.1}
+    brightness = {
+        'name': 'MultiplyAndAddToBrightness',
+        'parameters': {
+            'mul': (0.85, 1.15),
+            'add': (-10, 10)
+        },
+        'p': 0.6
+    }
+    hue_saturation = {
+        'name': 'AddToHueAndSaturation',
+        'parameters': {
+            'value': (-10, 10)
+        },
+        'p': 0.7
+    }
+    blur = {
+        'name': 'OneOf',
+        'transforms': [{
+            'name': 'MotionBlur',
+            'parameters': {
+                'k': (3, 5)
+            }
+        }, {
+            'name': 'MedianBlur',
+            'parameters': {
+                'k': (3, 5)
+            }
+        }],
+        'p': 0.2
+    }
+    affine = {
+        'name': 'Affine',
+        'parameters': {
+            'translate_percent': {
+                'x': (-0.1, 0.1),
+                'y': (-0.1, 0.1)
+            },
+            'rotate': (-10, 10),
+            'scale': (0.8, 1.2)
+        },
+        'p': 0.7
+    }
+    return [
+        resize_step(), flip, shuffle, brightness, hue_saturation, blur,
+        affine, resize_step()
+    ]
+
+
+@register_op
+class CULaneTrainProcess(BaseOperator):
+    """Run the whole ``trainTransforms`` recipe as one imgaug pipeline.
+
+    Args:
+        img_w: target image width handed to ``trainTransforms``.
+        img_h: target image height handed to ``trainTransforms``.
+    """
+
+    def __init__(self, img_w, img_h):
+        super(CULaneTrainProcess, self).__init__()
+        self.img_w = img_w
+        self.img_h = img_h
+        self.transforms = trainTransforms(self.img_h, self.img_w)
+
+        img_transforms = []
+        if self.transforms is not None:
+            for aug in self.transforms:
+                p = aug['p']
+                if aug['name'] == 'OneOf':
+                    # A 'OneOf' entry nests its alternatives under
+                    # 'transforms' instead of carrying 'parameters'.
+                    augmenter = iaa.OneOf([
+                        getattr(iaa, aug_['name'])(**aug_['parameters'])
+                        for aug_ in aug['transforms']
+                    ])
+                else:
+                    augmenter = getattr(iaa, aug['name'])(**aug['parameters'])
+                # Each step is applied with its own probability p.
+                img_transforms.append(
+                    iaa.Sometimes(
+                        p=p, then_list=augmenter))
+        self.iaa_transform = iaa.Sequential(img_transforms)
+
+    def apply(self, sample, context=None):
+        """Augment ``sample['image']``, ``['lanes']`` and ``['mask']`` in place.
+
+        All three keys are required; the updated ``sample`` is returned.
+        """
+        augmented = self.iaa_transform(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        sample['image'], sample['lanes'], sample['mask'] = augmented
+        return sample
+
+
+@register_op
+class CULaneDataProcess(BaseOperator):
+    """Turn a CULane sample (image, lanes, optional mask) into a data dict.
+
+    Args:
+        img_w: image width passed to ``transform_annotation`` and stored in
+            ``im_shape``.
+        img_h: image height passed to ``transform_annotation`` and stored in
+            ``im_shape``.
+        num_points: number of offsets per lane; the image height is split
+            into ``num_points - 1`` strips.
+        max_lanes: maximum number of lanes passed to
+            ``transform_annotation``.
+    """
+
+    def __init__(self, img_w, img_h, num_points, max_lanes):
+        super(CULaneDataProcess, self).__init__()
+        self.img_w = img_w
+        self.img_h = img_h
+        self.num_points = num_points
+        self.n_offsets = num_points
+        self.n_strips = num_points - 1
+        self.strip_size = self.img_h / self.n_strips
+
+        self.max_lanes = max_lanes
+        # y coordinates from img_h towards 0, one per strip boundary
+        self.offsets_ys = np.arange(self.img_h, -1, -self.strip_size)
+
+    def apply(self, sample, context=None):
+        """Build the output dict for one sample.
+
+        Reads ``image``, ``lanes``, ``seg``, ``full_img_path``, ``img_name``
+        and ``im_id`` from ``sample`` (plus ``mask`` when present, which
+        then replaces ``seg``). ``sample['image']`` is overwritten with its
+        float32 version scaled by 1/255.
+
+        Raises:
+            Exception: whatever ``transform_annotation`` raised on the last
+                of 30 failed attempts.
+        """
+        data = {}
+        line_strings = sample['lanes']
+        line_strings.clip_out_of_image_()
+        new_anno = {'lanes': linestrings_to_lanes(line_strings)}
+
+        # NOTE(review): every attempt receives the same inputs, so retrying
+        # only helps if transform_annotation is not deterministic -- confirm.
+        max_attempts = 30
+        for attempt in range(1, max_attempts + 1):
+            try:
+                annos = transform_annotation(
+                    self.img_w, self.img_h, self.max_lanes, self.n_offsets,
+                    self.offsets_ys, self.n_strips, self.strip_size, new_anno)
+                label = annos['label']
+                break
+            except Exception:
+                # Only ordinary errors are retried; KeyboardInterrupt and
+                # SystemExit propagate immediately.
+                if attempt == max_attempts:
+                    logger.critical('Transform annotation failed 30 times :(')
+                    # Re-raise so the failure keeps its traceback instead of
+                    # ending the process with a success exit status.
+                    raise
+
+        sample['image'] = sample['image'].astype(np.float32) / 255.
+        # HWC -> CHW
+        data['image'] = sample['image'].transpose(2, 0, 1)
+        data['lane_line'] = label
+        data['seg'] = sample['seg']
+        data['full_img_path'] = sample['full_img_path']
+        data['img_name'] = sample['img_name']
+        data['im_id'] = sample['im_id']
+
+        if 'mask' in sample.keys():
+            data['seg'] = sample['mask'].get_arr()
+
+        data['im_shape'] = np.array([self.img_w, self.img_h], dtype=np.float32)
+        data['scale_factor'] = np.array([1., 1.], dtype=np.float32)
+
+        return data
+
+
+@register_op
+class CULaneResize(BaseOperator):
+    """Resize a sample to ``img_h`` x ``img_w`` with probability ``prob``."""
+
+    def __init__(self, img_h, img_w, prob=0.5):
+        super(CULaneResize, self).__init__()
+        self.img_h = img_h
+        self.img_w = img_w
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter.
+
+        Without a mask the image is copied and cast to uint8 first.
+        """
+        target_size = {"height": self.img_h, "width": self.img_w}
+        resize = iaa.Sometimes(self.prob, iaa.Resize(target_size))
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = resize(
+                image=sample['image'].copy().astype(np.uint8),
+                line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = resize(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class HorizontalFlip(BaseOperator):
+    """Apply imgaug's ``HorizontalFlip`` with probability ``prob``."""
+
+    def __init__(self, prob=0.5):
+        super(HorizontalFlip, self).__init__()
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        flip = iaa.Sometimes(self.prob, iaa.HorizontalFlip(1.0))
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = flip(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = flip(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class ChannelShuffle(BaseOperator):
+    """Apply imgaug's ``ChannelShuffle`` with probability ``prob``."""
+
+    def __init__(self, prob=0.1):
+        super(ChannelShuffle, self).__init__()
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        shuffle = iaa.Sometimes(self.prob, iaa.ChannelShuffle(1.0))
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = shuffle(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = shuffle(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class MultiplyAndAddToBrightness(BaseOperator):
+    """Apply imgaug's ``MultiplyAndAddToBrightness`` with probability ``prob``.
+
+    ``mul`` and ``add`` are converted to tuples and forwarded to imgaug.
+    """
+
+    def __init__(self, mul=(0.85, 1.15), add=(-10, 10), prob=0.5):
+        super(MultiplyAndAddToBrightness, self).__init__()
+        self.mul = tuple(mul)
+        self.add = tuple(add)
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        brightness = iaa.MultiplyAndAddToBrightness(mul=self.mul, add=self.add)
+        jitter = iaa.Sometimes(self.prob, brightness)
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = jitter(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = jitter(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class AddToHueAndSaturation(BaseOperator):
+    """Apply imgaug's ``AddToHueAndSaturation`` with probability ``prob``.
+
+    ``value`` is converted to a tuple and forwarded to imgaug.
+    """
+
+    def __init__(self, value=(-10, 10), prob=0.5):
+        super(AddToHueAndSaturation, self).__init__()
+        self.value = tuple(value)
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        hue_saturation = iaa.AddToHueAndSaturation(value=self.value)
+        jitter = iaa.Sometimes(self.prob, hue_saturation)
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = jitter(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = jitter(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class OneOfBlur(BaseOperator):
+    """Apply one of imgaug's ``MotionBlur`` / ``MedianBlur`` with probability
+    ``prob``.
+
+    ``MotionBlur_k`` and ``MedianBlur_k`` are converted to tuples and
+    forwarded as the ``k`` argument of the respective augmenter.
+    """
+
+    def __init__(self, MotionBlur_k=(3, 5), MedianBlur_k=(3, 5), prob=0.5):
+        super(OneOfBlur, self).__init__()
+        self.MotionBlur_k = tuple(MotionBlur_k)
+        self.MedianBlur_k = tuple(MedianBlur_k)
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        alternatives = [
+            iaa.MotionBlur(k=self.MotionBlur_k),
+            iaa.MedianBlur(k=self.MedianBlur_k)
+        ]
+        blur = iaa.Sometimes(self.prob, iaa.OneOf(alternatives))
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = blur(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = blur(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
+
+
+@register_op
+class CULaneAffine(BaseOperator):
+    """Apply imgaug's ``Affine`` with probability ``prob``.
+
+    The translate ranges are stored as a ``{'x': ..., 'y': ...}`` dict;
+    ``rotate`` and ``scale`` are converted to tuples. All are forwarded to
+    imgaug unchanged.
+    """
+
+    # NOTE(review): the default rotate=(3, 5) differs from the (-10, 10)
+    # range used for 'Affine' in trainTransforms and equals the blur kernel
+    # default of OneOfBlur -- confirm it is intended.
+    def __init__(self,
+                 translate_percent_x=(-0.1, 0.1),
+                 translate_percent_y=(-0.1, 0.1),
+                 rotate=(3, 5),
+                 scale=(0.8, 1.2),
+                 prob=0.5):
+        super(CULaneAffine, self).__init__()
+        self.translate_percent = dict(
+            x=tuple(translate_percent_x), y=tuple(translate_percent_y))
+        self.rotate = tuple(rotate)
+        self.scale = tuple(scale)
+        self.prob = prob
+
+    def apply(self, sample, context=None):
+        """Pass image, lanes and the optional mask through the augmenter."""
+        affine = iaa.Affine(
+            translate_percent=self.translate_percent,
+            rotate=self.rotate,
+            scale=self.scale)
+        warp = iaa.Sometimes(self.prob, affine)
+        if 'mask' not in sample:
+            sample['image'], sample['lanes'] = warp(
+                image=sample['image'], line_strings=sample['lanes'])
+            return sample
+
+        sample['image'], sample['lanes'], sample['mask'] = warp(
+            image=sample['image'],
+            line_strings=sample['lanes'],
+            segmentation_maps=sample['mask'])
+        return sample
--- a/paddle_detection/ppdet/data/transform/gridmask_utils.py
+++ b/paddle_detection/ppdet/data/transform/gridmask_utils.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The code is based on:
+# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import numpy as np
+from PIL import Image
+
+
+class Gridmask(object):
+    """GridMask augmentation: multiply an image by a striped 0/1 mask.
+
+    Args:
+        use_h (bool): zero out horizontal stripes (rows) of the mask.
+        use_w (bool): zero out vertical stripes (columns) of the mask.
+        rotate (int): exclusive upper bound for the random angle passed to
+            ``PIL.Image.rotate``; with the default ``1`` the angle is always 0.
+        offset (bool): if True, masked-out pixels are filled with uniform
+            noise in ``[-1, 1)`` instead of zeros.
+        ratio (float): fraction of each grid period covered by a stripe.
+        mode (int): ``1`` inverts the mask after it has been built.
+        prob (float): final probability of applying the mask; the effective
+            probability ramps linearly from 0 and reaches ``prob`` at
+            ``upper_iter`` iterations.
+        upper_iter (int): iteration at which the ramp reaches ``prob``.
+    """
+
+    def __init__(self,
+                 use_h=True,
+                 use_w=True,
+                 rotate=1,
+                 offset=False,
+                 ratio=0.5,
+                 mode=1,
+                 prob=0.7,
+                 upper_iter=360000):
+        super(Gridmask, self).__init__()
+        self.use_h = use_h
+        self.use_w = use_w
+        self.rotate = rotate
+        self.offset = offset
+        self.ratio = ratio
+        self.mode = mode
+        self.prob = prob
+        # Keep the configured value; ``self.prob`` is overwritten on each call.
+        self.st_prob = prob
+        self.upper_iter = upper_iter
+
+    def __call__(self, x, curr_iter):
+        """Apply the grid mask to ``x`` with the current ramped probability.
+
+        Args:
+            x (np.ndarray): image indexed as (height, width, channels).
+            curr_iter (int): current training iteration, drives the ramp.
+        Returns:
+            np.ndarray: ``x`` itself when the augmentation is skipped,
+            otherwise a masked array with the dtype of ``x``.
+        """
+        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
+        if np.random.rand() > self.prob:
+            return x
+        h, w, _ = x.shape
+        # The mask is drawn on a 1.5x canvas and centre-cropped afterwards,
+        # presumably so the crop stays covered once the mask is rotated.
+        hh = int(1.5 * h)
+        ww = int(1.5 * w)
+        # d is the grid period, self.l the stripe thickness (1 <= l <= d - 1).
+        d = np.random.randint(2, h)
+        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
+        mask = np.ones((hh, ww), np.float32)
+        st_h = np.random.randint(d)
+        st_w = np.random.randint(d)
+        if self.use_h:
+            for i in range(hh // d):
+                s = d * i + st_h
+                t = min(s + self.l, hh)
+                mask[s:t, :] = 0
+        if self.use_w:
+            for i in range(ww // d):
+                s = d * i + st_w
+                t = min(s + self.l, ww)
+                mask[:, s:t] = 0
+
+        r = np.random.randint(self.rotate)
+        mask = Image.fromarray(np.uint8(mask))
+        mask = mask.rotate(r)
+        mask = np.asarray(mask)
+        top = (hh - h) // 2
+        left = (ww - w) // 2
+        mask = mask[top:top + h, left:left + w].astype(np.float32)
+
+        if self.mode == 1:
+            mask = 1 - mask
+        mask = np.expand_dims(mask, axis=-1)
+        if self.offset:
+            # The noise needs a trailing channel axis like the mask: an
+            # (h, w) array against the (h, w, 1) mask would broadcast to
+            # (h, w, w) or fail outright when h != w.
+            offset = (2 * (np.random.rand(h, w, 1) - 0.5)).astype(np.float32)
+            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
+        else:
+            x = (x * mask).astype(x.dtype)
+
+        return x
--- a/paddle_detection/ppdet/data/transform/keypoint_operators.py
+++ b/paddle_detection/ppdet/data/transform/keypoint_operators.py
--- a/paddle_detection/ppdet/data/transform/keypoints_3d_operators.py
+++ b/paddle_detection/ppdet/data/transform/keypoints_3d_operators.py
@@ -0,0 +1,296 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+import cv2
+import numpy as np
+import math
+import copy
+import random
+import uuid
+from numbers import Number, Integral
+
+from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
+from ppdet.core.workspace import serializable
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+registered_ops = []
+
+__all__ = [
+    'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
+]
+
+import matplotlib.pyplot as plt
+from PIL import Image, ImageDraw
+from mpl_toolkits.mplot3d import Axes3D
+
+
+def register_keypointop(cls):
+    """Class decorator: pass ``cls`` through ``serializable`` and return it.
+
+    Unlike ``register_op`` this does not record the class name in
+    ``registered_ops`` nor attach it to ``BaseOperator``.
+    """
+    wrapped = serializable(cls)
+    return wrapped
+
+
+def register_op(cls):
+    """Class decorator that registers an operator under its class name.
+
+    The name is appended to ``registered_ops`` and the class is attached to
+    ``BaseOperator`` as an attribute of the same name.
+
+    Raises:
+        KeyError: if ``BaseOperator`` already has an attribute of that name.
+    Returns:
+        The result of ``serializable(cls)``.
+    """
+    op_name = cls.__name__
+    # The name is recorded before the duplicate check, as in the original.
+    registered_ops.append(op_name)
+    if hasattr(BaseOperator, op_name):
+        raise KeyError("The {} class has been registered.".format(op_name))
+    setattr(BaseOperator, op_name, cls)
+    return serializable(cls)
+
+
+class BaseOperator(object):
+    """Base class for sample transforms; subclasses override ``apply``."""
+
+    def __init__(self, name=None):
+        # The id is "<name>_<last 6 chars of a uuid4>"; the class name is
+        # used when no explicit name is given.
+        prefix = self.__class__.__name__ if name is None else name
+        suffix = str(uuid.uuid4())[-6:]
+        self._id = prefix + '_' + suffix
+
+    def apply(self, sample, context=None):
+        """ Process a sample (identity in the base class).
+        Args:
+            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
+            context (dict): info about this sample processing
+        Returns:
+            result (dict): a processed sample
+        """
+        return sample
+
+    def __call__(self, sample, context=None):
+        """ Run ``apply`` on one sample or on every item of a sequence.
+        Args:
+            sample (dict|Sequence): one sample dict, or a sequence of them
+                whose items are replaced in place
+            context (dict): info about this sample processing
+        Returns:
+            result (dict|Sequence): the processed sample(s)
+        """
+        if not isinstance(sample, Sequence):
+            # Single sample: apply may return a new object.
+            return self.apply(sample, context)
+        for idx, item in enumerate(sample):
+            sample[idx] = self.apply(item, context)
+        return sample
+
+    def __str__(self):
+        return str(self._id)
+
+
+@register_keypointop
+class CropAndFlipImages(object):
+    """Horizontally flip a stack of images, then crop it along axis 2.
+
+    Args:
+        crop_range (sequence): ``(start, stop)`` slice bounds applied to
+            axis 2 of the flipped image stack.
+        flip_pairs (list|None): pairs of joint indices to swap in ``kps2d``;
+            ``None`` means no joints are swapped.
+    """
+
+    def __init__(self, crop_range, flip_pairs=None):
+        super(CropAndFlipImages, self).__init__()
+        self.crop_range = crop_range
+        self.flip_pairs = flip_pairs
+
+    def __call__(self, records):
+        """Flip and crop ``records['image']``; adjust ``records['kps2d']``.
+
+        Args:
+            records (dict): holds a 4-D ``'image'`` array and optionally
+                ``'kps2d'`` indexed as (frame, joint, coordinate).
+        Returns:
+            dict: the same ``records`` object, updated in place.
+        """
+        images = records["image"]
+        images = images[:, :, ::-1, :]
+        images = images[:, :, self.crop_range[0]:self.crop_range[1]]
+        records["image"] = images
+
+        if "kps2d" in records:
+            kps2d = np.array(records["kps2d"])
+            # NOTE(review): x is shifted by the crop start but not mirrored,
+            # although the images are flipped above - confirm this is intended.
+            kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]
+
+            # flip_pairs defaults to None; treat that as "nothing to swap"
+            # instead of failing on iteration.
+            for pair in (self.flip_pairs or ()):
+                first, second = pair[0], pair[1]
+                # Fancy indexing on the right-hand side yields a copy, so
+                # the two joints are swapped safely.
+                kps2d[:, [first, second], :] = kps2d[:, [second, first], :]
+
+            records["kps2d"] = kps2d
+
+        return records
+
+
+@register_op
+class PermuteImages(BaseOperator):
+    """Reorder the axes of ``sample['image']`` with ``transpose((0, 3, 1, 2))``.
+
+    The result is laid out as (batch_size, C, H, W), e.g. (6, 3, 1080, 1920).
+    """
+
+    def __init__(self):
+        super(PermuteImages, self).__init__()
+
+    def apply(self, sample, context=None):
+        """Transpose the image stack and store it back into ``sample``."""
+        sample["image"] = sample["image"].transpose((0, 3, 1, 2))
+        return sample
+
+
+@register_keypointop
+class RandomFlipHalfBody3DTransformImages(object):
+    """Apply data augmentation to a stack of images and their keypoints.
+
+    As written, ``__call__`` performs a random horizontal flip and an optional
+    random occlusion; ``halfbody_transform`` is available as a helper but is
+    not called from ``__call__``.
+
+    Args:
+        trainsize (list):[w, h], Image target size
+        upper_body_ids (list): The upper body joint ids
+        flip_pairs (list): The left-right joints exchange order list
+        pixel_std (int): The pixel std of the scale
+        scale (float): The scale factor to transform the image
+            (stored; not read by ``__call__``)
+        rot (int): The rotate factor to transform the image
+            (stored; not read by ``__call__``)
+        num_joints_half_body (int): The joints threshold of the half body transform
+            (stored; not read by ``__call__``)
+        prob_half_body (float): The threshold of the half body transform
+            (stored; not read by ``__call__``)
+        flip (bool): Whether to flip the image
+        rot_prob (float): stored; not read by ``__call__``
+        do_occlusion (bool): Whether to paste a random-noise rectangle onto
+            all frames (done for about half of the calls when enabled)
+    Returns:
+        records(dict): contain the image and coords after transformed
+    """
+
+    def __init__(self,
+                 trainsize,
+                 upper_body_ids,
+                 flip_pairs,
+                 pixel_std,
+                 scale=0.35,
+                 rot=40,
+                 num_joints_half_body=8,
+                 prob_half_body=0.3,
+                 flip=True,
+                 rot_prob=0.6,
+                 do_occlusion=False):
+        super(RandomFlipHalfBody3DTransformImages, self).__init__()
+        self.trainsize = trainsize
+        self.upper_body_ids = upper_body_ids
+        self.flip_pairs = flip_pairs
+        self.pixel_std = pixel_std
+        self.scale = scale
+        self.rot = rot
+        self.num_joints_half_body = num_joints_half_body
+        self.prob_half_body = prob_half_body
+        self.flip = flip
+        # width / height of the target size
+        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
+        self.rot_prob = rot_prob
+        self.do_occlusion = do_occlusion
+
+    def halfbody_transform(self, joints, joints_vis):
+        """Compute a crop center/scale around the visible upper or lower body.
+
+        Args:
+            joints: per-joint coordinates, indexed by joint id on axis 0
+            joints_vis: per-joint visibility; a joint counts as visible when
+                its first entry is > 0
+        Returns:
+            (center, scale): ``center`` is the mean x/y of the selected joints,
+            ``scale`` is their aspect-corrected extent divided by
+            ``pixel_std`` and multiplied by 1.5; ``(None, None)`` when fewer
+            than two joints were selected.
+        """
+        upper_joints = []
+        lower_joints = []
+        for joint_id in range(joints.shape[0]):
+            if joints_vis[joint_id][0] > 0:
+                if joint_id in self.upper_body_ids:
+                    upper_joints.append(joints[joint_id])
+                else:
+                    lower_joints.append(joints[joint_id])
+        # NOTE(review): randn() draws from a standard normal, not U[0, 1), so
+        # this branch is taken more often than half the time - confirm intent.
+        if np.random.randn() < 0.5 and len(upper_joints) > 2:
+            selected_joints = upper_joints
+        else:
+            selected_joints = lower_joints if len(
+                lower_joints) > 2 else upper_joints
+        if len(selected_joints) < 2:
+            return None, None
+        selected_joints = np.array(selected_joints, dtype=np.float32)
+        center = selected_joints.mean(axis=0)[:2]
+        left_top = np.amin(selected_joints, axis=0)
+        right_bottom = np.amax(selected_joints, axis=0)
+        w = right_bottom[0] - left_top[0]
+        h = right_bottom[1] - left_top[1]
+        # Grow the shorter side so the box matches the target aspect ratio.
+        if w > self.aspect_ratio * h:
+            h = w * 1.0 / self.aspect_ratio
+        elif w < self.aspect_ratio * h:
+            w = h * self.aspect_ratio
+        scale = np.array(
+            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
+            dtype=np.float32)
+        scale = scale * 1.5
+
+        return center, scale
+
+    def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
+        """Mirror joints horizontally and swap left/right joint pairs.
+
+        ``joints``, ``joints_vis`` and ``kps2d`` are modified in place. After
+        the swap, ``joints`` is re-centered on joint 0 of each frame.
+
+        Returns:
+            (joints, joints_vis, kps2d)
+        """
+        # joints: (6, 24, 3),(num_frames, num_joints, 3)
+
+        joints[:, :, 0] = width - joints[:, :, 0] - 1  # x
+        if kps2d is not None:
+            kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1
+
+        for pair in matched_parts:
+            # The .copy() keeps the old value of pair[0] alive while the first
+            # assignment overwrites it, so the two joints are truly swapped.
+            joints[:, pair[0], :], joints[:,pair[1], :] = \
+                joints[:,pair[1], :], joints[:,pair[0], :].copy()
+
+            joints_vis[:,pair[0], :], joints_vis[:,pair[1], :] = \
+                joints_vis[:,pair[1], :], joints_vis[:,pair[0], :].copy()
+
+            if kps2d is not None:
+                kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
+                    kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
+
+        # move to zero
+        joints -= joints[:, [0], :]  # (batch_size, 24, 3),numpy.ndarray
+
+        return joints, joints_vis, kps2d
+
+    def __call__(self, records):
+        """Randomly flip and optionally occlude the frames in ``records``.
+
+        Reads ``'image'``, ``'kps3d'``, ``'kps3d_vis'`` and, if present,
+        ``'kps2d'``; writes the same keys back and returns ``records``.
+        """
+        images = records[
+            'image']  # indexed below as (num_frames, height, width, 3)
+
+        joints = records['kps3d']
+        joints_vis = records['kps3d_vis']
+
+        kps2d = None
+        if 'kps2d' in records.keys():
+            kps2d = records['kps2d']
+
+        if self.flip and np.random.random() <= 0.5:
+            images = images[:, :, ::-1, :]  # horizontal image flip, e.g. (6, 1080, 810, 3)
+            joints, joints_vis, kps2d = self.flip_joints(
+                joints, joints_vis, images.shape[2], self.flip_pairs,
+                kps2d)  # mirror the keypoints left-right
+        # NOTE(review): ``occlusion`` is assigned but never read in this method.
+        occlusion = False
+        if self.do_occlusion and random.random() <= 0.5:  # random occlusion
+            height = images[0].shape[0]
+            width = images[0].shape[1]
+            occlusion = True
+            # Rejection sampling: draw rectangles until one lies inside the frame.
+            while True:
+                # rectangle area: up to 20% of the frame area
+                area_min = 0.0
+                area_max = 0.2
+                synth_area = (random.random() *
+                              (area_max - area_min) + area_min) * width * height
+
+                # height/width ratio of the rectangle, between 0.3 and 1/0.3
+                ratio_min = 0.3
+                ratio_max = 1 / 0.3
+                synth_ratio = (random.random() *
+                               (ratio_max - ratio_min) + ratio_min)
+
+                synth_h = math.sqrt(synth_area * synth_ratio)
+                synth_w = math.sqrt(synth_area / synth_ratio)
+                synth_xmin = random.random() * (width - synth_w - 1)
+                synth_ymin = random.random() * (height - synth_h - 1)
+
+                if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
+                    xmin = int(synth_xmin)
+                    ymin = int(synth_ymin)
+                    w = int(synth_w)
+                    h = int(synth_h)
+
+                    # The same noise patch is written into every frame.
+                    mask = np.random.rand(h, w, 3) * 255
+                    images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
+                        None, :, :, :]
+                    break
+
+        records['image'] = images
+        records['kps3d'] = joints
+        records['kps3d_vis'] = joints_vis
+        if kps2d is not None:
+            records['kps2d'] = kps2d
+
+        return records
--- a/paddle_detection/ppdet/data/transform/mot_operators.py
+++ b/paddle_detection/ppdet/data/transform/mot_operators.py
@@ -0,0 +1,627 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+from numbers import Integral
+
+import cv2
+import copy
+import numpy as np
+import random
+import math
+
+from .operators import BaseOperator, register_op
+from .batch_operators import Gt2TTFTarget
+from ppdet.modeling.bbox_utils import bbox_iou_np_expand
+from ppdet.utils.logger import setup_logger
+from .op_helper import gaussian_radius
+logger = setup_logger(__name__)
+
+__all__ = [
+    'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
+    'Gt2JDETargetMax', 'Gt2FairMOTTarget'
+]
+
+
+@register_op
+class RGBReverse(BaseOperator):
+    """Reverse the channel order of the image (RGB -> BGR or BGR -> RGB).
+
+    Sensitive to MOTRandomAffine.
+    """
+
+    def __init__(self):
+        super(RGBReverse, self).__init__()
+
+    def apply(self, sample, context=None):
+        """Replace ``sample['image']`` by a contiguous channel-reversed copy."""
+        reversed_channels = sample['image'][:, :, ::-1]
+        # The reversed slice is a strided view; make it contiguous.
+        sample['image'] = np.ascontiguousarray(reversed_channels)
+        return sample
+
+
+@register_op
+class LetterBoxResize(BaseOperator):
+    """Letterbox an image to a target size and remap its boxes.
+
+    The image is resized with a single ratio for both axes and padded with a
+    constant color; normalized xywh boxes become pixel xyxy boxes
+    ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
+    """
+
+    def __init__(self, target_size):
+        """
+        Args:
+            target_size (int|list): image target size; an int is expanded
+                to ``[target_size, target_size]``.
+        Raises:
+            TypeError: if ``target_size`` is neither an integer nor a sequence.
+        """
+        super(LetterBoxResize, self).__init__()
+        if not isinstance(target_size, (Integral, Sequence)):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
+                format(type(target_size)))
+        self.target_size = ([target_size, target_size] if isinstance(
+            target_size, Integral) else target_size)
+
+    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
+        """Resize ``img`` to fit into (height, width) and pad the remainder.
+
+        Returns:
+            (padded image, resize ratio, horizontal pad, vertical pad), where
+            the pads are the per-side (possibly fractional) amounts.
+        """
+        src_hw = img.shape[:2]  # [height, width]
+        ratio = min(float(height) / src_hw[0], float(width) / src_hw[1])
+        resized_wh = (round(src_hw[1] * ratio),
+                      round(src_hw[0] * ratio))  # [width, height]
+        padw = (width - resized_wh[0]) / 2
+        padh = (height - resized_wh[1]) / 2
+        # The -0.1/+0.1 nudges decide which side gets the extra pixel when
+        # the pad is a half-integer.
+        top = round(padh - 0.1)
+        bottom = round(padh + 0.1)
+        left = round(padw - 0.1)
+        right = round(padw + 0.1)
+
+        resized = cv2.resize(
+            img, resized_wh, interpolation=cv2.INTER_AREA)  # no border yet
+        padded = cv2.copyMakeBorder(
+            resized, top, bottom, left, right, cv2.BORDER_CONSTANT,
+            value=color)
+        return padded, ratio, padw, padh
+
+    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
+        """Map (cx, cy, w, h) boxes to padded-image (x0, y0, x1, y1) boxes.
+
+        Each coordinate is scaled by ``ratio * w`` or ``ratio * h`` and
+        shifted by the matching pad; ``bbox0`` is left untouched.
+        """
+        cx, cy = bbox0[:, 0], bbox0[:, 1]
+        half_w, half_h = bbox0[:, 2] / 2, bbox0[:, 3] / 2
+        out = bbox0.copy()
+        out[:, 0] = ratio * w * (cx - half_w) + padw
+        out[:, 1] = ratio * h * (cy - half_h) + padh
+        out[:, 2] = ratio * w * (cx + half_w) + padw
+        out[:, 3] = ratio * h * (cy + half_h) + padh
+        return out
+
+    def apply(self, sample, context=None):
+        """ Letterbox ``sample['image']`` and update the related fields.
+
+        Writes ``'image'``, ``'im_shape'``, ``'scale_factor'`` and, when
+        boxes are present, ``'gt_bbox'``.
+        """
+        im = sample['image']
+        h, w = sample['im_shape']
+        if not isinstance(im, np.ndarray):
+            raise TypeError("{}: image type is not numpy.".format(self))
+        if len(im.shape) != 3:
+            from PIL import UnidentifiedImageError
+            raise UnidentifiedImageError(
+                '{}: image is not 3-dimensional.'.format(self))
+
+        # image
+        target_h, target_w = self.target_size
+        letterboxed, ratio, padw, padh = self.apply_image(
+            im, height=target_h, width=target_w)
+        sample['image'] = letterboxed
+        scaled_hw = (round(h * ratio), round(w * ratio))
+        sample['im_shape'] = np.asarray(scaled_hw, dtype=np.float32)
+        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
+
+        # boxes
+        has_boxes = 'gt_bbox' in sample and len(sample['gt_bbox']) > 0
+        if has_boxes:
+            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
+                                                padw, padh)
+        return sample
+
+
+@register_op
+class MOTRandomAffine(BaseOperator):
+    """
+    Affine transform to image and coords to achieve the rotate, scale and
+    shift effect for training image.
+
+    Args:
+        degrees (list[2]): rotation range in degrees, sampled uniformly from
+            [min, max]
+        translate (list[2]): maximum translation as a fraction of the image
+            size; each shift is sampled uniformly from
+            [-translate[k], translate[k]] times an image dimension
+        scale (list[2]): the scale range to apply, sampled uniformly from
+            [min, max]
+        shear (list[2]): the shear range in degrees, sampled uniformly from
+            [min, max] independently for x and y
+        borderValue (list[3]): value used for the constant border when warping
+        reject_outside (bool): clip warped boxes to the image bounds before
+            the size/area/aspect-ratio filter is applied
+
+    Returns:
+        records(dict): contain the image and coords after transformed
+
+    """
+
+    def __init__(self,
+                 degrees=(-5, 5),
+                 translate=(0.10, 0.10),
+                 scale=(0.50, 1.20),
+                 shear=(-2, 2),
+                 borderValue=(127.5, 127.5, 127.5),
+                 reject_outside=True):
+        super(MOTRandomAffine, self).__init__()
+        self.degrees = degrees
+        self.translate = translate
+        self.scale = scale
+        self.shear = shear
+        self.borderValue = borderValue
+        self.reject_outside = reject_outside
+
+    def apply(self, sample, context=None):
+        """Warp ``sample['image']`` and its boxes with one random affine map.
+
+        Behavior by case:
+          * no (or empty) ``gt_bbox``: the image is warped and the sample is
+            returned (previously this path returned ``None``);
+          * at least one warped box passes the filter: image, boxes and the
+            per-box fields are replaced by their warped/filtered versions;
+          * no warped box passes the filter: the sample is returned untouched
+            (original image and boxes).
+
+        Returns:
+            dict: the ``sample`` object.
+        """
+        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
+        border = 0  # width of added border (optional)
+
+        img = sample['image']
+        height, width = img.shape[0], img.shape[1]
+
+        # Rotation and Scale
+        R = np.eye(3)
+        a = random.random() * (self.degrees[1] - self.degrees[0]
+                               ) + self.degrees[0]
+        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
+        R[:2] = cv2.getRotationMatrix2D(
+            angle=a, center=(width / 2, height / 2), scale=s)
+
+        # Translation
+        # NOTE(review): the x shift is scaled by height and the y shift by
+        # width; kept as-is so existing training behavior is unchanged.
+        T = np.eye(3)
+        T[0, 2] = (
+            random.random() * 2 - 1
+        ) * self.translate[0] * height + border  # x translation (pixels)
+        T[1, 2] = (
+            random.random() * 2 - 1
+        ) * self.translate[1] * width + border  # y translation (pixels)
+
+        # Shear
+        S = np.eye(3)
+        S[0, 1] = math.tan((random.random() *
+                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
+                           math.pi / 180)  # x shear (deg)
+        S[1, 0] = math.tan((random.random() *
+                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
+                           math.pi / 180)  # y shear (deg)
+
+        M = S @ T @ R  # Combined matrix. ORDER IS IMPORTANT HERE!!
+        imw = cv2.warpPerspective(
+            img,
+            M,
+            dsize=(width, height),
+            flags=cv2.INTER_LINEAR,
+            borderValue=self.borderValue)  # BGR order borderValue
+
+        if 'gt_bbox' not in sample or len(sample['gt_bbox']) == 0:
+            # No boxes to keep in sync with the image: still return a sample
+            # (with the warped image) instead of falling through to None.
+            sample['image'] = imw
+            return sample
+
+        targets = sample['gt_bbox']
+        n = targets.shape[0]
+        points = targets.copy()
+        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
+
+        # warp the four corners of every box
+        xy = np.ones((n * 4, 3))
+        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+            n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+        xy = (xy @ M.T)[:, :2].reshape(n, 8)
+
+        # axis-aligned boxes enclosing the warped corners
+        x = xy[:, [0, 2, 4, 6]]
+        y = xy[:, [1, 3, 5, 7]]
+        xy = np.concatenate(
+            (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+        # apply angle-based reduction: shrink the enclosing box around its
+        # center by a factor that depends on the rotation angle
+        radians = a * math.pi / 180
+        reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
+        x = (xy[:, 2] + xy[:, 0]) / 2
+        y = (xy[:, 3] + xy[:, 1]) / 2
+        w = (xy[:, 2] - xy[:, 0]) * reduction
+        h = (xy[:, 3] - xy[:, 1]) * reduction
+        xy = np.concatenate(
+            (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
+
+        # clip warped boxes to the image
+        if self.reject_outside:
+            np.clip(xy[:, 0], 0, width, out=xy[:, 0])
+            np.clip(xy[:, 2], 0, width, out=xy[:, 2])
+            np.clip(xy[:, 1], 0, height, out=xy[:, 1])
+            np.clip(xy[:, 3], 0, height, out=xy[:, 3])
+        w = xy[:, 2] - xy[:, 0]
+        h = xy[:, 3] - xy[:, 1]
+        area = w * h
+        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
+        # keep boxes that are big enough, retain >10% of their original area
+        # and are not extremely elongated
+        i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
+
+        if not i.any():
+            # Every box was filtered out: leave the sample as it was.
+            return sample
+
+        sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
+        sample['gt_class'] = sample['gt_class'][i]
+        if 'difficult' in sample:
+            sample['difficult'] = sample['difficult'][i]
+        if 'gt_ide' in sample:
+            sample['gt_ide'] = sample['gt_ide'][i]
+        if 'is_crowd' in sample:
+            sample['is_crowd'] = sample['is_crowd'][i]
+        sample['image'] = imw
+        return sample
+
+
+@register_op
+class Gt2JDETargetThres(BaseOperator):
+    """
+    Generate JDE targets by ground truth data when training
+    Args:
+        anchors (list): anchors of JDE model
+        anchor_masks (list): anchor_masks of JDE model
+        downsample_ratios (list): downsample ratios of JDE model
+        ide_thresh (float): thresh of identity, higher is ground truth
+        fg_thresh (float): thresh of foreground, higher is foreground
+        bg_thresh (float): thresh of background, lower is background
+        num_classes (int): number of classes
+    """
+    # The docstring must precede this attribute to become ``__doc__``.
+    __shared__ = ['num_classes']
+
+    def __init__(self,
+                 anchors,
+                 anchor_masks,
+                 downsample_ratios,
+                 ide_thresh=0.5,
+                 fg_thresh=0.5,
+                 bg_thresh=0.4,
+                 num_classes=1):
+        super(Gt2JDETargetThres, self).__init__()
+        self.anchors = anchors
+        self.anchor_masks = anchor_masks
+        self.downsample_ratios = downsample_ratios
+        self.ide_thresh = ide_thresh
+        self.fg_thresh = fg_thresh
+        self.bg_thresh = bg_thresh
+        self.num_classes = num_classes
+
+    def generate_anchor(self, nGh, nGw, anchor_hw):
+        """Build a dense anchor grid.
+
+        Args:
+            nGh (int): number of grid rows
+            nGw (int): number of grid columns
+            anchor_hw (np.ndarray): [nA, 2] anchor sizes in grid units
+        Returns:
+            np.ndarray: [nA, 4, nGh, nGw]; channels 0/1 hold the column and
+            row index of each cell, channels 2/3 the two anchor size values.
+        """
+        nA = len(anchor_hw)
+        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
+
+        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
+        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]
+
+        # [nA, 2] -> [nA, 2, 1, 1] -> [nA, 2, nGh, nGw]
+        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
+        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
+        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
+
+        anchor_mesh = np.concatenate(
+            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
+        return anchor_mesh
+
+    def encode_delta(self, gt_box_list, fg_anchor_list):
+        """Encode ground-truth boxes relative to their matched anchors.
+
+        Both inputs are [k, 4] arrays read as (x, y, w, h). Returns a [k, 4]
+        array of (dx, dy, dw, dh): center offsets divided by the anchor size
+        and log size ratios.
+        """
+        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
+                        fg_anchor_list[:, 2], fg_anchor_list[:, 3]
+        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
+                        gt_box_list[:, 2], gt_box_list[:, 3]
+        dx = (gx - px) / pw
+        dy = (gy - py) / ph
+        dw = np.log(gw / pw)
+        dh = np.log(gh / ph)
+        return np.stack([dx, dy, dw, dh], axis=1)
+
+    def pad_box(self, sample, num_max):
+        """Zero-pad the per-box fields of ``sample`` to ``num_max`` rows.
+
+        ``gt_bbox`` becomes [num_max, 4] float32; ``gt_score``, ``difficult``,
+        ``is_crowd`` and ``gt_ide`` (when present) become flat [num_max]
+        arrays filled from column 0 of the original field.
+        """
+        assert 'gt_bbox' in sample
+        bbox = sample['gt_bbox']
+        gt_num = len(bbox)
+        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
+        if gt_num > 0:
+            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
+        sample['gt_bbox'] = pad_bbox
+        if 'gt_score' in sample:
+            pad_score = np.zeros((num_max, ), dtype=np.float32)
+            if gt_num > 0:
+                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
+            sample['gt_score'] = pad_score
+        if 'difficult' in sample:
+            pad_diff = np.zeros((num_max, ), dtype=np.int32)
+            if gt_num > 0:
+                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
+            sample['difficult'] = pad_diff
+        if 'is_crowd' in sample:
+            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
+            if gt_num > 0:
+                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
+            sample['is_crowd'] = pad_crowd
+        if 'gt_ide' in sample:
+            pad_ide = np.zeros((num_max, ), dtype=np.int32)
+            if gt_num > 0:
+                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
+            sample['gt_ide'] = pad_ide
+        return sample
+
+    def __call__(self, samples, context=None):
+        """Attach per-scale JDE targets to every sample of a batch.
+
+        For each scale ``i`` the keys ``tbox{i}``, ``tconf{i}`` and
+        ``tide{i}`` are written; ``gt_class`` is removed and the per-box
+        fields are padded to the largest box count in the batch.
+
+        Args:
+            samples (list[dict]): batch of samples, modified in place
+            context (dict): unused
+        Returns:
+            list[dict]: ``samples``
+        """
+        assert len(self.anchor_masks) == len(self.downsample_ratios), \
+            "anchor_masks', and 'downsample_ratios' should have same length."
+        # Axes 1 and 2 of the first image are taken as height and width
+        # (assumes a channel-first image - TODO confirm against the pipeline).
+        h, w = samples[0]['image'].shape[1:3]
+
+        num_max = 0
+        for sample in samples:
+            num_max = max(num_max, len(sample['gt_bbox']))
+
+        for sample in samples:
+            gt_bbox = sample['gt_bbox']
+            gt_ide = sample['gt_ide']
+            for i, (anchor_hw, downsample_ratio
+                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
+                # anchor sizes expressed in grid cells of this scale
+                anchor_hw = np.array(
+                    anchor_hw, dtype=np.float32) / downsample_ratio
+                nA = len(anchor_hw)
+                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
+                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
+                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
+                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
+
+                # Scale boxes to grid units (presumably gt_bbox holds
+                # normalized center-x, center-y, width, height - verify
+                # against the upstream transforms).
+                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
+                gxy[:, 0] = gxy[:, 0] * nGw
+                gxy[:, 1] = gxy[:, 1] * nGh
+                gwh[:, 0] = gwh[:, 0] * nGw
+                gwh[:, 1] = gwh[:, 1] * nGh
+                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
+                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
+                tboxes = np.concatenate([gxy, gwh], axis=1)
+
+                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
+
+                # [nA, 4, nGh, nGw] -> [nA * nGh * nGw, 4]
+                anchor_list = np.transpose(anchor_mesh,
+                                           (0, 2, 3, 1)).reshape(-1, 4)
+                iou_pdist = bbox_iou_np_expand(
+                    anchor_list, tboxes, x1y1x2y2=False)
+
+                # best-matching ground-truth box for every anchor
+                iou_max = np.max(iou_pdist, axis=1)
+                max_gt_index = np.argmax(iou_pdist, axis=1)
+
+                iou_map = iou_max.reshape(nA, nGh, nGw)
+                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
+
+                id_index = iou_map > self.ide_thresh
+                fg_index = iou_map > self.fg_thresh
+                bg_index = iou_map < self.bg_thresh
+                ign_index = (iou_map < self.fg_thresh) * (
+                    iou_map > self.bg_thresh)
+                # confidence target: 1 foreground, 0 background, -1 ignored
+                tconf[fg_index] = 1
+                tconf[bg_index] = 0
+                tconf[ign_index] = -1
+
+                gt_index = gt_index_map[fg_index]
+                gt_box_list = tboxes[gt_index]
+                gt_id_list = gt_ide[gt_index_map[id_index]]
+
+                if fg_index.any():
+                    tid[id_index] = gt_id_list
+
+                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
+                                                         4)[fg_index]
+                    delta_target = self.encode_delta(gt_box_list,
+                                                     fg_anchor_list)
+                    tbox[fg_index] = delta_target
+
+                sample['tbox{}'.format(i)] = tbox
+                sample['tconf{}'.format(i)] = tconf
+                sample['tide{}'.format(i)] = tid
+            sample.pop('gt_class')
+            # pad_box updates the dict in place, so the batch list sees it.
+            self.pad_box(sample, num_max)
+        return samples
+
+
+@register_op
+class Gt2JDETargetMax(BaseOperator):
+    __shared__ = ['num_classes']
+    """
+    Generate JDE targets by groud truth data when evaluating
+    Args:
+        anchors (list): anchors of JDE model
+        anchor_masks (list): anchor_masks of JDE model
+        downsample_ratios (list): downsample ratios of JDE model
+        max_iou_thresh (float): iou thresh for high quality anchor
+        num_classes (int): number of classes
+    """
+
+    def __init__(self,
+                 anchors,
+                 anchor_masks,
+                 downsample_ratios,
+                 max_iou_thresh=0.60,
+                 num_classes=1):
+        super(Gt2JDETargetMax, self).__init__()
+        self.anchors = anchors
+        self.anchor_masks = anchor_masks
+        self.downsample_ratios = downsample_ratios
+        self.max_iou_thresh = max_iou_thresh
+        self.num_classes = num_classes
+
+    def __call__(self, samples, context=None):
+        assert len(self.anchor_masks) == len(self.downsample_ratios), \
+            "anchor_masks', and 'downsample_ratios' should have same length."
+        h, w = samples[0]['image'].shape[1:3]
+        for sample in samples:
+            gt_bbox = sample['gt_bbox']
+            gt_ide = sample['gt_ide']
+            for i, (anchor_hw, downsample_ratio
+                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
+                anchor_hw = np.array(
+                    anchor_hw, dtype=np.float32) / downsample_ratio
+                nA = len(anchor_hw)
+                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
+                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
+                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
+                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
+
+                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
+                gxy[:, 0] = gxy[:, 0] * nGw
+                gxy[:, 1] = gxy[:, 1] * nGh
+                gwh[:, 0] = gwh[:, 0] * nGw
+                gwh[:, 1] = gwh[:, 1] * nGh
+                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
+                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
+
+                # iou of targets-anchors (using wh only)
+                box1 = gwh
+                box2 = anchor_hw[:, None, :]
+                inter_area = np.minimum(box1, box2).prod(2)
+                iou = inter_area / (
+                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
+
+                # Select best iou_pred and anchor
+                iou_best = iou.max(0)  # best anchor [0-2] for each target
+                a = np.argmax(iou, axis=0)
+
+                # Select best unique target-anchor combinations
+                iou_order = np.argsort(-iou_best)  # best to worst
+
+                # Unique anchor selection
+                u = np.stack((gi, gj, a), 0)[:, iou_order]
+                _, first_unique = np.unique(u, axis=1, return_index=True)
+                mask = iou_order[first_unique]
+                # best anchor must share significant commonality (iou) with target
+                # TODO: examine arbitrary threshold
+                idx = mask[iou_best[mask] > self.max_iou_thresh]
+
+                if len(idx) > 0:
+                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
+                    t_box = gt_bbox[idx]
+                    t_id = gt_ide[idx]
+                    if len(t_box.shape) == 1:
+                        t_box = t_box.reshape(1, 4)
+
+                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
+                    gxy[:, 0] = gxy[:, 0] * nGw
+                    gxy[:, 1] = gxy[:, 1] * nGh
+                    gwh[:, 0] = gwh[:, 0] * nGw
+                    gwh[:, 1] = gwh[:, 1] * nGh
+
+                    # XY coordinates
+                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
+                    # Width and height in yolo method
+                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
+                                                                 anchor_hw[a_i])
+                    tconf[a_i, gj_i, gi_i] = 1
+                    tid[a_i, gj_i, gi_i] = t_id
+
+                sample['tbox{}'.format(i)] = tbox
+                sample['tconf{}'.format(i)] = tconf
+                sample['tide{}'.format(i)] = tid
+
+
+class Gt2FairMOTTarget(Gt2TTFTarget):
+    __shared__ = ['num_classes']
+    """
+    Generate FairMOT targets by ground truth data.
+    Difference between Gt2FairMOTTarget and Gt2TTFTarget are:
+        1. the gaussian kernal radius to generate a heatmap.
+        2. the targets needed during training.
+    
+    Args:
+        num_classes(int): the number of classes.
+        down_ratio(int): the down ratio from images to heatmap, 4 by default.
+        max_objs(int): the maximum number of ground truth objects in a image, 500 by default.
+    """
+
+    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
+        """Store the heatmap stride, class count and per-image object cap."""
+        # NOTE: super(Gt2TTFTarget, self) resolves __init__ past Gt2TTFTarget
+        # in the MRO, so Gt2TTFTarget.__init__ itself is not run; only the
+        # attributes assigned below are set by this class.
+        super(Gt2TTFTarget, self).__init__()
+        self.down_ratio = down_ratio
+        self.num_classes = num_classes
+        self.max_objs = max_objs
+
+    def __call__(self, samples, context=None):
+        """
+        Build FairMOT training targets for every sample in the batch.
+
+        Writes 'heatmap', 'index', 'offset', 'size', 'index_mask', 'reid' and
+        'bbox_xys' into each sample (plus 'cls_id_map' and 'cls_tr_ids' when
+        ``num_classes > 1``) and removes 'is_crowd', 'difficult', 'gt_class',
+        'gt_bbox', 'gt_score' and 'gt_ide' if present.
+
+        Args:
+            samples (list[dict]): batch of samples providing 'image',
+                'gt_bbox', 'gt_class' and 'gt_ide'.
+            context: unused.
+
+        Returns:
+            list[dict]: the same ``samples`` list, updated in place.
+        """
+        for b_id, sample in enumerate(samples):
+            # output resolution is taken from image axes 1 and 2
+            output_h = sample['image'].shape[1] // self.down_ratio
+            output_w = sample['image'].shape[2] // self.down_ratio
+
+            heatmap = np.zeros(
+                (self.num_classes, output_h, output_w), dtype='float32')
+            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
+            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
+            index = np.zeros((self.max_objs, ), dtype=np.int64)
+            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
+            reid = np.zeros((self.max_objs, ), dtype=np.int64)
+            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
+            if self.num_classes > 1:
+                # each category corresponds to a set of track ids
+                cls_tr_ids = np.zeros(
+                    (self.num_classes, output_h, output_w), dtype=np.int64)
+                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)
+
+            gt_bbox = sample['gt_bbox']
+            gt_class = sample['gt_class']
+            gt_ide = sample['gt_ide']
+
+            for k in range(len(gt_bbox)):
+                cls_id = gt_class[k][0]
+                # NOTE(review): if gt_bbox is a NumPy array this is a view,
+                # so the scaling/clipping below also rewrites
+                # sample['gt_bbox'] in place (the key is popped at the end).
+                bbox = gt_bbox[k]
+                ide = gt_ide[k][0]
+                # entries 0/2 are scaled by the output width, 1/3 by the
+                # output height; they are then used as center x/y and w/h
+                bbox[[0, 2]] = bbox[[0, 2]] * output_w
+                bbox[[1, 3]] = bbox[[1, 3]] * output_h
+                # corner-form copy taken BEFORE the center is clipped
+                bbox_amodal = copy.deepcopy(bbox)
+                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
+                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
+                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
+                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
+                # keep the center inside the output map
+                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
+                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
+                h = bbox[3]
+                w = bbox[2]
+
+                # corner-form copy taken AFTER the center is clipped
+                bbox_xy = copy.deepcopy(bbox)
+                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
+                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
+                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
+                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
+
+                if h > 0 and w > 0:
+                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
+                    radius = max(0, int(radius))
+                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
+                    ct_int = ct.astype(np.int32)
+                    # draw_truncate_gaussian is not defined in this class;
+                    # presumably inherited from Gt2TTFTarget -- TODO confirm
+                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
+                                                radius)
+                    # distances from the center to the left, top, right and
+                    # bottom edges of the unclipped box
+                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
+                            bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
+
+                    # row-major flat index of the center cell
+                    index[k] = ct_int[1] * output_w + ct_int[0]
+                    center_offset[k] = ct - ct_int
+                    index_mask[k] = 1
+                    reid[k] = ide
+                    bbox_xys[k] = bbox_xy
+                    if self.num_classes > 1:
+                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
+                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
+                        # track id start from 0
+
+            sample['heatmap'] = heatmap
+            sample['index'] = index
+            sample['offset'] = center_offset
+            sample['size'] = bbox_size
+            sample['index_mask'] = index_mask
+            sample['reid'] = reid
+            if self.num_classes > 1:
+                sample['cls_id_map'] = cls_id_map
+                sample['cls_tr_ids'] = cls_tr_ids
+            sample['bbox_xys'] = bbox_xys
+            # drop the raw annotations that are no longer needed
+            sample.pop('is_crowd', None)
+            sample.pop('difficult', None)
+            sample.pop('gt_class', None)
+            sample.pop('gt_bbox', None)
+            sample.pop('gt_score', None)
+            sample.pop('gt_ide', None)
+        return samples
--- a/paddle_detection/ppdet/data/transform/op_helper.py
+++ b/paddle_detection/ppdet/data/transform/op_helper.py
@@ -0,0 +1,494 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# this file contains helper methods for BBOX processing
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import random
+import math
+import cv2
+
+
+def meet_emit_constraint(src_bbox, sample_bbox):
+    center_x = (src_bbox[2] + src_bbox[0]) / 2
+    center_y = (src_bbox[3] + src_bbox[1]) / 2
+    if center_x >= sample_bbox[0] and \
+            center_x <= sample_bbox[2] and \
+            center_y >= sample_bbox[1] and \
+            center_y <= sample_bbox[3]:
+        return True
+    return False
+
+
+def clip_bbox(src_bbox):
+    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
+    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
+    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
+    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
+    return src_bbox
+
+
+def bbox_area(src_bbox):
+    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
+        return 0.
+    else:
+        width = src_bbox[2] - src_bbox[0]
+        height = src_bbox[3] - src_bbox[1]
+        return width * height
+
+
+def is_overlap(object_bbox, sample_bbox):
+    if object_bbox[0] >= sample_bbox[2] or \
+       object_bbox[2] <= sample_bbox[0] or \
+       object_bbox[1] >= sample_bbox[3] or \
+       object_bbox[3] <= sample_bbox[1]:
+        return False
+    else:
+        return True
+
+
+def filter_and_process(sample_bbox, bboxes, labels, scores=None,
+                       keypoints=None):
+    new_bboxes = []
+    new_labels = []
+    new_scores = []
+    new_keypoints = []
+    new_kp_ignore = []
+    for i in range(len(bboxes)):
+        new_bbox = [0, 0, 0, 0]
+        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
+        if not meet_emit_constraint(obj_bbox, sample_bbox):
+            continue
+        if not is_overlap(obj_bbox, sample_bbox):
+            continue
+        sample_width = sample_bbox[2] - sample_bbox[0]
+        sample_height = sample_bbox[3] - sample_bbox[1]
+        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
+        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
+        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
+        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
+        new_bbox = clip_bbox(new_bbox)
+        if bbox_area(new_bbox) > 0:
+            new_bboxes.append(new_bbox)
+            new_labels.append([labels[i][0]])
+            if scores is not None:
+                new_scores.append([scores[i][0]])
+            if keypoints is not None:
+                sample_keypoint = keypoints[0][i]
+                for j in range(len(sample_keypoint)):
+                    kp_len = sample_height if j % 2 else sample_width
+                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
+                    sample_keypoint[j] = (
+                        sample_keypoint[j] - sample_coord) / kp_len
+                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
+                new_keypoints.append(sample_keypoint)
+                new_kp_ignore.append(keypoints[1][i])
+
+    bboxes = np.array(new_bboxes)
+    labels = np.array(new_labels)
+    scores = np.array(new_scores)
+    if keypoints is not None:
+        keypoints = np.array(new_keypoints)
+        new_kp_ignore = np.array(new_kp_ignore)
+        return bboxes, labels, scores, (keypoints, new_kp_ignore)
+    return bboxes, labels, scores
+
+
+def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
+    new_bboxes = []
+    new_labels = []
+    new_scores = []
+    for i, bbox in enumerate(bboxes):
+        w = float((bbox[2] - bbox[0]) * target_size)
+        h = float((bbox[3] - bbox[1]) * target_size)
+        if w * h < float(min_size * min_size):
+            continue
+        else:
+            new_bboxes.append(bbox)
+            new_labels.append(labels[i])
+            if scores is not None and scores.size != 0:
+                new_scores.append(scores[i])
+    bboxes = np.array(new_bboxes)
+    labels = np.array(new_labels)
+    scores = np.array(new_scores)
+    return bboxes, labels, scores
+
+
+def generate_sample_bbox(sampler):
+    scale = np.random.uniform(sampler[2], sampler[3])
+    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
+    aspect_ratio = max(aspect_ratio, (scale**2.0))
+    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
+    bbox_width = scale * (aspect_ratio**0.5)
+    bbox_height = scale / (aspect_ratio**0.5)
+    xmin_bound = 1 - bbox_width
+    ymin_bound = 1 - bbox_height
+    xmin = np.random.uniform(0, xmin_bound)
+    ymin = np.random.uniform(0, ymin_bound)
+    xmax = xmin + bbox_width
+    ymax = ymin + bbox_height
+    sampled_bbox = [xmin, ymin, xmax, ymax]
+    return sampled_bbox
+
+
+def generate_sample_bbox_square(sampler, image_width, image_height):
+    scale = np.random.uniform(sampler[2], sampler[3])
+    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
+    aspect_ratio = max(aspect_ratio, (scale**2.0))
+    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
+    bbox_width = scale * (aspect_ratio**0.5)
+    bbox_height = scale / (aspect_ratio**0.5)
+    if image_height < image_width:
+        bbox_width = bbox_height * image_height / image_width
+    else:
+        bbox_height = bbox_width * image_width / image_height
+    xmin_bound = 1 - bbox_width
+    ymin_bound = 1 - bbox_height
+    xmin = np.random.uniform(0, xmin_bound)
+    ymin = np.random.uniform(0, ymin_bound)
+    xmax = xmin + bbox_width
+    ymax = ymin + bbox_height
+    sampled_bbox = [xmin, ymin, xmax, ymax]
+    return sampled_bbox
+
+
+def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
+                         resize_width):
+    """Sample a square crop region around one randomly chosen box.
+
+    A box is picked at random, a target scale is drawn from ``scale_array``
+    based on the box area, and a square region whose side is
+    ``wid * resize_width / scale_choose`` is placed at a random offset.
+
+    Note that both ``np.random`` and the stdlib ``random`` module are used,
+    so reproducible results require seeding both generators.
+
+    Args:
+        bbox_labels: boxes; entries 0..3 of each are read as normalized
+            xmin, ymin, xmax, ymax.
+        image_width: image width used to convert to pixels.
+        image_height: image height used to convert to pixels.
+        scale_array: sequence of scales; consecutive entries are compared
+            (squared) against the box area.
+        resize_width: factor applied to the box width when sizing the crop.
+
+    Returns:
+        list: ``[xmin, ymin, xmax, ymax]`` normalized by the image size, or
+        the integer ``0`` when ``bbox_labels`` is empty.
+    """
+    num_gt = len(bbox_labels)
+    # np.random.randint range: [low, high)
+    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
+
+    if num_gt != 0:
+        norm_xmin = bbox_labels[rand_idx][0]
+        norm_ymin = bbox_labels[rand_idx][1]
+        norm_xmax = bbox_labels[rand_idx][2]
+        norm_ymax = bbox_labels[rand_idx][3]
+
+        # chosen box in pixel units
+        xmin = norm_xmin * image_width
+        ymin = norm_ymin * image_height
+        wid = image_width * (norm_xmax - norm_xmin)
+        hei = image_height * (norm_ymax - norm_ymin)
+        range_size = 0
+
+        # find the first pair of consecutive scales whose squares strictly
+        # bracket the box area
+        area = wid * hei
+        for scale_ind in range(0, len(scale_array) - 1):
+            if area > scale_array[scale_ind] ** 2 and area < \
+                    scale_array[scale_ind + 1] ** 2:
+                range_size = scale_ind + 1
+                break
+
+        # areas above the second-to-last scale override the result above
+        if area > scale_array[len(scale_array) - 2]**2:
+            range_size = len(scale_array) - 2
+
+        scale_choose = 0.0
+        if range_size == 0:
+            rand_idx_size = 0
+        else:
+            # np.random.randint range: [low, high)
+            rng_rand_size = np.random.randint(0, range_size + 1)
+            # the draw is already within [0, range_size], so the modulo
+            # leaves it unchanged
+            rand_idx_size = rng_rand_size % (range_size + 1)
+
+        if rand_idx_size == range_size:
+            # for the largest admissible scale the upper bound is
+            # additionally capped by twice the square root of the box area
+            min_resize_val = scale_array[rand_idx_size] / 2.0
+            max_resize_val = min(2.0 * scale_array[rand_idx_size],
+                                 2 * math.sqrt(wid * hei))
+            scale_choose = random.uniform(min_resize_val, max_resize_val)
+        else:
+            min_resize_val = scale_array[rand_idx_size] / 2.0
+            max_resize_val = 2.0 * scale_array[rand_idx_size]
+            scale_choose = random.uniform(min_resize_val, max_resize_val)
+
+        # side length of the square crop, in pixels
+        sample_bbox_size = wid * resize_width / scale_choose
+
+        w_off_orig = 0.0
+        h_off_orig = 0.0
+        if sample_bbox_size < max(image_height, image_width):
+            # offsets are drawn between the two positions at which the crop
+            # and the box share their left (top) or right (bottom) edge
+            if wid <= sample_bbox_size:
+                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
+                                               xmin)
+            else:
+                w_off_orig = np.random.uniform(xmin,
+                                               xmin + wid - sample_bbox_size)
+
+            if hei <= sample_bbox_size:
+                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
+                                               ymin)
+            else:
+                h_off_orig = np.random.uniform(ymin,
+                                               ymin + hei - sample_bbox_size)
+
+        else:
+            # crop is at least as large as the longer image side: offsets
+            # are drawn between (image size - crop size) and 0
+            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
+            h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)
+
+        w_off_orig = math.floor(w_off_orig)
+        h_off_orig = math.floor(h_off_orig)
+
+        # Figure out top left coordinates.
+        w_off = float(w_off_orig / image_width)
+        h_off = float(h_off_orig / image_height)
+
+        sampled_bbox = [
+            w_off, h_off, w_off + float(sample_bbox_size / image_width),
+            h_off + float(sample_bbox_size / image_height)
+        ]
+        return sampled_bbox
+    else:
+        # no boxes to anchor on: callers receive 0 instead of a box
+        return 0
+
+
+def jaccard_overlap(sample_bbox, object_bbox):
+    if sample_bbox[0] >= object_bbox[2] or \
+        sample_bbox[2] <= object_bbox[0] or \
+        sample_bbox[1] >= object_bbox[3] or \
+        sample_bbox[3] <= object_bbox[1]:
+        return 0
+    intersect_xmin = max(sample_bbox[0], object_bbox[0])
+    intersect_ymin = max(sample_bbox[1], object_bbox[1])
+    intersect_xmax = min(sample_bbox[2], object_bbox[2])
+    intersect_ymax = min(sample_bbox[3], object_bbox[3])
+    intersect_size = (intersect_xmax - intersect_xmin) * (
+        intersect_ymax - intersect_ymin)
+    sample_bbox_size = bbox_area(sample_bbox)
+    object_bbox_size = bbox_area(object_bbox)
+    overlap = intersect_size / (
+        sample_bbox_size + object_bbox_size - intersect_size)
+    return overlap
+
+
+def intersect_bbox(bbox1, bbox2):
+    if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
+        bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
+        intersection_box = [0.0, 0.0, 0.0, 0.0]
+    else:
+        intersection_box = [
+            max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
+            min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
+        ]
+    return intersection_box
+
+
+def bbox_coverage(bbox1, bbox2):
+    inter_box = intersect_bbox(bbox1, bbox2)
+    intersect_size = bbox_area(inter_box)
+
+    if intersect_size > 0:
+        bbox1_size = bbox_area(bbox1)
+        return intersect_size / bbox1_size
+    else:
+        return 0.
+
+
+def satisfy_sample_constraint(sampler,
+                              sample_bbox,
+                              gt_bboxes,
+                              satisfy_all=False):
+    if sampler[6] == 0 and sampler[7] == 0:
+        return True
+    satisfied = []
+    for i in range(len(gt_bboxes)):
+        object_bbox = [
+            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
+        ]
+        overlap = jaccard_overlap(sample_bbox, object_bbox)
+        if sampler[6] != 0 and \
+                overlap < sampler[6]:
+            satisfied.append(False)
+            continue
+        if sampler[7] != 0 and \
+                overlap > sampler[7]:
+            satisfied.append(False)
+            continue
+        satisfied.append(True)
+        if not satisfy_all:
+            return True
+
+    if satisfy_all:
+        return np.all(satisfied)
+    else:
+        return False
+
+
+def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
+    if sampler[6] == 0 and sampler[7] == 0:
+        has_jaccard_overlap = False
+    else:
+        has_jaccard_overlap = True
+    if sampler[8] == 0 and sampler[9] == 0:
+        has_object_coverage = False
+    else:
+        has_object_coverage = True
+
+    if not has_jaccard_overlap and not has_object_coverage:
+        return True
+    found = False
+    for i in range(len(gt_bboxes)):
+        object_bbox = [
+            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
+        ]
+        if has_jaccard_overlap:
+            overlap = jaccard_overlap(sample_bbox, object_bbox)
+            if sampler[6] != 0 and \
+                    overlap < sampler[6]:
+                continue
+            if sampler[7] != 0 and \
+                    overlap > sampler[7]:
+                continue
+            found = True
+        if has_object_coverage:
+            object_coverage = bbox_coverage(object_bbox, sample_bbox)
+            if sampler[8] != 0 and \
+                    object_coverage < sampler[8]:
+                continue
+            if sampler[9] != 0 and \
+                    object_coverage > sampler[9]:
+                continue
+            found = True
+        if found:
+            return True
+    return found
+
+
+def crop_image_sampling(img, sample_bbox, image_width, image_height,
+                        target_size):
+    # no clipping here
+    xmin = int(sample_bbox[0] * image_width)
+    xmax = int(sample_bbox[2] * image_width)
+    ymin = int(sample_bbox[1] * image_height)
+    ymax = int(sample_bbox[3] * image_height)
+
+    w_off = xmin
+    h_off = ymin
+    width = xmax - xmin
+    height = ymax - ymin
+    cross_xmin = max(0.0, float(w_off))
+    cross_ymin = max(0.0, float(h_off))
+    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
+    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
+    cross_width = cross_xmax - cross_xmin
+    cross_height = cross_ymax - cross_ymin
+
+    roi_xmin = 0 if w_off >= 0 else abs(w_off)
+    roi_ymin = 0 if h_off >= 0 else abs(h_off)
+    roi_width = cross_width
+    roi_height = cross_height
+
+    roi_y1 = int(roi_ymin)
+    roi_y2 = int(roi_ymin + roi_height)
+    roi_x1 = int(roi_xmin)
+    roi_x2 = int(roi_xmin + roi_width)
+
+    cross_y1 = int(cross_ymin)
+    cross_y2 = int(cross_ymin + cross_height)
+    cross_x1 = int(cross_xmin)
+    cross_x2 = int(cross_xmin + cross_width)
+
+    sample_img = np.zeros((height, width, 3))
+    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
+        img[cross_y1: cross_y2, cross_x1: cross_x2]
+
+    sample_img = cv2.resize(
+        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
+
+    return sample_img
+
+
+def is_poly(segm):
+    assert isinstance(segm, (list, dict)), \
+        "Invalid segm type: {}".format(type(segm))
+    return isinstance(segm, list)
+
+
+def gaussian_radius(bbox_size, min_overlap):
+    height, width = bbox_size
+
+    a1 = 1
+    b1 = (height + width)
+    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
+    sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
+    radius1 = (b1 + sq1) / (2 * a1)
+
+    a2 = 4
+    b2 = 2 * (height + width)
+    c2 = (1 - min_overlap) * width * height
+    sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
+    radius2 = (b2 + sq2) / 2
+
+    a3 = 4 * min_overlap
+    b3 = -2 * min_overlap * (height + width)
+    c3 = (min_overlap - 1) * width * height
+    sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
+    radius3 = (b3 + sq3) / 2
+    return min(radius1, radius2, radius3)
+
+
+def draw_gaussian(heatmap, center, radius, k=1, delte=6):
+    diameter = 2 * radius + 1
+    sigma = diameter / delte
+    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
+
+    x, y = center
+
+    height, width = heatmap.shape[0:2]
+
+    left, right = min(x, radius), min(width - x, radius + 1)
+    top, bottom = min(y, radius), min(height - y, radius + 1)
+
+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
+                               radius + right]
+    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
+
+
+def gaussian2D(shape, sigma_x=1, sigma_y=1):
+    m, n = [(ss - 1.) / 2. for ss in shape]
+    y, x = np.ogrid[-m:m + 1, -n:n + 1]
+
+    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
+                                                            sigma_y)))
+    h[h < np.finfo(h.dtype).eps * h.max()] = 0
+    return h
+
+
+def draw_umich_gaussian(heatmap, center, radius, k=1):
+    """
+    draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
+    """
+    diameter = 2 * radius + 1
+    gaussian = gaussian2D(
+        (diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)
+
+    x, y = int(center[0]), int(center[1])
+
+    height, width = heatmap.shape[0:2]
+
+    left, right = min(x, radius), min(width - x, radius + 1)
+    top, bottom = min(y, radius), min(height - y, radius + 1)
+
+    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
+                               radius + right]
+    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
+        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
+    return heatmap
+
+
+def get_border(border, size):
+    i = 1
+    while size - border // i <= border // i:
+        i *= 2
+    return border // i
--- a/paddle_detection/ppdet/data/transform/operators.py
+++ b/paddle_detection/ppdet/data/transform/operators.py
--- a/paddle_detection/ppdet/data/transform/rotated_operators.py
+++ b/paddle_detection/ppdet/data/transform/rotated_operators.py
@@ -0,0 +1,480 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+try:
+    from collections.abc import Sequence
+except Exception:
+    from collections import Sequence
+
+from numbers import Number, Integral
+
+import cv2
+import numpy as np
+import math
+import copy
+
+from .operators import register_op, BaseOperator
+from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
+from ppdet.utils.logger import setup_logger
+from ppdet.utils.compact import imagedraw_textsize_c
+logger = setup_logger(__name__)
+
+
+@register_op
+class RRotate(BaseOperator):
+    """ Rotate the image together with its polygons and derived boxes
+
+    Args:
+        scale (float): rotate scale
+        angle (float): rotate angle
+        fill_value (int, tuple): fill color used for the warped border
+        auto_bound (bool): if True the output keeps the input size and the
+            rotation is rescaled to fit; if False the output size grows to
+            the bounding size of the rotated image
+    """
+
+    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
+        super(RRotate, self).__init__()
+        self.scale = scale
+        self.angle = angle
+        self.fill_value = fill_value
+        self.auto_bound = auto_bound
+
+    def get_rotated_matrix(self, angle, scale, h, w):
+        """ build the 2x3 affine matrix and the output (h, w)
+
+        Returns:
+            tuple: (matrix, out_h, out_w)
+        """
+        center = ((w - 1) * 0.5, (h - 1) * 0.5)
+        matrix = cv2.getRotationMatrix2D(center, -angle, scale)
+        # bounding size of the rotated image
+        abs_cos, abs_sin = np.abs(matrix[0, 0]), np.abs(matrix[0, 1])
+        bound_w = h * abs_sin + w * abs_cos
+        bound_h = h * abs_cos + w * abs_sin
+        out_w = int(np.round(bound_w))
+        out_h = int(np.round(bound_h))
+
+        if not self.auto_bound:
+            # shift the content so it is centred in the enlarged canvas
+            matrix[0, 2] += (bound_w - w) * 0.5
+            matrix[1, 2] += (bound_h - h) * 0.5
+            return matrix, out_h, out_w
+
+        # keep the input size: rebuild the matrix with a fitting ratio,
+        # the `scale` argument does not enter this matrix
+        ratio = min(w / out_w, h / out_h)
+        return cv2.getRotationMatrix2D(center, -angle, ratio), h, w
+
+    def get_rect_from_pts(self, pts, h, w):
+        """ axis-aligned [min_x, min_y, max_x, max_y] of each row of points,
+        clipped to [0, w] and [0, h]
+        """
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        xs, ys = pts[:, 0::2], pts[:, 1::2]
+        min_x = np.clip(np.min(xs, axis=1), 0, w)
+        min_y = np.clip(np.min(ys, axis=1), 0, h)
+        max_x = np.clip(np.max(xs, axis=1), 0, w)
+        max_y = np.clip(np.max(ys, axis=1), 0, h)
+        return np.stack([min_x, min_y, max_x, max_y], axis=-1)
+
+    def apply_image(self, image, matrix, h, w):
+        """ warp the image with the affine matrix into a (w, h) canvas
+        """
+        out_size = (w, h)
+        return cv2.warpAffine(
+            image, matrix, out_size, borderValue=self.fill_value)
+
+    def apply_pts(self, pts, matrix, h, w):
+        """ transform interleaved (x, y) points with the affine matrix
+        """
+        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
+        # each row holds interleaved (x, y) values, so its length is 2 * points
+        _, row_len = pts.shape
+        # 2 x K matrix of all points
+        xy = pts.reshape(-1, 2).T
+        # append a row of ones to get homogeneous coordinates
+        ones = np.ones((1, xy.shape[1]), pts.dtype)
+        homogeneous = np.concatenate((xy, ones), axis=0)
+        moved = np.matmul(matrix, homogeneous)
+        return moved[:2, :].T.reshape(-1, row_len)
+
+    def apply(self, sample, context=None):
+        image = sample['image']
+        src_h, src_w = image.shape[:2]
+        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, src_h,
+                                               src_w)
+        sample['image'] = self.apply_image(image, matrix, h, w)
+        polys = sample['gt_poly']
+        # TODO: segment or keypoint to be processed
+        if len(polys) == 0:
+            return sample
+
+        rotated = self.apply_pts(polys, matrix, h, w)
+        sample['gt_poly'] = rotated
+        sample['gt_bbox'] = self.get_rect_from_pts(rotated, h, w)
+        return sample
+
+
+@register_op
+class RandomRRotate(BaseOperator):
+    """ Rotate the sample with a randomly drawn angle and scale
+    Args:
+        scale (float, tuple, list): rotate scale
+        scale_mode (str): mode of scale, [range, value, None]
+        angle (float, tuple, list): rotate angle
+        angle_mode (str): mode of angle, [range, value, None]
+        fill_value (float, tuple, list): fill value
+        rotate_prob (float): probability of rotation
+        auto_bound (bool): whether auto bound or not
+    """
+
+    def __init__(self,
+                 scale=1.0,
+                 scale_mode=None,
+                 angle=0.,
+                 angle_mode=None,
+                 fill_value=0.,
+                 rotate_prob=1.0,
+                 auto_bound=True):
+        super(RandomRRotate, self).__init__()
+        self.scale, self.scale_mode = scale, scale_mode
+        self.angle, self.angle_mode = angle, angle_mode
+        self.fill_value = fill_value
+        self.rotate_prob = rotate_prob
+        self.auto_bound = auto_bound
+
+    @staticmethod
+    def _draw(spec, mode, err_msg):
+        """ resolve `spec` according to `mode`
+
+        no mode: `spec` is returned unchanged
+        'range': `spec` is (low, high), a uniform draw from it is returned
+        'value': one element of `spec` is picked at random
+        """
+        assert not mode or mode in ['range', 'value'], err_msg
+        if not mode:
+            return spec
+        if mode == 'range':
+            lower, upper = spec
+            return np.random.rand() * (upper - lower) + lower
+        if mode == 'value':
+            return np.random.choice(spec)
+
+    def get_angle(self, angle, angle_mode):
+        return self._draw(angle, angle_mode,
+                          'angle mode should be in [range, value, None]')
+
+    def get_scale(self, scale, scale_mode):
+        return self._draw(scale, scale_mode,
+                          'scale mode should be in [range, value, None]')
+
+    def apply(self, sample, context=None):
+        # skip the rotation with probability 1 - rotate_prob
+        if np.random.rand() > self.rotate_prob:
+            return sample
+
+        angle = self.get_angle(self.angle, self.angle_mode)
+        scale = self.get_scale(self.scale, self.scale_mode)
+        return RRotate(scale, angle, self.fill_value, self.auto_bound)(sample)
+
+
+@register_op
+class Poly2RBox(BaseOperator):
+    """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1
+
+    Annotations whose rotated box is rejected by the filter are removed from
+    the sample.
+
+    Args:
+        filter_threshold (int, float): threshold to filter annotations
+        filter_mode (str): filter mode, ['area', 'edge']; any other value
+            disables filtering
+        rbox_type (str): rbox type, ['le135', 'oc']; any value other than
+            'le135' selects the 'oc' conversion
+
+    """
+
+    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
+        super(Poly2RBox, self).__init__()
+
+        def _should_drop(size):
+            return self.filter(size, filter_threshold, filter_mode)
+
+        self.filter_fn = _should_drop
+        if rbox_type == 'le135':
+            self.rbox_fn = poly2rbox_le135_np
+        else:
+            self.rbox_fn = poly2rbox_oc_np
+
+    def filter(self, size, threshold, mode):
+        """ return True when a box of the given (w, h) must be discarded
+        """
+        if mode == 'area':
+            measure = size[0] * size[1]
+        elif mode == 'edge':
+            measure = min(size)
+        else:
+            return False
+        return bool(measure < threshold)
+
+    def get_rbox(self, polys):
+        """ convert polygons to rotated boxes and enclosing boxes
+
+        Returns:
+            tuple: (rboxes with 5 columns, bboxes with 4 columns, list of
+                indices of the polygons that were kept)
+        """
+        kept = []
+        for idx, poly in enumerate(polys):
+            cx, cy, w, h, angle = self.rbox_fn(poly)
+            if self.filter_fn((w, h)):
+                continue
+            rbox = np.array([cx, cy, w, h, angle], dtype=np.float32)
+            # even positions are x values, odd positions are y values
+            xs, ys = poly[0::2], poly[1::2]
+            bbox = np.array(
+                [min(xs), min(ys), max(xs), max(ys)], dtype=np.float32)
+            kept.append((idx, rbox, bbox))
+
+        if not kept:
+            empty_rboxes = np.zeros((0, 5), dtype=np.float32)
+            empty_bboxes = np.zeros((0, 4), dtype=np.float32)
+            return empty_rboxes, empty_bboxes, []
+
+        valid_ids, rboxes, bboxes = (list(column) for column in zip(*kept))
+        return np.stack(rboxes), np.stack(bboxes), valid_ids
+
+    def apply(self, sample, context=None):
+        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
+        sample['gt_rbox'] = rboxes
+        sample['gt_bbox'] = bboxes
+        # keep the per-annotation fields aligned with the surviving boxes
+        per_box_keys = ('gt_class', 'gt_score', 'gt_poly', 'is_crowd',
+                        'difficult')
+        for key in per_box_keys:
+            if key not in sample:
+                continue
+            sample[key] = sample[key][valid_ids]
+
+        return sample
+
+
+@register_op
+class Poly2Array(BaseOperator):
+    """ Turn `gt_poly` into a float32 np.array with 8 values per row, as used
+    for rotated bboxes; samples without `gt_poly` pass through unchanged
+    """
+
+    def __init__(self):
+        super(Poly2Array, self).__init__()
+
+    def apply(self, sample, context=None):
+        if 'gt_poly' not in sample:
+            return sample
+
+        polys = np.array(sample['gt_poly'], dtype=np.float32)
+        sample['gt_poly'] = polys.reshape((-1, 8))
+        return sample
+
+
+@register_op
+class RResize(BaseOperator):
+    """ Resize the image and rescale its boxes and polygons accordingly
+
+    Args:
+        target_size (int|list): image target size; an integer is expanded
+            to [target_size, target_size]
+        keep_ratio (bool): if True one scale factor is used for both sides,
+            chosen so that neither the short nor the long side exceeds the
+            matching entry of target_size; if False the image is resized
+            to target_size interpreted as (h, w)
+        interp (int): the interpolation method
+    """
+
+    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
+        super(RResize, self).__init__()
+        self.keep_ratio = keep_ratio
+        self.interp = interp
+        if isinstance(target_size, Integral):
+            target_size = [target_size, target_size]
+        elif not isinstance(target_size, Sequence):
+            raise TypeError(
+                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
+                format(type(target_size)))
+        self.target_size = target_size
+
+    def apply_image(self, image, scale):
+        """ resize the image by the (x, y) scale factors
+        """
+        scale_x, scale_y = scale
+        return cv2.resize(
+            image,
+            None,
+            None,
+            fx=scale_x,
+            fy=scale_y,
+            interpolation=self.interp)
+
+    def apply_pts(self, pts, scale, size):
+        """ scale interleaved (x, y) values in place and clip them to the
+        resized image, `size` is (w, h)
+        """
+        scale_x, scale_y = scale
+        limit_w, limit_h = size
+        # views into pts: even columns are x, odd columns are y
+        xs = pts[:, 0::2]
+        ys = pts[:, 1::2]
+        xs *= scale_x
+        xs[...] = np.clip(xs, 0, limit_w)
+        ys *= scale_y
+        ys[...] = np.clip(ys, 0, limit_h)
+        return pts
+
+    def apply(self, sample, context=None):
+        """ Resize the image and update im_shape, scale_factor, gt_bbox
+        and gt_poly of the sample.
+        """
+        im = sample['image']
+        if not isinstance(im, np.ndarray):
+            raise TypeError("{}: image type is not numpy.".format(self))
+        if im.ndim != 3:
+            # NOTE(review): ImageError is not among the imports visible in
+            # this file - confirm it is in scope at runtime
+            raise ImageError('{}: image is not 3-dimensional.'.format(self))
+
+        # work out the scale factors and the resized shape
+        src_h, src_w = im.shape[0], im.shape[1]
+        if self.keep_ratio:
+            hw = im.shape[0:2]
+            short_side, long_side = np.min(hw), np.max(hw)
+            target_short = np.min(self.target_size)
+            target_long = np.max(self.target_size)
+
+            im_scale = min(target_short / short_side, target_long / long_side)
+            im_scale_x = im_scale_y = im_scale
+
+            resize_h = im_scale * float(src_h)
+            resize_w = im_scale * float(src_w)
+        else:
+            resize_h, resize_w = self.target_size
+            im_scale_y = resize_h / src_h
+            im_scale_x = resize_w / src_w
+
+        resized = self.apply_image(im, [im_scale_x, im_scale_y])
+        sample['image'] = resized.astype(np.float32)
+        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
+
+        # accumulate on top of a scale factor recorded by an earlier step
+        factor_y, factor_x = im_scale_y, im_scale_x
+        if 'scale_factor' in sample:
+            previous = sample['scale_factor']
+            factor_y = previous[0] * im_scale_y
+            factor_x = previous[1] * im_scale_x
+        sample['scale_factor'] = np.asarray(
+            [factor_y, factor_x], dtype=np.float32)
+
+        # boxes and polygons share the same point transform
+        for key in ('gt_bbox', 'gt_poly'):
+            if key in sample and len(sample[key]) > 0:
+                sample[key] = self.apply_pts(sample[key],
+                                             [im_scale_x, im_scale_y],
+                                             [resize_w, resize_h])
+
+        return sample
+
+
+@register_op
+class RandomRFlip(BaseOperator):
+    """ Randomly flip the image horizontally together with its boxes and
+    polygons. Coordinates are treated as absolute pixel positions.
+    """
+
+    def __init__(self, prob=0.5):
+        """
+        Args:
+            prob (float): the probability of flipping image
+
+        Raises:
+            TypeError: if prob is not a float
+        """
+        super(RandomRFlip, self).__init__()
+        self.prob = prob
+        if not (isinstance(self.prob, float)):
+            raise TypeError("{}: input type is invalid.".format(self))
+
+    def apply_image(self, image):
+        """ mirror the image along its width axis
+        """
+        return image[:, ::-1, :]
+
+    def apply_pts(self, pts, width):
+        """ mirror the x values (even columns) of interleaved (x, y) points
+        in place: x -> width - x - 1
+        """
+        oldx = pts[:, 0::2].copy()
+        pts[:, 0::2] = width - oldx - 1
+        return pts
+
+    def apply_bbox(self, bbox, width):
+        """ mirror [xmin, ymin, xmax, ymax] boxes in place
+
+        Mirroring turns the old xmin into the larger x value, so the two x
+        columns are swapped afterwards to keep xmin <= xmax.
+        """
+        bbox = self.apply_pts(bbox, width)
+        # the fancy-indexed right-hand side is a copy, so the swap is safe
+        bbox[:, [0, 2]] = bbox[:, [2, 0]]
+        return bbox
+
+    def apply(self, sample, context=None):
+        """Flip the image, bounding boxes and polygons.
+        Operators:
+            1. Flip the image numpy.
+            2. Transform the bboxes' x coordinates.
+            3. Transform the polygons' x coordinates.
+        Output:
+            sample: the image, bounding box and polygon part in sample are
+                    flipped and sample['flipped'] is set to True; the sample
+                    is returned unchanged when no flip is drawn.
+        """
+        if np.random.uniform(0, 1) < self.prob:
+            im = sample['image']
+            _, width = im.shape[:2]
+            im = self.apply_image(im)
+            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+                sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], width)
+            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
+
+            sample['flipped'] = True
+            sample['image'] = im
+        return sample
+
+
+@register_op
+class VisibleRBox(BaseOperator):
+    """
+    In debug mode, draw the `gt_poly` quadrilaterals, their class labels and
+    (if present) `gt_keypoint` on the image and save it as a JPEG.
+    (Currently only supported when not cropping and flipping image.)
+
+    NOTE(review): `os`, `Image` and `ImageDraw` are used below but are not
+    among the imports visible in this file - confirm they are in scope.
+
+    Args:
+        output_dir (str): directory the debug images are written to, it is
+            created if it does not exist
+    """
+
+    def __init__(self, output_dir='debug'):
+        super(VisibleRBox, self).__init__()
+        self.output_dir = output_dir
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+
+    def apply(self, sample, context=None):
+        """ Draw the annotations of `sample` and save the picture.
+
+        Reads `image`, `im_id`, `w`, `h`, `gt_poly`, `gt_class` and the
+        optional `gt_keypoint`; returns the sample object it was given.
+        """
+        image = Image.fromarray(sample['image'].astype(np.uint8))
+        # file name is the first im_id entry, zero-padded to 12 digits
+        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
+        width = sample['w']
+        height = sample['h']
+        # gt_poly = sample['gt_rbox']
+        gt_poly = sample['gt_poly']
+        gt_class = sample['gt_class']
+        draw = ImageDraw.Draw(image)
+        for i in range(gt_poly.shape[0]):
+            # each row holds the four corners as x1, y1, ..., x4, y4
+            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
+            # closed outline: the first corner is repeated at the end
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill='green')
+            # draw label
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
+            text = str(gt_class[i][0])
+            tw, th = imagedraw_textsize_c(draw, text)
+            # filled label background placed just above the top-left corner
+            draw.rectangle(
+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+
+        if 'gt_keypoint' in sample.keys():
+            gt_keypoint = sample['gt_keypoint']
+            # NOTE(review): `is_normalized` is not assigned in __init__ of
+            # this class - confirm the base class provides it
+            if self.is_normalized:
+                # odd columns are scaled by height, even columns by width
+                for i in range(gt_keypoint.shape[1]):
+                    if i % 2:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
+                    else:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
+            for i in range(gt_keypoint.shape[0]):
+                keypoint = gt_keypoint[i]
+                for j in range(int(keypoint.shape[0] / 2)):
+                    # NOTE(review): built-in round() on a NumPy scalar may
+                    # return a plain int without `.astype` - confirm
+                    x1 = round(keypoint[2 * j]).astype(np.int32)
+                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
+                    draw.ellipse(
+                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
+        save_path = os.path.join(self.output_dir, out_file_name)
+        image.save(save_path, quality=95)
+        return sample
+
+
+@register_op
+class Rbox2Poly(BaseOperator):
+    """
+    Convert rbbox format to poly format.
+
+    Reads `gt_rbox` (5 values per box) and writes `gt_poly` plus the
+    axis-aligned `gt_bbox` ([xmin, ymin, xmax, ymax]) enclosing each polygon.
+    """
+
+    def __init__(self):
+        super(Rbox2Poly, self).__init__()
+
+    def apply(self, sample, context=None):
+        """ Fill `gt_poly` and `gt_bbox` from `gt_rbox`.
+
+        Raises:
+            AssertionError: if `gt_rbox` is missing or does not have
+                5 columns
+        """
+        assert 'gt_rbox' in sample
+        assert sample['gt_rbox'].shape[1] == 5
+        rboxes = sample['gt_rbox']
+        # presumably one row of interleaved (x, y) corners per box - the
+        # slicing below relies on that layout; confirm against rbox2poly_np
+        polys = rbox2poly_np(rboxes)
+        sample['gt_poly'] = polys
+        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
+        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
+        # enclosing box needs the max corner as well, repeating the min
+        # corner would collapse every box to zero area
+        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
+        return sample