Replace document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import task_aligned_assigner
from . import atss_assigner
from . import simota_assigner
from . import max_iou_assigner
from . import fcosr_assigner
from . import rotated_task_aligned_assigner
from . import task_aligned_assigner_cr
from . import uniform_assigner
from .utils import *
from .task_aligned_assigner import *
from .atss_assigner import *
from .simota_assigner import *
from .max_iou_assigner import *
from .fcosr_assigner import *
from .rotated_task_aligned_assigner import *
from .task_aligned_assigner_cr import *
from .uniform_assigner import *
from .hungarian_assigner import *
from .pose_utils import *

View File

@@ -0,0 +1,225 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import iou_similarity, batch_iou_similarity
from ..bbox_utils import bbox_center
from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
compute_max_iou_gt)
__all__ = ['ATSSAssigner']
@register
class ATSSAssigner(nn.Layer):
"""Bridging the Gap Between Anchor-based and Anchor-free Detection
via Adaptive Training Sample Selection
"""
__shared__ = ['num_classes']
def __init__(self,
topk=9,
num_classes=80,
force_gt_matching=False,
eps=1e-9,
sm_use=False):
super(ATSSAssigner, self).__init__()
self.topk = topk
self.num_classes = num_classes
self.force_gt_matching = force_gt_matching
self.eps = eps
self.sm_use = sm_use
def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
pad_gt_mask):
gt2anchor_distances_list = paddle.split(
gt2anchor_distances, num_anchors_list, axis=-1)
num_anchors_index = np.cumsum(num_anchors_list).tolist()
num_anchors_index = [0, ] + num_anchors_index[:-1]
is_in_topk_list = []
topk_idxs_list = []
for distances, anchors_index in zip(gt2anchor_distances_list,
num_anchors_index):
num_anchors = distances.shape[-1]
_, topk_idxs = paddle.topk(
distances, self.topk, axis=-1, largest=False)
topk_idxs_list.append(topk_idxs + anchors_index)
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
axis=-2).astype(gt2anchor_distances.dtype)
is_in_topk_list.append(is_in_topk * pad_gt_mask)
is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
return is_in_topk_list, topk_idxs_list
@paddle.no_grad()
def forward(self,
anchor_bboxes,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None,
pred_bboxes=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
The assignment is done in the following steps:
1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
2. compute center distance between all bboxes and gt
3. on each pyramid level, for each gt, select k bboxes whose centers
are closest to the gt center, so k*l bboxes are selected in total as
candidates for each gt
4. get the corresponding iou for these candidates, and compute the
mean and std; set mean + std as the iou threshold
5. select the candidates whose iou is greater than or equal to
the threshold as positive
6. limit the positive sample's center in gt
7. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
"xmin, ymin, xmax, ymax" format
num_anchors_list (List): num of anchors in each level
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes,
shape(B, n, 1), if None, then it will initialize with one_hot label
pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
"""
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
num_anchors, _ = anchor_bboxes.shape
batch_size, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# 1. compute iou between gt and anchor bbox, [B, n, L]
ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
ious = ious.reshape([batch_size, -1, num_anchors])
# 2. compute center distance between all anchors and gt, [B, n, L]
gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
anchor_centers = bbox_center(anchor_bboxes)
gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
.norm(2, axis=-1).reshape([batch_size, -1, num_anchors])
# 3. on each pyramid level, selecting topk closest candidates
# based on the center distance, [B, n, L]
is_in_topk, topk_idxs = self._gather_topk_pyramid(
gt2anchor_distances, num_anchors_list, pad_gt_mask)
# 4. get the corresponding iou for these candidates, and compute the
# mean and std, 5. set mean + std as the iou threshold
iou_candidates = ious * is_in_topk
iou_threshold = paddle.index_sample(
iou_candidates.flatten(stop_axis=-2),
topk_idxs.flatten(stop_axis=-2))
iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
iou_threshold.std(axis=-1, keepdim=True)
is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
paddle.zeros_like(is_in_topk))
# 6. check the positive sample's center in gt, [B, n, L]
if self.sm_use:
is_in_gts = check_points_inside_bboxes(
anchor_centers, gt_bboxes, sm_use=True)
else:
is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# 7. if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected.
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (
mask_positive_sum.unsqueeze(1) > 1).astype('int32').tile(
[1, num_max_boxes, 1]).astype('bool')
if self.sm_use:
is_max_iou = compute_max_iou_anchor(ious * mask_positive)
else:
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
# 8. make sure every gt_bbox matches the anchor
if self.force_gt_matching:
is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
[1, num_max_boxes, 1])
mask_positive = paddle.where(mask_max_iou, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
ind = list(range(self.num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
if pred_bboxes is not None:
# assigned iou
ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
ious = ious.max(axis=-2).unsqueeze(-1)
assigned_scores *= ious
elif gt_scores is not None:
gather_scores = paddle.gather(
gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
gather_scores = gather_scores.reshape([batch_size, num_anchors])
gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
paddle.zeros_like(gather_scores))
assigned_scores *= gather_scores.unsqueeze(-1)
return assigned_labels, assigned_bboxes, assigned_scores
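
Not part of the committed file above — a minimal usage sketch of ATSSAssigner under the assumption that the class is re-exported through ppdet.modeling.assigners (as the __init__.py earlier in this commit suggests). Inputs are random tensors shaped as in the forward docstring; each pyramid level must contain at least topk anchors.

import paddle
from ppdet.modeling.assigners import ATSSAssigner  # assumed import path

assigner = ATSSAssigner(topk=9, num_classes=80)

num_anchors_list = [256, 64, 16]                 # anchors per pyramid level (each >= topk)
L = sum(num_anchors_list)
axy = paddle.rand([L, 2]) * 80.
awh = paddle.rand([L, 2]) * 20. + 1.
anchor_bboxes = paddle.concat([axy, axy + awh], axis=-1)   # (L, 4), xmin, ymin, xmax, ymax
gxy = paddle.rand([2, 3, 2]) * 80.
gwh = paddle.rand([2, 3, 2]) * 20. + 4.
gt_bboxes = paddle.concat([gxy, gxy + gwh], axis=-1)       # (B, n, 4)
gt_labels = paddle.randint(0, 80, [2, 3, 1])               # (B, n, 1)
pad_gt_mask = paddle.ones([2, 3, 1], dtype='float32')      # every gt is a real box

labels, bboxes, scores = assigner(
    anchor_bboxes, num_anchors_list, gt_labels, gt_bboxes,
    pad_gt_mask, bg_index=80)
print(labels.shape, bboxes.shape, scores.shape)            # (B, L) (B, L, 4) (B, L, 80)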

View File

@@ -0,0 +1,147 @@
import paddle
import paddle.nn.functional as F
from ppdet.modeling.losses.clrnet_line_iou_loss import line_iou
def distance_cost(predictions, targets, img_w):
"""
Repeat predictions and targets to generate all combinations, then
use the abs distance as the new distance cost.
"""
num_priors = predictions.shape[0]
num_targets = targets.shape[0]
predictions = paddle.repeat_interleave(
predictions, num_targets, axis=0)[..., 6:]
targets = paddle.concat(x=num_priors * [targets])[..., 6:]
invalid_masks = (targets < 0) | (targets >= img_w)
lengths = (~invalid_masks).sum(axis=1)
distances = paddle.abs(x=targets - predictions)
distances[invalid_masks] = 0.0
distances = distances.sum(axis=1) / (lengths.cast("float32") + 1e-09)
distances = distances.reshape([num_priors, num_targets])
return distances
def focal_cost(cls_pred, gt_labels, alpha=0.25, gamma=2, eps=1e-12):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value
"""
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + eps).log() * (1 - alpha) * cls_pred.pow(gamma)
pos_cost = -(cls_pred + eps).log() * alpha * (1 - cls_pred).pow(gamma)
cls_cost = pos_cost.index_select(
gt_labels, axis=1) - neg_cost.index_select(
gt_labels, axis=1)
return cls_cost
def dynamic_k_assign(cost, pair_wise_ious):
"""
Assign ground truths to priors dynamically.
Args:
cost: the assignment cost.
pair_wise_ious: iou of ground truths and priors.
Returns:
prior_idx: the index of assigned prior.
gt_idx: the corresponding ground truth index.
"""
matching_matrix = paddle.zeros_like(cost)
ious_matrix = pair_wise_ious
ious_matrix[ious_matrix < 0] = 0.0
n_candidate_k = 4
topk_ious, _ = paddle.topk(ious_matrix, n_candidate_k, axis=0)
dynamic_ks = paddle.clip(x=topk_ious.sum(0).cast("int32"), min=1)
num_gt = cost.shape[1]
for gt_idx in range(num_gt):
_, pos_idx = paddle.topk(
x=cost[:, gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
matching_matrix[pos_idx, gt_idx] = 1.0
del topk_ious, dynamic_ks, pos_idx
matched_gt = matching_matrix.sum(axis=1)
if (matched_gt > 1).sum() > 0:
matched_gt_indices = paddle.nonzero(matched_gt > 1)[:, 0]
cost_argmin = paddle.argmin(
cost.index_select(matched_gt_indices), axis=1)
# zero out all matches of priors assigned to multiple gts, keeping only the min-cost one below
matching_matrix[matched_gt_indices] *= 0.0
matching_matrix[matched_gt_indices, cost_argmin] = 1.0
prior_idx = matching_matrix.sum(axis=1).nonzero()
gt_idx = matching_matrix[prior_idx].argmax(axis=-1)
return prior_idx.flatten(), gt_idx.flatten()
def cdist_paddle(x1, x2, p=2):
assert x1.shape[1] == x2.shape[1]
B, M = x1.shape
# if p == np.inf:
# dist = np.max(np.abs(x1[:, np.newaxis, :] - x2[np.newaxis, :, :]), axis=-1)
if p == 1:
dist = paddle.sum(
paddle.abs(x1.unsqueeze(axis=1) - x2.unsqueeze(axis=0)), axis=-1)
else:
dist = paddle.pow(paddle.sum(paddle.pow(
paddle.abs(x1.unsqueeze(axis=1) - x2.unsqueeze(axis=0)), p),
axis=-1),
1 / p)
return dist
def assign(predictions,
targets,
img_w,
img_h,
distance_cost_weight=3.0,
cls_cost_weight=1.0):
"""
Computes dynamic matching based on the cost, which includes the cls cost
and the lane similarity cost.
Args:
predictions (Tensor): predictions predicted by each stage, shape: (num_priors, 78)
targets (Tensor): lane targets, shape: (num_targets, 78)
Returns:
matched_row_inds (Tensor): matched predictions, shape: (num_targets)
matched_col_inds (Tensor): matched targets, shape: (num_targets)
"""
predictions = predictions.detach().clone()
predictions[:, 3] *= img_w - 1
predictions[:, 6:] *= img_w - 1
targets = targets.detach().clone()
distances_score = distance_cost(predictions, targets, img_w)
distances_score = 1 - distances_score / paddle.max(x=distances_score) + 0.01
cls_score = focal_cost(predictions[:, :2], targets[:, 1].cast('int64'))
num_priors = predictions.shape[0]
num_targets = targets.shape[0]
target_start_xys = targets[:, 2:4]
target_start_xys[..., 0] *= (img_h - 1)
prediction_start_xys = predictions[:, 2:4]
prediction_start_xys[..., 0] *= (img_h - 1)
start_xys_score = cdist_paddle(
prediction_start_xys, target_start_xys,
p=2).reshape([num_priors, num_targets])
start_xys_score = 1 - start_xys_score / paddle.max(x=start_xys_score) + 0.01
target_thetas = targets[:, 4].unsqueeze(axis=-1)
theta_score = cdist_paddle(
predictions[:, 4].unsqueeze(axis=-1), target_thetas,
p=1).reshape([num_priors, num_targets]) * 180
theta_score = 1 - theta_score / paddle.max(x=theta_score) + 0.01
cost = -(distances_score * start_xys_score * theta_score
)**2 * distance_cost_weight + cls_score * cls_cost_weight
iou = line_iou(predictions[..., 6:], targets[..., 6:], img_w, aligned=False)
matched_row_inds, matched_col_inds = dynamic_k_assign(cost, iou)
return matched_row_inds, matched_col_inds
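
Not part of the committed file above — a small sketch of the inputs dynamic_k_assign expects, assuming the functions defined above are in scope: a (num_priors, num_gt) assignment cost matrix and the matching IoU matrix (in assign() these come from the combined distance/start-point/theta/cls costs and line_iou).

import paddle

num_priors, num_gt = 20, 3
cost = paddle.rand([num_priors, num_gt])            # assignment cost, lower is better
pair_wise_ious = paddle.rand([num_priors, num_gt])  # IoU between priors and gt lanes

prior_idx, gt_idx = dynamic_k_assign(cost, pair_wise_ious)
print(prior_idx.numpy(), gt_idx.numpy())            # matched prior indices and their gts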

View File

@@ -0,0 +1,227 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.rbox_utils import box2corners, check_points_in_polys, paddle_gather
__all__ = ['FCOSRAssigner']
EPS = 1e-9
@register
class FCOSRAssigner(nn.Layer):
""" FCOSR Assigner, refer to https://arxiv.org/abs/2111.10780 for details
1. compute the normalized gaussian distribution score and the refined gaussian distribution score
2. following ellipse center sampling, sample the points whose normalized gaussian distribution score is greater than the threshold
3. following multi-level sampling, assign the ground truth to the feature map that satisfies two conditions:
i) the ratio between the short edge of the target and the stride of the feature map is less than 2;
ii) the long edge of the minimum bounding rectangle of the target is larger than the acceptance range of the feature map
4. following fuzzy sample label assignment, the points satisfying 2 and 3 will be assigned to the ground truth according to the gaussian distribution score
"""
__shared__ = ['num_classes']
def __init__(self,
num_classes=80,
factor=12,
threshold=0.23,
boundary=[[-1, 128], [128, 320], [320, 10000]],
score_type='iou'):
super(FCOSRAssigner, self).__init__()
self.num_classes = num_classes
self.factor = factor
self.threshold = threshold
self.boundary = [
paddle.to_tensor(
l, dtype=paddle.float32).reshape([1, 1, 2]) for l in boundary
]
self.score_type = score_type
def get_gaussian_distribution_score(self, points, gt_rboxes, gt_polys):
# projecting points to coordinate system defined by each rbox
# [B, N, 4, 2] -> 4 * [B, N, 1, 2]
a, b, c, d = gt_polys.split(4, axis=2)
# [1, L, 2] -> [1, 1, L, 2]
points = points.unsqueeze(0)
ab = b - a
ad = d - a
# [B, N, 5] -> [B, N, 2], [B, N, 2], [B, N, 1]
xy, wh, angle = gt_rboxes.split([2, 2, 1], axis=-1)
# [B, N, 2] -> [B, N, 1, 2]
xy = xy.unsqueeze(2)
# vector of points to center [B, N, L, 2]
vec = points - xy
# <ab, vec> = |ab| * |vec| * cos(theta) [B, N, L]
vec_dot_ab = paddle.sum(vec * ab, axis=-1)
# <ad, vec> = |ad| * |vec| * cos(theta) [B, N, L]
vec_dot_ad = paddle.sum(vec * ad, axis=-1)
# norm_ab [B, N, L]
norm_ab = paddle.sum(ab * ab, axis=-1).sqrt()
# norm_ad [B, N, L]
norm_ad = paddle.sum(ad * ad, axis=-1).sqrt()
# min(h, w), [B, N, 1]
min_edge = paddle.min(wh, axis=-1, keepdim=True)
# delta_x, delta_y [B, N, L]
delta_x = vec_dot_ab.pow(2) / (norm_ab.pow(3) * min_edge + EPS)
delta_y = vec_dot_ad.pow(2) / (norm_ad.pow(3) * min_edge + EPS)
# score [B, N, L]
norm_score = paddle.exp(-0.5 * self.factor * (delta_x + delta_y))
# simplified calculation
sigma = min_edge / self.factor
refined_score = norm_score / (2 * np.pi * sigma + EPS)
return norm_score, refined_score
def get_rotated_inside_mask(self, points, gt_polys, scores):
inside_mask = check_points_in_polys(points, gt_polys)
center_mask = scores >= self.threshold
return (inside_mask & center_mask).cast(paddle.float32)
def get_inside_range_mask(self, points, gt_bboxes, gt_rboxes, stride_tensor,
regress_range):
# [1, L, 2] -> [1, 1, L, 2]
points = points.unsqueeze(0)
# [B, n, 4] -> [B, n, 1, 4]
x1y1, x2y2 = gt_bboxes.unsqueeze(2).split(2, axis=-1)
# [B, n, L, 2]
lt = points - x1y1
rb = x2y2 - points
# [B, n, L, 4]
ltrb = paddle.concat([lt, rb], axis=-1)
# [B, n, L, 4] -> [B, n, L]
inside_mask = paddle.min(ltrb, axis=-1) > EPS
# regress_range [1, L, 2] -> [1, 1, L, 2]
regress_range = regress_range.unsqueeze(0)
# stride_tensor [1, L, 1] -> [1, 1, L]
stride_tensor = stride_tensor.transpose((0, 2, 1))
# fcos range
# [B, n, L, 4] -> [B, n, L]
ltrb_max = paddle.max(ltrb, axis=-1)
# [1, 1, L, 2] -> [1, 1, L]
low, high = regress_range[..., 0], regress_range[..., 1]
# [B, n, L]
regress_mask = (ltrb_max >= low) & (ltrb_max <= high)
# mask for rotated
# [B, n, 1]
min_edge = paddle.min(gt_rboxes[..., 2:4], axis=-1, keepdim=True)
# [B, n , L]
rotated_mask = ((min_edge / stride_tensor) < 2.0) & (ltrb_max > high)
mask = inside_mask & (regress_mask | rotated_mask)
return mask.cast(paddle.float32)
@paddle.no_grad()
def forward(self,
anchor_points,
stride_tensor,
num_anchors_list,
gt_labels,
gt_bboxes,
gt_rboxes,
pad_gt_mask,
bg_index,
pred_rboxes=None):
r"""
Args:
anchor_points (Tensor, float32): pre-defined anchor points, shape(1, L, 2),
"x, y" format
stride_tensor (Tensor, float32): stride tensor, shape (1, L, 1)
num_anchors_list (List): num of anchors in each level
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
gt_rboxes (Tensor, float32): Ground truth rotated bboxes, shape(B, n, 5)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
pred_rboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 5)
Returns:
assigned_labels (Tensor): (B, L)
assigned_rboxes (Tensor): (B, L, 5)
assigned_scores (Tensor): (B, L, C), if pred_rboxes is not None, then output ious
"""
_, num_anchors, _ = anchor_points.shape
batch_size, num_max_boxes, _ = gt_rboxes.shape
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
assigned_rboxes = paddle.zeros([batch_size, num_anchors, 5])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
return assigned_labels, assigned_rboxes, assigned_scores
# get normalized gaussian distribution score and refined distribution score
gt_polys = box2corners(gt_rboxes)
score, refined_score = self.get_gaussian_distribution_score(
anchor_points, gt_rboxes, gt_polys)
inside_mask = self.get_rotated_inside_mask(anchor_points, gt_polys,
score)
regress_ranges = []
for num, bound in zip(num_anchors_list, self.boundary):
regress_ranges.append(bound.tile((1, num, 1)))
regress_ranges = paddle.concat(regress_ranges, axis=1)
regress_mask = self.get_inside_range_mask(
anchor_points, gt_bboxes, gt_rboxes, stride_tensor, regress_ranges)
# [B, n, L]
mask_positive = inside_mask * regress_mask * pad_gt_mask
refined_score = refined_score * mask_positive - (1. - mask_positive)
argmax_refined_score = refined_score.argmax(axis=-2)
max_refined_score = refined_score.max(axis=-2)
assigned_gt_index = argmax_refined_score
# assigned target
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
max_refined_score > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_rboxes = paddle.gather(
gt_rboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
assigned_rboxes = assigned_rboxes.reshape([batch_size, num_anchors, 5])
assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
ind = list(range(self.num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
if self.score_type == 'gaussian':
selected_scores = paddle_gather(
score, 1, argmax_refined_score.unsqueeze(-2)).squeeze(-2)
assigned_scores = assigned_scores * selected_scores.unsqueeze(-1)
elif self.score_type == 'iou':
assert pred_rboxes is not None, 'If score type is iou, pred_rboxes should not be None'
from ext_op import matched_rbox_iou
b, l = pred_rboxes.shape[:2]
iou_score = matched_rbox_iou(
pred_rboxes.reshape((-1, 5)), assigned_rboxes.reshape(
(-1, 5))).reshape((b, l, 1))
assigned_scores = assigned_scores * iou_score
return assigned_labels, assigned_rboxes, assigned_scores
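
Not part of the committed file above — a minimal usage sketch of FCOSRAssigner with random rotated boxes, assuming the class is re-exported through ppdet.modeling.assigners. score_type='gaussian' is used so the sketch avoids the compiled ext_op rotated-IoU operator required by the default 'iou' branch; gt_bboxes here is only a rough axis-aligned version of the rotated boxes.

import paddle
from ppdet.modeling.assigners import FCOSRAssigner  # assumed import path

assigner = FCOSRAssigner(num_classes=15, score_type='gaussian')

num_anchors_list = [256, 64, 16]                    # one entry per boundary range
L = sum(num_anchors_list)
anchor_points = paddle.rand([1, L, 2]) * 640.       # (1, L, 2), "x, y"
stride_tensor = paddle.concat(
    [paddle.full([n, 1], s) for n, s in zip(num_anchors_list, [8., 16., 32.])]).unsqueeze(0)

B, n = 2, 3
cxy = paddle.rand([B, n, 2]) * 600.
wh = paddle.rand([B, n, 2]) * 80. + 8.
ang = paddle.rand([B, n, 1]) * 3.14
gt_rboxes = paddle.concat([cxy, wh, ang], axis=-1)                # (B, n, 5): cx, cy, w, h, angle
gt_bboxes = paddle.concat([cxy - wh / 2, cxy + wh / 2], axis=-1)  # rough axis-aligned boxes
gt_labels = paddle.randint(0, 15, [B, n, 1])
pad_gt_mask = paddle.ones([B, n, 1])

labels, rboxes, scores = assigner(
    anchor_points, stride_tensor, num_anchors_list, gt_labels,
    gt_bboxes, gt_rboxes, pad_gt_mask, bg_index=15)
print(labels.shape, rboxes.shape, scores.shape)     # (B, L) (B, L, 5) (B, L, 15)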

View File

@@ -0,0 +1,316 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from scipy.optimize import linear_sum_assignment
except ImportError:
linear_sum_assignment = None
import paddle
from ppdet.core.workspace import register
__all__ = ['PoseHungarianAssigner', 'PseudoSampler']
class AssignResult:
"""Stores assignments between predicted and truth boxes.
Attributes:
num_gts (int): the number of truth boxes considered when computing this
assignment
gt_inds (LongTensor): for each predicted box indicates the 1-based
index of the assigned truth box. 0 means unassigned and -1 means
ignore.
max_overlaps (FloatTensor): the iou between the predicted box and its
assigned truth box.
labels (None | LongTensor): If specified, for each predicted box
indicates the category label of the assigned truth box.
"""
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
# Interface for possible user-defined properties
self._extra_properties = {}
@property
def num_preds(self):
"""int: the number of predictions in this assignment"""
return len(self.gt_inds)
def set_extra_property(self, key, value):
"""Set user-defined new property."""
assert key not in self.info
self._extra_properties[key] = value
def get_extra_property(self, key):
"""Get user-defined property."""
return self._extra_properties.get(key, None)
@property
def info(self):
"""dict: a dictionary of info about the object"""
basic_info = {
'num_gts': self.num_gts,
'num_preds': self.num_preds,
'gt_inds': self.gt_inds,
'max_overlaps': self.max_overlaps,
'labels': self.labels,
}
basic_info.update(self._extra_properties)
return basic_info
@register
class PoseHungarianAssigner:
"""Computes one-to-one matching between predictions and ground truth.
This class computes an assignment between the targets and the predictions
based on the costs. The costs are weighted sum of three components:
classification cost, regression L1 cost and regression oks cost. The
targets don't include the no_object, so generally there are more
predictions than targets. After the one-to-one matching, the un-matched
are treated as backgrounds. Thus each query prediction will be assigned
with `0` or a positive integer indicating the ground truth index:
- 0: negative sample, no assigned gt.
- positive integer: positive sample, index (1-based) of assigned gt.
Args:
cls_weight (int | float, optional): The scale factor for classification
cost. Default 1.0.
kpt_weight (int | float, optional): The scale factor for regression
L1 cost. Default 1.0.
oks_weight (int | float, optional): The scale factor for regression
oks cost. Default 1.0.
"""
__inject__ = ['cls_cost', 'kpt_cost', 'oks_cost']
def __init__(self,
cls_cost='ClassificationCost',
kpt_cost='KptL1Cost',
oks_cost='OksCost'):
self.cls_cost = cls_cost
self.kpt_cost = kpt_cost
self.oks_cost = oks_cost
def assign(self,
cls_pred,
kpt_pred,
gt_labels,
gt_keypoints,
gt_areas,
img_meta,
eps=1e-7):
"""Computes one-to-one matching based on the weighted costs.
This method assign each query prediction to a ground truth or
background. The `assigned_gt_inds` with -1 means don't care,
0 means negative sample, and positive number is the index (1-based)
of assigned gt.
The assignment is done in the following steps, the order matters.
1. assign every prediction to -1
2. compute the weighted costs
3. do Hungarian matching on CPU based on the costs
4. assign all to 0 (background) first, then for each matched pair
between predictions and gts, treat this prediction as foreground
and assign the corresponding gt index (plus 1) to it.
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
kpt_pred (Tensor): Predicted keypoints with normalized coordinates
(x_{i}, y_{i}), which are all in range [0, 1]. Shape
[num_query, K*2].
gt_labels (Tensor): Label of `gt_keypoints`, shape (num_gt,).
gt_keypoints (Tensor): Ground truth keypoints with unnormalized
coordinates [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
p^{K}_x, p^{K}_y, p^{K}_v]. Shape [num_gt, K*3].
gt_areas (Tensor): Ground truth mask areas, shape (num_gt,).
img_meta (dict): Meta information for current image.
eps (int | float, optional): A value added to the denominator for
numerical stability. Default 1e-7.
Returns:
:obj:`AssignResult`: The assigned result.
"""
num_gts, num_kpts = gt_keypoints.shape[0], kpt_pred.shape[0]
if not gt_keypoints.astype('bool').any():
num_gts = 0
# 1. assign -1 by default
assigned_gt_inds = paddle.full((num_kpts, ), -1, dtype="int64")
assigned_labels = paddle.full((num_kpts, ), -1, dtype="int64")
if num_gts == 0 or num_kpts == 0:
# No ground truth or keypoints, return empty assignment
if num_gts == 0:
# No ground truth, assign all to background
assigned_gt_inds[:] = 0
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
img_h, img_w, _ = img_meta['img_shape']
factor = paddle.to_tensor(
[img_w, img_h, img_w, img_h], dtype=gt_keypoints.dtype).reshape(
(1, -1))
# 2. compute the weighted costs
# classification cost
cls_cost = self.cls_cost(cls_pred, gt_labels)
# keypoint regression L1 cost
gt_keypoints_reshape = gt_keypoints.reshape((gt_keypoints.shape[0], -1,
3))
valid_kpt_flag = gt_keypoints_reshape[..., -1]
kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
2))
normalize_gt_keypoints = gt_keypoints_reshape[
..., :2] / factor[:, :2].unsqueeze(0)
kpt_cost = self.kpt_cost(kpt_pred_tmp, normalize_gt_keypoints,
valid_kpt_flag)
# keypoint OKS cost
kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
2))
kpt_pred_tmp = kpt_pred_tmp * factor[:, :2].unsqueeze(0)
oks_cost = self.oks_cost(kpt_pred_tmp, gt_keypoints_reshape[..., :2],
valid_kpt_flag, gt_areas)
# weighted sum of above three costs
cost = cls_cost + kpt_cost + oks_cost
# 3. do Hungarian matching on CPU using linear_sum_assignment
cost = cost.detach().cpu()
if linear_sum_assignment is None:
raise ImportError('Please run "pip install scipy" '
'to install scipy first.')
matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
matched_row_inds = paddle.to_tensor(matched_row_inds)
matched_col_inds = paddle.to_tensor(matched_col_inds)
# 4. assign backgrounds and foregrounds
# assign all indices to backgrounds first
assigned_gt_inds[:] = 0
# assign foregrounds based on matching results
assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
assigned_labels[matched_row_inds] = gt_labels[matched_col_inds][
..., 0].astype("int64")
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
class SamplingResult:
"""Bbox sampling result.
"""
def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
gt_flags):
self.pos_inds = pos_inds
self.neg_inds = neg_inds
if pos_inds.size > 0:
self.pos_bboxes = bboxes[pos_inds]
self.neg_bboxes = bboxes[neg_inds]
self.pos_is_gt = gt_flags[pos_inds]
self.num_gts = gt_bboxes.shape[0]
self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
if gt_bboxes.numel() == 0:
# hack for index error case
assert self.pos_assigned_gt_inds.numel() == 0
self.pos_gt_bboxes = paddle.zeros(
gt_bboxes.shape, dtype=gt_bboxes.dtype).reshape((-1, 4))
else:
if len(gt_bboxes.shape) < 2:
gt_bboxes = gt_bboxes.reshape((-1, 4))
self.pos_gt_bboxes = paddle.index_select(
gt_bboxes,
self.pos_assigned_gt_inds.astype('int64'),
axis=0)
if assign_result.labels is not None:
self.pos_gt_labels = assign_result.labels[pos_inds]
else:
self.pos_gt_labels = None
@property
def bboxes(self):
"""paddle.Tensor: concatenated positive and negative boxes"""
return paddle.concat([self.pos_bboxes, self.neg_bboxes])
def __nice__(self):
data = self.info.copy()
data['pos_bboxes'] = data.pop('pos_bboxes').shape
data['neg_bboxes'] = data.pop('neg_bboxes').shape
parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
body = ' ' + ',\n '.join(parts)
return '{\n' + body + '\n}'
@property
def info(self):
"""Returns a dictionary of info about the object."""
return {
'pos_inds': self.pos_inds,
'neg_inds': self.neg_inds,
'pos_bboxes': self.pos_bboxes,
'neg_bboxes': self.neg_bboxes,
'pos_is_gt': self.pos_is_gt,
'num_gts': self.num_gts,
'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
}
@register
class PseudoSampler:
"""A pseudo sampler that does not do sampling actually."""
def __init__(self, **kwargs):
pass
def _sample_pos(self, **kwargs):
"""Sample positive samples."""
raise NotImplementedError
def _sample_neg(self, **kwargs):
"""Sample negative samples."""
raise NotImplementedError
def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):
"""Directly returns the positive and negative indices of samples.
Args:
assign_result (:obj:`AssignResult`): Assigned results
bboxes (paddle.Tensor): Bounding boxes
gt_bboxes (paddle.Tensor): Ground truth boxes
Returns:
:obj:`SamplingResult`: sampler results
"""
pos_inds = paddle.nonzero(
assign_result.gt_inds > 0, as_tuple=False).squeeze(-1)
neg_inds = paddle.nonzero(
assign_result.gt_inds == 0, as_tuple=False).squeeze(-1)
gt_flags = paddle.zeros([bboxes.shape[0]], dtype='int32')
sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
return sampling_result
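
Not part of the committed file above — a small sketch wiring AssignResult and PseudoSampler together by hand, assuming the classes defined above are in scope. gt_inds is 1-based: 0 marks background and a positive value i points to the i-th ground truth.

import paddle

# four predictions: boxes 1 and 3 are matched to gts 2 and 1 respectively
gt_inds = paddle.to_tensor([0, 2, 0, 1], dtype='int64')
labels = paddle.to_tensor([-1, 5, -1, 3], dtype='int64')
assign_result = AssignResult(num_gts=2, gt_inds=gt_inds, max_overlaps=None, labels=labels)

bboxes = paddle.rand([4, 4])        # predicted boxes, xyxy
gt_bboxes = paddle.rand([2, 4])     # ground truth boxes, xyxy

sampler = PseudoSampler()
result = sampler.sample(assign_result, bboxes, gt_bboxes)
print(result.pos_inds.numpy(), result.neg_inds.numpy())   # [1 3] [0 2]
print(result.pos_gt_labels.numpy())                       # [5 3]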

View File

@@ -0,0 +1,52 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register
from ppdet.modeling.proposal_generator.target import label_box
__all__ = ['MaxIoUAssigner']
@register
class MaxIoUAssigner(object):
"""a standard bbox assigner based on max IoU, use ppdet's label_box
as backend.
Args:
positive_overlap (float): threshold for defining positive samples
negative_overlap (float): threshold for denining negative samples
allow_low_quality (bool): whether to lower IoU thr if a GT poorly
overlaps with candidate bboxes
"""
def __init__(self,
positive_overlap,
negative_overlap,
allow_low_quality=True):
self.positive_overlap = positive_overlap
self.negative_overlap = negative_overlap
self.allow_low_quality = allow_low_quality
def __call__(self, bboxes, gt_bboxes):
matches, match_labels = label_box(
bboxes,
gt_bboxes,
positive_overlap=self.positive_overlap,
negative_overlap=self.negative_overlap,
allow_low_quality=self.allow_low_quality,
ignore_thresh=-1,
is_crowd=None,
assign_on_cpu=False)
return matches, match_labels
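
Not part of the committed file above — a minimal usage sketch of MaxIoUAssigner, assuming the class is re-exported through ppdet.modeling.assigners. Boxes are xyxy tensors; for every candidate box, label_box returns the index of its best-matching gt plus a positive (1) / negative (0) / ignore (-1) label.

import paddle
from ppdet.modeling.assigners import MaxIoUAssigner  # assumed import path

assigner = MaxIoUAssigner(positive_overlap=0.7, negative_overlap=0.3)

axy = paddle.rand([100, 2]) * 200.
awh = paddle.rand([100, 2]) * 50. + 1.
bboxes = paddle.concat([axy, axy + awh], axis=-1)       # candidate boxes (100, 4)
gxy = paddle.rand([5, 2]) * 200.
gwh = paddle.rand([5, 2]) * 50. + 10.
gt_bboxes = paddle.concat([gxy, gxy + gwh], axis=-1)    # ground truth boxes (5, 4)

matches, match_labels = assigner(bboxes, gt_bboxes)
print(matches.shape, match_labels.shape)                # (100,) (100,)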

View File

@@ -0,0 +1,275 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register
__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
def masked_fill(x, mask, value):
y = paddle.full(x.shape, value, x.dtype)
return paddle.where(mask, y, x)
@register
class KptL1Cost(object):
"""KptL1Cost.
this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
Args:
weight (int | float, optional): loss_weight.
"""
def __init__(self, weight=1.0):
self.weight = weight
def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
"""
Args:
kpt_pred (Tensor): Predicted keypoints with normalized coordinates
(x_{i}, y_{i}), which are all in range [0, 1]. Shape
[num_query, K, 2].
gt_keypoints (Tensor): Ground truth keypoints with normalized
coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
Shape [num_gt, K].
Returns:
paddle.Tensor: kpt_cost value with weight.
"""
kpt_cost = []
for i in range(len(gt_keypoints)):
if gt_keypoints[i].size == 0:
kpt_cost.append(kpt_pred.sum() * 0)
kpt_pred_tmp = kpt_pred.clone()
valid_flag = valid_kpt_flag[i] > 0
valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
kpt_pred_tmp)
if not valid_flag_expand.all():
kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
cost = F.pairwise_distance(
kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
gt_keypoints[i].reshape((-1, )).unsqueeze(0),
p=1,
keepdim=True)
avg_factor = paddle.clip(
valid_flag.astype('float32').sum() * 2, 1.0)
cost = cost / avg_factor
kpt_cost.append(cost)
kpt_cost = paddle.concat(kpt_cost, axis=1)
return kpt_cost * self.weight
@register
class OksCost(object):
"""OksCost.
this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
Args:
num_keypoints (int): number of keypoints
weight (int | float, optional): loss_weight.
"""
def __init__(self, num_keypoints=17, weight=1.0):
self.weight = weight
if num_keypoints == 17:
self.sigmas = np.array(
[
.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
1.07, .87, .87, .89, .89
],
dtype=np.float32) / 10.0
elif num_keypoints == 14:
self.sigmas = np.array(
[
.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
.89, .79, .79
],
dtype=np.float32) / 10.0
else:
raise ValueError(f'Unsupported keypoints number {num_keypoints}')
def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
"""
Args:
kpt_pred (Tensor): Predicted keypoints with unnormalized
coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
gt_keypoints (Tensor): Ground truth keypoints with unnormalized
coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
Shape [num_gt, K].
gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].
Returns:
paddle.Tensor: oks_cost value with weight.
"""
sigmas = paddle.to_tensor(self.sigmas)
variances = (sigmas * 2)**2
oks_cost = []
assert len(gt_keypoints) == len(gt_areas)
for i in range(len(gt_keypoints)):
if gt_keypoints[i].size == 0:
oks_cost.append(kpt_pred.sum() * 0)
squared_distance = \
(kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
(kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
vis_flag = (valid_kpt_flag[i] > 0).astype('int')
vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
num_vis_kpt = vis_ind.shape[0]
# assert num_vis_kpt > 0
if num_vis_kpt == 0:
oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
continue
area = gt_areas[i]
squared_distance0 = squared_distance / (area * variances * 2)
squared_distance0 = paddle.index_select(
squared_distance0, vis_ind, axis=1)
squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
keepdim=True)
oks = squared_distance1 / num_vis_kpt
# The 1 is a constant that doesn't change the matching, so omitted.
oks_cost.append(-oks)
oks_cost = paddle.concat(oks_cost, axis=1)
return oks_cost * self.weight
@register
class ClassificationCost:
"""ClsSoftmaxCost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
(num_query, num_class).
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value with weight
"""
# Following the official DETR repo: in contrast to the loss, where
# NLL is used, here we approximate it by 1 - cls_score[gt_label].
# The 1 is a constant that doesn't change the matching,
# so it can be omitted.
cls_score = cls_pred.softmax(-1)
cls_cost = -cls_score[:, gt_labels]
return cls_cost * self.weight
@register
class FocalLossCost:
"""FocalLossCost.
Args:
weight (int | float, optional): loss_weight
alpha (int | float, optional): focal_loss alpha
gamma (int | float, optional): focal_loss gamma
eps (float, optional): default 1e-12
binary_input (bool, optional): Whether the input is binary,
default False.
"""
def __init__(self,
weight=1.,
alpha=0.25,
gamma=2,
eps=1e-12,
binary_input=False):
self.weight = weight
self.alpha = alpha
self.gamma = gamma
self.eps = eps
self.binary_input = binary_input
def _focal_loss_cost(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
(num_query, num_class).
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value with weight
"""
if gt_labels.size == 0:
return cls_pred.sum() * 0
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + self.eps).log() * (
1 - self.alpha) * cls_pred.pow(self.gamma)
pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
1 - cls_pred).pow(self.gamma)
cls_cost = paddle.index_select(
pos_cost, gt_labels, axis=1) - paddle.index_select(
neg_cost, gt_labels, axis=1)
return cls_cost * self.weight
def _mask_focal_loss_cost(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits
in shape (num_query, d1, ..., dn), dtype=paddle.float32.
gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
dtype=paddle.long. Labels should be binary.
Returns:
Tensor: Focal cost matrix with weight in shape\
(num_query, num_gt).
"""
cls_pred = cls_pred.flatten(1)
gt_labels = gt_labels.flatten(1).float()
n = cls_pred.shape[1]
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + self.eps).log() * (
1 - self.alpha) * cls_pred.pow(self.gamma)
pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
1 - cls_pred).pow(self.gamma)
cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
return cls_cost / n * self.weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits.
gt_labels (Tensor): Labels.
Returns:
Tensor: Focal cost matrix with weight in shape\
(num_query, num_gt).
"""
if self.binary_input:
return self._mask_focal_loss_cost(cls_pred, gt_labels)
else:
return self._focal_loss_cost(cls_pred, gt_labels)
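
Not part of the committed file above — a small sketch of how the cost classes above combine into a (num_query, num_gt) cost matrix, assuming they are in scope. KptL1Cost expects keypoints normalized to [0, 1] while OksCost expects pixel coordinates, so the sketch scales by an assumed 640-pixel image size; the weights are illustrative only.

import paddle

num_query, num_gt, K = 8, 2, 17
cls_pred = paddle.randn([num_query, 2])               # classification logits
kpt_pred = paddle.rand([num_query, K, 2])             # normalized keypoints
gt_keypoints = paddle.rand([num_gt, K, 2])            # normalized keypoints
valid_kpt_flag = paddle.ones([num_gt, K])             # all keypoints visible
gt_areas = paddle.rand([num_gt]) * 100. + 1.
gt_labels = paddle.zeros([num_gt], dtype='int64')     # single "person" class

cost = (ClassificationCost(weight=1.0)(cls_pred, gt_labels) +
        KptL1Cost(weight=70.0)(kpt_pred, gt_keypoints, valid_kpt_flag) +
        OksCost(weight=7.0)(kpt_pred * 640., gt_keypoints * 640., valid_kpt_flag, gt_areas))
print(cost.shape)                                     # [num_query, num_gt]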

View File

@@ -0,0 +1,164 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes
from .utils import gather_topk_anchors, compute_max_iou_anchor
__all__ = ['RotatedTaskAlignedAssigner']
@register
class RotatedTaskAlignedAssigner(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection
"""
def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
super(RotatedTaskAlignedAssigner, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.eps = eps
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between all bboxes (bboxes of all pyramid levels) and gt
2. select the top-k bboxes as candidates for each gt
3. limit the positive sample's center in gt (because the anchor-free detector
can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5)
anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format
num_anchors_list (List): num of anchors in each level, shape(L)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 5)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious)
ious.stop_gradient = True
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta)
# check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
assigned_bboxes.stop_gradient = True
assigned_scores.stop_gradient = True
assigned_labels.stop_gradient = True
return assigned_labels, assigned_bboxes, assigned_scores
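
Not part of the committed file above — a minimal usage sketch of RotatedTaskAlignedAssigner, assuming the class is re-exported through ppdet.modeling.assigners and that PaddleDetection's ext_op custom operators are compiled, since rotated_iou_similarity relies on them. Rotated boxes are (cx, cy, w, h, angle).

import paddle
from ppdet.modeling.assigners import RotatedTaskAlignedAssigner  # assumed import path

assigner = RotatedTaskAlignedAssigner(topk=13, alpha=1.0, beta=6.0)

B, L, C, n = 2, 336, 15, 3
num_anchors_list = [256, 64, 16]
pred_scores = paddle.rand([B, L, C])
pcxy = paddle.rand([B, L, 2]) * 640.
pwh = paddle.rand([B, L, 2]) * 60. + 8.
pang = paddle.rand([B, L, 1]) * 3.14
pred_bboxes = paddle.concat([pcxy, pwh, pang], axis=-1)     # (B, L, 5)
anchor_points = paddle.rand([1, L, 2]) * 640.
gcxy = paddle.rand([B, n, 2]) * 600.
gwh = paddle.rand([B, n, 2]) * 80. + 16.
gang = paddle.rand([B, n, 1]) * 3.14
gt_bboxes = paddle.concat([gcxy, gwh, gang], axis=-1)       # (B, n, 5)
gt_labels = paddle.randint(0, C, [B, n, 1])
pad_gt_mask = paddle.ones([B, n, 1])

labels, rboxes, scores = assigner(
    pred_scores, pred_bboxes, anchor_points, num_anchors_list,
    gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)
print(labels.shape, rboxes.shape, scores.shape)             # (B, L) (B, L, 5) (B, L, C)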

View File

@@ -0,0 +1,265 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py
import paddle
import numpy as np
import paddle.nn.functional as F
from ppdet.modeling.losses.varifocal_loss import varifocal_loss
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
from ppdet.core.workspace import register
@register
class SimOTAAssigner(object):
"""Computes matching between predictions and ground truth.
Args:
center_radius (int | float, optional): Ground truth center size
to judge whether a prior is in center. Default 2.5.
candidate_topk (int, optional): The candidate top-k which used to
get top-k ious to calculate dynamic-k. Default 10.
iou_weight (int | float, optional): The scale factor for regression
iou cost. Default 3.0.
cls_weight (int | float, optional): The scale factor for classification
cost. Default 1.0.
num_classes (int): The num_classes of dataset.
use_vfl (bool): Whether to use varifocal_loss when calculating the cost matrix.
"""
__shared__ = ['num_classes']
def __init__(self,
center_radius=2.5,
candidate_topk=10,
iou_weight=3.0,
cls_weight=1.0,
num_classes=80,
use_vfl=True):
self.center_radius = center_radius
self.candidate_topk = candidate_topk
self.iou_weight = iou_weight
self.cls_weight = cls_weight
self.num_classes = num_classes
self.use_vfl = use_vfl
def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
gt_bboxes):
num_gt = gt_bboxes.shape[0]
flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
[1, num_gt])
flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
[1, num_gt])
# is prior centers in gt bboxes, shape: [n_center, n_gt]
l_ = flatten_x - gt_bboxes[:, 0]
t_ = flatten_y - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - flatten_x
b_ = gt_bboxes[:, 3] - flatten_y
deltas = paddle.stack([l_, t_, r_, b_], axis=1)
is_in_gts = deltas.min(axis=1) > 0
is_in_gts_all = is_in_gts.sum(axis=1) > 0
# is prior centers in gt centers
gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y
cl_ = flatten_x - ct_bound_l
ct_ = flatten_y - ct_bound_t
cr_ = ct_bound_r - flatten_x
cb_ = ct_bound_b - flatten_y
ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
is_in_cts = ct_deltas.min(axis=1) > 0
is_in_cts_all = is_in_cts.sum(axis=1) > 0
# in any of gts or gt centers, shape: [n_center]
is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
is_in_cts_all)
is_in_gts_or_centers_all_inds = paddle.nonzero(
is_in_gts_or_centers_all).squeeze(1)
# both in gts and gt centers, shape: [num_fg, num_gt]
is_in_gts_and_centers = paddle.logical_and(
paddle.gather(
is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'),
paddle.gather(
is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'))
return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
match_matrix = np.zeros_like(cost_matrix.numpy())
# select candidate topk ious for dynamic-k calculation
topk_ious, _ = paddle.topk(
pairwise_ious,
min(self.candidate_topk, pairwise_ious.shape[0]),
axis=0)
# calculate dynamic k for each gt
dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
for gt_idx in range(num_gt):
_, pos_idx = paddle.topk(
cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0
del topk_ious, dynamic_ks, pos_idx
# priors matched to more than one gt
extra_match_gts_mask = match_matrix.sum(1) > 1
if extra_match_gts_mask.sum() > 0:
cost_matrix = cost_matrix.numpy()
cost_argmin = np.argmin(
cost_matrix[extra_match_gts_mask, :], axis=1)
match_matrix[extra_match_gts_mask, :] *= 0.0
match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
# get foreground mask
match_fg_mask_inmatrix = match_matrix.sum(1) > 0
match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)
return match_gt_inds_to_fg, match_fg_mask_inmatrix
def get_sample(self, assign_gt_inds, gt_bboxes):
pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
if gt_bboxes.size == 0:
# hack for index error case
assert pos_assigned_gt_inds.size == 0
pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
else:
if len(gt_bboxes.shape) < 2:
gt_bboxes = gt_bboxes.reshape(-1, 4)
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
def __call__(self,
flatten_cls_pred_scores,
flatten_center_and_stride,
flatten_bboxes,
gt_bboxes,
gt_labels,
eps=1e-7):
"""Assign gt to priors using SimOTA.
TODO: add comment.
Returns:
assign_result: The assigned result.
"""
num_gt = gt_bboxes.shape[0]
num_bboxes = flatten_bboxes.shape[0]
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.ones([num_bboxes], dtype=np.float32)
bbox_target = np.zeros_like(flatten_center_and_stride)
return 0, label, label_weight, bbox_target
is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
flatten_center_and_stride, gt_bboxes)
# bboxes and scores to calculate matrix
valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
valid_cls_pred_scores = flatten_cls_pred_scores[
is_in_gts_or_centers_all_inds]
num_valid_bboxes = valid_flatten_bboxes.shape[0]
pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
gt_bboxes) # [num_points,num_gts]
if self.use_vfl:
gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
[num_valid_bboxes, 1]).reshape([-1])
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1]).reshape([-1, self.num_classes])
vfl_score = np.zeros(valid_pred_scores.shape)
vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
)] = pairwise_ious.reshape([-1])
vfl_score = paddle.to_tensor(vfl_score)
losses_vfl = varifocal_loss(
valid_pred_scores, vfl_score,
use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
losses_giou = batch_bbox_overlaps(
valid_flatten_bboxes, gt_bboxes, mode='giou')
cost_matrix = (
losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
else:
iou_cost = -paddle.log(pairwise_ious + eps)
gt_onehot_label = (F.one_hot(
gt_labels.squeeze(-1).cast(paddle.int64),
flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
.tile([num_valid_bboxes, 1, 1]))
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1])
cls_cost = F.binary_cross_entropy(
valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
cost_matrix = (
cls_cost * self.cls_weight + iou_cost * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
match_gt_inds_to_fg, match_fg_mask_inmatrix = \
self.dynamic_k_matching(
cost_matrix, pairwise_ious, num_gt)
# sample and assign results
assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
)] = match_fg_mask_inmatrix
assigned_gt_inds[match_fg_mask_inall.astype(
np.bool_)] = match_gt_inds_to_fg + 1
pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
= self.get_sample(assigned_gt_inds, gt_bboxes.numpy())
bbox_target = np.zeros(flatten_bboxes.shape, paddle.common_ops_import.convert_dtype(flatten_bboxes.dtype))
bbox_weight = np.zeros_like(bbox_target)
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.zeros([num_bboxes], dtype=np.float32)
if len(pos_inds) > 0:
gt_labels = gt_labels.numpy()
pos_bbox_targets = pos_gt_bboxes
bbox_target[pos_inds, :] = pos_bbox_targets
bbox_weight[pos_inds, :] = 1.0
if not np.any(gt_labels):
label[pos_inds] = 0
else:
label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]
label_weight[pos_inds] = 1.0
if len(neg_inds) > 0:
label_weight[neg_inds] = 1.0
pos_num = max(pos_inds.size, 1)
return pos_num, label, label_weight, bbox_target
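def _example_simota_call(assigner, cls_scores, centers_and_strides, decoded_bboxes,
                         gt_bboxes, gt_labels):
    # Illustrative usage sketch, not part of the original file. `assigner` is an
    # instance of the enclosing SimOTA-style assigner class; the tensor shapes
    # below are assumptions inferred from the code above, not a documented API:
    #   cls_scores:          [num_priors, num_classes]
    #   centers_and_strides: [num_priors, 4] (cx, cy, stride_w, stride_h)
    #   decoded_bboxes:      [num_priors, 4] xyxy
    #   gt_bboxes:           [num_gt, 4] xyxy,  gt_labels: [num_gt, 1]
    pos_num, label, label_weight, bbox_target = assigner(
        cls_scores, centers_and_strides, decoded_bboxes, gt_bboxes, gt_labels)
    # label == num_classes marks background priors; bbox_target holds the
    # matched gt box for each positive prior.
    return pos_num, label, label_weight, bbox_target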

View File

@@ -0,0 +1,193 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import batch_iou_similarity
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
__all__ = ['TaskAlignedAssigner']
def is_close_gt(anchor, gt, stride_lst, max_dist=2.0, alpha=2.):
"""Calculate distance ratio of box1 and box2 in batch for larger stride
anchors dist/stride to promote the survive of large distance match
Args:
anchor (Tensor): box with the shape [L, 2]
gt (Tensor): box with the shape [N, M2, 4]
Return:
dist (Tensor): dist ratio between box1 and box2 with the shape [N, M1, M2]
"""
center1 = anchor.unsqueeze(0)
center2 = (gt[..., :2] + gt[..., -2:]) / 2.
center1 = center1.unsqueeze(1)  # [1, L, 2] -> [1, 1, L, 2]
center2 = center2.unsqueeze(2) # [N, M2, 2] -> [N, M2, 1, 2]
stride = paddle.concat([
paddle.full([x], 32 / pow(2, idx)) for idx, x in enumerate(stride_lst)
]).unsqueeze(0).unsqueeze(0)
dist = paddle.linalg.norm(center1 - center2, p=2, axis=-1) / stride
# build the mask without aliasing `dist` in place: 1. where the normalised
# distance is below max_dist, 0. otherwise
dist_ratio = (dist < max_dist).astype(dist.dtype)
return dist_ratio
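def _example_is_close_gt():
    # Illustrative sketch, not part of the original file. Assumes three FPN
    # levels holding [4, 2, 1] anchors, so the formula above yields strides
    # 32, 16 and 8. All anchors sit at the origin and the single gt box is
    # centered at (24, 24), i.e. about 33.9 px away.
    anchor = paddle.zeros([7, 2])
    gt = paddle.to_tensor([[[0., 0., 48., 48.]]])
    mask = is_close_gt(anchor, gt, [4, 2, 1])
    # The stride-32 anchors get 1. (33.9 / 32 < max_dist), while the stride-16
    # and stride-8 anchors get 0. (their normalised distances are >= max_dist).
    return mask  # shape [1, 1, 7]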
@register
class TaskAlignedAssigner(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection
"""
def __init__(self,
topk=13,
alpha=1.0,
beta=6.0,
eps=1e-9,
is_close_gt=False):
super(TaskAlignedAssigner, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.eps = eps
self.is_close_gt = is_close_gt
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between every predicted bbox (across all pyramid levels) and each gt
2. select the top-k bboxes as candidates for each gt
3. restrict positive samples to anchors whose centers lie inside the gt (because the
anchor-free detector can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou is selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
num_anchors_list (List): num of anchors in each level, shape(L)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta)
# check the positive sample's center in gt, [B, n, L]
if self.is_close_gt:
is_in_gts = is_close_gt(anchor_points, gt_bboxes, num_anchors_list)
else:
is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
return assigned_labels, assigned_bboxes, assigned_scores
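def _example_task_aligned_assign():
    # Illustrative sketch, not part of the original file: one image, 8 anchors
    # on a single level, 80 classes, 2 padded gt slots of which only the first
    # is real. Shapes follow the docstring above; values are random.
    B, L, C = 1, 8, 80
    assigner = TaskAlignedAssigner(topk=4)
    pred_scores = paddle.rand([B, L, C])
    pred_bboxes = paddle.rand([B, L, 4]) * 32.
    anchor_points = paddle.rand([L, 2]) * 32.
    gt_labels = paddle.to_tensor([[[3], [0]]], dtype='int64')
    gt_bboxes = paddle.to_tensor([[[0., 0., 32., 32.], [0., 0., 0., 0.]]])
    pad_gt_mask = paddle.to_tensor([[[1.], [0.]]])
    labels, bboxes, scores = assigner(
        pred_scores, pred_bboxes, anchor_points, [L],
        gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)
    # labels: [B, L] with C (= 80) marking background anchors; scores: [B, L, C]
    # soft targets scaled by the alignment metric t = s**alpha * u**beta,
    # renormalised per gt instance.
    return labels, bboxes, scores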

View File

@@ -0,0 +1,181 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import batch_iou_similarity
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
__all__ = ['TaskAlignedAssigner_CR']
@register
class TaskAlignedAssigner_CR(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection with Center R
"""
def __init__(self,
topk=13,
alpha=1.0,
beta=6.0,
center_radius=None,
eps=1e-9):
super(TaskAlignedAssigner_CR, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.center_radius = center_radius
self.eps = eps
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
stride_tensor,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between every predicted bbox (across all pyramid levels) and each gt
2. select the top-k bboxes as candidates for each gt
3. restrict positive samples to anchors whose centers lie inside the gt (because the
anchor-free detector can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou is selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
stride_tensor (Tensor, float32): stride of feature map, shape(L, 1)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta) * pad_gt_mask
# select positive sample, [B, n, L]
if self.center_radius is None:
# check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_inside_bboxes(
anchor_points, gt_bboxes, sm_use=True)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
mask_positive = gather_topk_anchors(
alignment_metrics, self.topk, topk_mask=pad_gt_mask) * is_in_gts
else:
is_in_gts, is_in_center = check_points_inside_bboxes(
anchor_points,
gt_bboxes,
stride_tensor * self.center_radius,
sm_use=True)
is_in_gts *= pad_gt_mask
is_in_center *= pad_gt_mask
candidate_metrics = paddle.where(
is_in_gts.sum(-1, keepdim=True) == 0,
alignment_metrics + is_in_center,
alignment_metrics)
mask_positive = gather_topk_anchors(
candidate_metrics, self.topk,
topk_mask=pad_gt_mask) * paddle.cast((is_in_center > 0) |
(is_in_gts > 0), 'float32')
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious * mask_positive)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
return assigned_labels, assigned_bboxes, assigned_scores
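def _example_task_aligned_assign_cr():
    # Illustrative sketch, not part of the original file: same toy setup as a
    # plain task-aligned assignment, plus the per-anchor stride tensor this
    # variant needs. With center_radius set, anchors whose centers lie within
    # center_radius * stride of a gt center remain candidates even when no
    # anchor center falls inside that gt box (the paddle.where fallback above).
    B, L, C = 1, 8, 80
    assigner = TaskAlignedAssigner_CR(topk=4, center_radius=2.5)
    pred_scores = paddle.rand([B, L, C])
    pred_bboxes = paddle.rand([B, L, 4]) * 32.
    anchor_points = paddle.rand([L, 2]) * 32.
    stride_tensor = paddle.full([L, 1], 8.)
    gt_labels = paddle.to_tensor([[[3]]], dtype='int64')
    gt_bboxes = paddle.to_tensor([[[0., 0., 32., 32.]]])
    pad_gt_mask = paddle.ones([1, 1, 1])
    return assigner(pred_scores, pred_bboxes, anchor_points, stride_tensor,
                    gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)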

View File

@@ -0,0 +1,93 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
from ppdet.modeling.transformers import bbox_xyxy_to_cxcywh
__all__ = ['UniformAssigner']
def batch_p_dist(x, y, p=2):
"""
Calculate the pairwise p-norm distance between x and y; the first axes of x
and y are treated as batch dimensions.
Returns a tensor of shape [x.shape[0], y.shape[0]].
"""
x = x.unsqueeze(1)
diff = x - y
return paddle.norm(diff, p=p, axis=list(range(2, diff.dim())))
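def _example_batch_p_dist():
    # Illustrative sketch, not part of the original file: the pairwise L1
    # distance between 3 predicted boxes and 2 gt boxes (both in cxcywh space
    # here) yields the [3, 2] cost matrix used by the top-k matching below.
    preds = paddle.rand([3, 4])
    gts = paddle.rand([2, 4])
    return batch_p_dist(preds, gts, p=1)  # shape [3, 2]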
@register
class UniformAssigner(nn.Layer):
def __init__(self, pos_ignore_thr, neg_ignore_thr, match_times=4):
super(UniformAssigner, self).__init__()
self.pos_ignore_thr = pos_ignore_thr
self.neg_ignore_thr = neg_ignore_thr
self.match_times = match_times
def forward(self, bbox_pred, anchor, gt_bboxes, gt_labels=None):
num_bboxes = bbox_pred.shape[0]
num_gts = gt_bboxes.shape[0]
match_labels = paddle.full([num_bboxes], -1, dtype=paddle.int32)
pred_ious = batch_bbox_overlaps(bbox_pred, gt_bboxes)
pred_max_iou = pred_ious.max(axis=1)
neg_ignore = pred_max_iou > self.neg_ignore_thr
# exclude potential ignored neg samples first, deal with pos samples later
#match_labels: -2(ignore), -1(neg) or >=0(pos_inds)
match_labels = paddle.where(neg_ignore,
paddle.full_like(match_labels, -2),
match_labels)
bbox_pred_c = bbox_xyxy_to_cxcywh(bbox_pred)
anchor_c = bbox_xyxy_to_cxcywh(anchor)
gt_bboxes_c = bbox_xyxy_to_cxcywh(gt_bboxes)
bbox_pred_dist = batch_p_dist(bbox_pred_c, gt_bboxes_c, p=1)
anchor_dist = batch_p_dist(anchor_c, gt_bboxes_c, p=1)
top_pred = bbox_pred_dist.topk(
k=self.match_times, axis=0, largest=False)[1]
top_anchor = anchor_dist.topk(
k=self.match_times, axis=0, largest=False)[1]
tar_pred = paddle.arange(num_gts).expand([self.match_times, num_gts])
tar_anchor = paddle.arange(num_gts).expand([self.match_times, num_gts])
pos_places = paddle.concat([top_pred, top_anchor]).reshape([-1])
pos_inds = paddle.concat([tar_pred, tar_anchor]).reshape([-1])
pos_anchor = anchor[pos_places]
pos_tar_bbox = gt_bboxes[pos_inds]
pos_ious = batch_bbox_overlaps(
pos_anchor, pos_tar_bbox, is_aligned=True)
pos_ignore = pos_ious < self.pos_ignore_thr
pos_inds = paddle.where(pos_ignore,
paddle.full_like(pos_inds, -2), pos_inds)
match_labels[pos_places] = pos_inds
match_labels.stop_gradient = True
pos_keep = ~pos_ignore
if pos_keep.sum() > 0:
pos_places_keep = pos_places[pos_keep]
pos_bbox_pred = bbox_pred[pos_places_keep].reshape([-1, 4])
pos_bbox_tar = pos_tar_bbox[pos_keep].reshape([-1, 4]).detach()
else:
pos_bbox_pred = None
pos_bbox_tar = None
return match_labels, pos_bbox_pred, pos_bbox_tar
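def _example_uniform_assign():
    # Illustrative sketch, not part of the original file: 16 well-formed
    # predicted boxes and anchors matched against 2 gt boxes. The thresholds
    # and match_times are assumptions (YOLOF-style values), not this file's
    # configuration.
    assigner = UniformAssigner(pos_ignore_thr=0.15, neg_ignore_thr=0.7,
                               match_times=4)
    xy = paddle.rand([16, 2]) * 48.
    wh = paddle.rand([16, 2]) * 16. + 4.
    bbox_pred = paddle.concat([xy, xy + wh], axis=-1)    # valid xyxy boxes
    anchor = paddle.concat([xy, xy + wh + 2.], axis=-1)  # valid xyxy boxes
    gt_bboxes = paddle.to_tensor([[0., 0., 32., 32.], [16., 16., 48., 48.]])
    match_labels, pos_bbox_pred, pos_bbox_tar = assigner(
        bbox_pred, anchor, gt_bboxes)
    # match_labels: [16] with -2 = ignore, -1 = negative, >= 0 = matched gt index
    return match_labels, pos_bbox_pred, pos_bbox_tar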

View File

@@ -0,0 +1,230 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
__all__ = [
'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
'compute_max_iou_anchor', 'compute_max_iou_gt',
'generate_anchors_for_grid_cell'
]
def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
r""" Pad 0 in gt_labels and gt_bboxes.
Args:
gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
Returns:
pad_gt_labels (Tensor, int64): shape[B, n, 1]
pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
pad_gt_scores (Tensor, float32): shape[B, n, 1]
pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
"""
if isinstance(gt_labels, paddle.Tensor) and isinstance(gt_bboxes,
paddle.Tensor):
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
pad_gt_mask = (
gt_bboxes.sum(axis=-1, keepdim=True) > 0).astype(gt_bboxes.dtype)
if gt_scores is None:
gt_scores = pad_gt_mask.clone()
assert gt_labels.ndim == gt_scores.ndim
return gt_labels, gt_bboxes, gt_scores, pad_gt_mask
elif isinstance(gt_labels, list) and isinstance(gt_bboxes, list):
assert len(gt_labels) == len(gt_bboxes), \
'The number of `gt_labels` and `gt_bboxes` is not equal. '
num_max_boxes = max([len(a) for a in gt_bboxes])
batch_size = len(gt_bboxes)
# pad label and bbox
pad_gt_labels = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_labels[0].dtype)
pad_gt_bboxes = paddle.zeros(
[batch_size, num_max_boxes, 4], dtype=gt_bboxes[0].dtype)
pad_gt_scores = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
pad_gt_mask = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
if len(label) > 0 and len(bbox) > 0:
pad_gt_labels[i, :len(label)] = label
pad_gt_bboxes[i, :len(bbox)] = bbox
pad_gt_mask[i, :len(bbox)] = 1.
if gt_scores is not None:
pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
if gt_scores is None:
pad_gt_scores = pad_gt_mask.clone()
return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
else:
raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')
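def _example_pad_gt():
    # Illustrative sketch, not part of the original file: two images with 2 and
    # 1 gt boxes are padded to a fixed-size batch; pad_gt_mask marks the real
    # boxes with 1. and the padded slot with 0.
    gt_labels = [paddle.to_tensor([[1], [3]], dtype='int64'),
                 paddle.to_tensor([[0]], dtype='int64')]
    gt_bboxes = [paddle.to_tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]]),
                 paddle.to_tensor([[2., 2., 8., 8.]])]
    labels, bboxes, scores, mask = pad_gt(gt_labels, gt_bboxes)
    # labels: [2, 2, 1], bboxes: [2, 2, 4], scores and mask: [2, 2, 1]
    return labels, bboxes, scores, mask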
def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
r"""
Args:
metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
topk (int): The number of top elements to look for along the axis.
largest (bool) : largest is a flag, if set to true,
algorithm will sort by descending order, otherwise sort by
ascending order. Default: True
topk_mask (Tensor, float32): shape[B, n, 1], ignore bbox mask,
Default: None
eps (float): Default: 1e-9
Returns:
is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = metrics.shape[-1]
topk_metrics, topk_idxs = paddle.topk(
metrics, topk, axis=-1, largest=largest)
if topk_mask is None:
topk_mask = (
topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
axis=-2).astype(metrics.dtype)
return is_in_topk * topk_mask
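def _example_gather_topk_anchors():
    # Illustrative sketch, not part of the original file: one image, one gt and
    # five anchors; the two largest metrics (0.9 and 0.7) are selected, so the
    # returned mask is [[[0., 1., 0., 1., 0.]]].
    metrics = paddle.to_tensor([[[0.1, 0.9, 0.3, 0.7, 0.2]]])
    return gather_topk_anchors(metrics, topk=2)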
def check_points_inside_bboxes(points,
bboxes,
center_radius_tensor=None,
eps=1e-9,
sm_use=False):
r"""
Args:
    points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
    bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
    center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
    eps (float): Default: 1e-9
    sm_use (bool): if True and center_radius_tensor is given, return the
        in-box mask and the in-center mask separately. Default: False
Returns:
    is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected.
    When center_radius_tensor is given and sm_use is False, a pair of bool
    masks (inside box and center, inside box or center) is returned instead.
"""
points = points.unsqueeze([0, 1])
x, y = points.chunk(2, axis=-1)
xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
# check whether `points` is in `bboxes`
l = x - xmin
t = y - ymin
r = xmax - x
b = ymax - y
delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
if center_radius_tensor is not None:
# check whether `points` is in `center_radius`
center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
cx = (xmin + xmax) * 0.5
cy = (ymin + ymax) * 0.5
l = x - (cx - center_radius_tensor)
t = y - (cy - center_radius_tensor)
r = (cx + center_radius_tensor) - x
b = (cy + center_radius_tensor) - y
delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
if sm_use:
return is_in_bboxes.astype(bboxes.dtype), is_in_center.astype(
bboxes.dtype)
else:
return (paddle.logical_and(is_in_bboxes, is_in_center),
paddle.logical_or(is_in_bboxes, is_in_center))
return is_in_bboxes.astype(bboxes.dtype)
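def _example_check_points_inside_bboxes():
    # Illustrative sketch, not part of the original file: two anchor centers
    # tested against one gt box; only (5, 5) lies strictly inside
    # [0, 0, 10, 10], so the returned mask is [[[1., 0.]]].
    points = paddle.to_tensor([[5., 5.], [20., 20.]])
    bboxes = paddle.to_tensor([[[0., 0., 10., 10.]]])
    return check_points_inside_bboxes(points, bboxes)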
def compute_max_iou_anchor(ious):
r"""
For each anchor, find the GT with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_max_boxes = ious.shape[-2]
max_iou_index = ious.argmax(axis=-2)
is_max_iou = F.one_hot(max_iou_index, num_max_boxes).transpose([0, 2, 1])
return is_max_iou.astype(ious.dtype)
def compute_max_iou_gt(ious):
r"""
For each GT, find the anchor with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = ious.shape[-1]
max_iou_index = ious.argmax(axis=-1)
is_max_iou = F.one_hot(max_iou_index, num_anchors)
return is_max_iou.astype(ious.dtype)
def generate_anchors_for_grid_cell(feats,
fpn_strides,
grid_cell_size=5.0,
grid_cell_offset=0.5,
dtype='float32'):
r"""
Like ATSS, generate anchors based on grid size.
Args:
feats (List[Tensor]): shape[s, (b, c, h, w)]
fpn_strides (tuple|list): shape[s], stride for each scale feature
grid_cell_size (float): anchor size
grid_cell_offset (float): The range is between 0 and 1.
Returns:
anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
anchor_points (Tensor): shape[l, 2], "x, y" format.
num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
"""
assert len(feats) == len(fpn_strides)
anchors = []
anchor_points = []
num_anchors_list = []
stride_tensor = []
for feat, stride in zip(feats, fpn_strides):
_, _, h, w = feat.shape
cell_half_size = grid_cell_size * stride * 0.5
shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
anchor = paddle.stack(
[
shift_x - cell_half_size, shift_y - cell_half_size,
shift_x + cell_half_size, shift_y + cell_half_size
],
axis=-1).astype(dtype)
anchor_point = paddle.stack([shift_x, shift_y], axis=-1).astype(dtype)
anchors.append(anchor.reshape([-1, 4]))
anchor_points.append(anchor_point.reshape([-1, 2]))
num_anchors_list.append(len(anchors[-1]))
stride_tensor.append(
paddle.full(
[num_anchors_list[-1], 1], stride, dtype=dtype))
anchors = paddle.concat(anchors)
anchors.stop_gradient = True
anchor_points = paddle.concat(anchor_points)
anchor_points.stop_gradient = True
stride_tensor = paddle.concat(stride_tensor)
stride_tensor.stop_gradient = True
return anchors, anchor_points, num_anchors_list, stride_tensor
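def _example_generate_anchors_for_grid_cell():
    # Illustrative sketch, not part of the original file: three FPN feature
    # maps from a 256 x 256 input give 32*32 + 16*16 + 8*8 = 1344 grid cells,
    # so anchors is [1344, 4], anchor_points is [1344, 2] and stride_tensor is
    # [1344, 1]. Only the spatial shapes of `feats` are used.
    feats = [paddle.rand([1, 64, 32, 32]),
             paddle.rand([1, 128, 16, 16]),
             paddle.rand([1, 256, 8, 8])]
    return generate_anchors_for_grid_cell(feats, fpn_strides=[8, 16, 32])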