Replace document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import task_aligned_assigner
from . import atss_assigner
from . import simota_assigner
from . import max_iou_assigner
from . import fcosr_assigner
from . import rotated_task_aligned_assigner
from . import task_aligned_assigner_cr
from . import uniform_assigner
from .utils import *
from .task_aligned_assigner import *
from .atss_assigner import *
from .simota_assigner import *
from .max_iou_assigner import *
from .fcosr_assigner import *
from .rotated_task_aligned_assigner import *
from .task_aligned_assigner_cr import *
from .uniform_assigner import *
from .hungarian_assigner import *
from .pose_utils import *

View File

@@ -0,0 +1,225 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import iou_similarity, batch_iou_similarity
from ..bbox_utils import bbox_center
from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
compute_max_iou_gt)
__all__ = ['ATSSAssigner']
@register
class ATSSAssigner(nn.Layer):
"""Bridging the Gap Between Anchor-based and Anchor-free Detection
via Adaptive Training Sample Selection
"""
__shared__ = ['num_classes']
def __init__(self,
topk=9,
num_classes=80,
force_gt_matching=False,
eps=1e-9,
sm_use=False):
super(ATSSAssigner, self).__init__()
self.topk = topk
self.num_classes = num_classes
self.force_gt_matching = force_gt_matching
self.eps = eps
self.sm_use = sm_use
def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
pad_gt_mask):
gt2anchor_distances_list = paddle.split(
gt2anchor_distances, num_anchors_list, axis=-1)
num_anchors_index = np.cumsum(num_anchors_list).tolist()
num_anchors_index = [0, ] + num_anchors_index[:-1]
is_in_topk_list = []
topk_idxs_list = []
for distances, anchors_index in zip(gt2anchor_distances_list,
num_anchors_index):
num_anchors = distances.shape[-1]
_, topk_idxs = paddle.topk(
distances, self.topk, axis=-1, largest=False)
topk_idxs_list.append(topk_idxs + anchors_index)
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
axis=-2).astype(gt2anchor_distances.dtype)
is_in_topk_list.append(is_in_topk * pad_gt_mask)
is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
return is_in_topk_list, topk_idxs_list
@paddle.no_grad()
def forward(self,
anchor_bboxes,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None,
pred_bboxes=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
The assignment is done in the following steps:
1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
2. compute center distance between all bboxes and gt
3. on each pyramid level, for each gt, select k bboxes whose centers
are closest to the gt center, so k*l bboxes are selected in total as
candidates for each gt
4. get the corresponding iou for these candidates, and compute the
mean and std; set mean + std as the iou threshold
5. select the candidates whose iou is greater than or equal to
the threshold as positive
6. limit the positive sample's center in gt
7. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
"xmin, ymin, xmax, ymax" format
num_anchors_list (List): num of anchors in each level
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes,
shape(B, n, 1), if None, then it will initialize with one_hot label
pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
"""
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
num_anchors, _ = anchor_bboxes.shape
batch_size, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# 1. compute iou between gt and anchor bbox, [B, n, L]
ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
ious = ious.reshape([batch_size, -1, num_anchors])
# 2. compute center distance between all anchors and gt, [B, n, L]
gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
anchor_centers = bbox_center(anchor_bboxes)
gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
.norm(2, axis=-1).reshape([batch_size, -1, num_anchors])
# 3. on each pyramid level, selecting topk closest candidates
# based on the center distance, [B, n, L]
is_in_topk, topk_idxs = self._gather_topk_pyramid(
gt2anchor_distances, num_anchors_list, pad_gt_mask)
# 4. get the corresponding iou for these candidates, and compute the
# mean and std, 5. set mean + std as the iou threshold
iou_candidates = ious * is_in_topk
iou_threshold = paddle.index_sample(
iou_candidates.flatten(stop_axis=-2),
topk_idxs.flatten(stop_axis=-2))
iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
iou_threshold.std(axis=-1, keepdim=True)
is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
paddle.zeros_like(is_in_topk))
# 6. check the positive sample's center in gt, [B, n, L]
if self.sm_use:
is_in_gts = check_points_inside_bboxes(
anchor_centers, gt_bboxes, sm_use=True)
else:
is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# 7. if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected.
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (
mask_positive_sum.unsqueeze(1) > 1).astype('int32').tile(
[1, num_max_boxes, 1]).astype('bool')
if self.sm_use:
is_max_iou = compute_max_iou_anchor(ious * mask_positive)
else:
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
# 8. make sure every gt_bbox matches the anchor
if self.force_gt_matching:
is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
[1, num_max_boxes, 1])
mask_positive = paddle.where(mask_max_iou, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
ind = list(range(self.num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
if pred_bboxes is not None:
# assigned iou
ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
ious = ious.max(axis=-2).unsqueeze(-1)
assigned_scores *= ious
elif gt_scores is not None:
gather_scores = paddle.gather(
gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
gather_scores = gather_scores.reshape([batch_size, num_anchors])
gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
paddle.zeros_like(gather_scores))
assigned_scores *= gather_scores.unsqueeze(-1)
return assigned_labels, assigned_bboxes, assigned_scores
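
Not part of the committed file above — a minimal usage sketch of ATSSAssigner under the assumption that the class is re-exported through ppdet.modeling.assigners (as the __init__.py earlier in this commit suggests). Inputs are random tensors shaped as in the forward docstring; each pyramid level must contain at least topk anchors.

import paddle
from ppdet.modeling.assigners import ATSSAssigner  # assumed import path

assigner = ATSSAssigner(topk=9, num_classes=80)

num_anchors_list = [256, 64, 16]                 # anchors per pyramid level (each >= topk)
L = sum(num_anchors_list)
axy = paddle.rand([L, 2]) * 80.
awh = paddle.rand([L, 2]) * 20. + 1.
anchor_bboxes = paddle.concat([axy, axy + awh], axis=-1)   # (L, 4), xmin, ymin, xmax, ymax
gxy = paddle.rand([2, 3, 2]) * 80.
gwh = paddle.rand([2, 3, 2]) * 20. + 4.
gt_bboxes = paddle.concat([gxy, gxy + gwh], axis=-1)       # (B, n, 4)
gt_labels = paddle.randint(0, 80, [2, 3, 1])               # (B, n, 1)
pad_gt_mask = paddle.ones([2, 3, 1], dtype='float32')      # every gt is a real box

labels, bboxes, scores = assigner(
    anchor_bboxes, num_anchors_list, gt_labels, gt_bboxes,
    pad_gt_mask, bg_index=80)
print(labels.shape, bboxes.shape, scores.shape)            # (B, L) (B, L, 4) (B, L, 80)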

View File

@@ -0,0 +1,147 @@
import paddle
import paddle.nn.functional as F
from ppdet.modeling.losses.clrnet_line_iou_loss import line_iou
def distance_cost(predictions, targets, img_w):
"""
Repeat predictions and targets to generate all combinations, then
use the abs distance as the new distance cost.
"""
num_priors = predictions.shape[0]
num_targets = targets.shape[0]
predictions = paddle.repeat_interleave(
predictions, num_targets, axis=0)[..., 6:]
targets = paddle.concat(x=num_priors * [targets])[..., 6:]
invalid_masks = (targets < 0) | (targets >= img_w)
lengths = (~invalid_masks).sum(axis=1)
distances = paddle.abs(x=targets - predictions)
distances[invalid_masks] = 0.0
distances = distances.sum(axis=1) / (lengths.cast("float32") + 1e-09)
distances = distances.reshape([num_priors, num_targets])
return distances
def focal_cost(cls_pred, gt_labels, alpha=0.25, gamma=2, eps=1e-12):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value
"""
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + eps).log() * (1 - alpha) * cls_pred.pow(gamma)
pos_cost = -(cls_pred + eps).log() * alpha * (1 - cls_pred).pow(gamma)
cls_cost = pos_cost.index_select(
gt_labels, axis=1) - neg_cost.index_select(
gt_labels, axis=1)
return cls_cost
def dynamic_k_assign(cost, pair_wise_ious):
"""
Assign ground truths to priors dynamically.
Args:
cost: the assignment cost.
pair_wise_ious: iou of ground truths and priors.
Returns:
prior_idx: the index of assigned prior.
gt_idx: the corresponding ground truth index.
"""
matching_matrix = paddle.zeros_like(cost)
ious_matrix = pair_wise_ious
ious_matrix[ious_matrix < 0] = 0.0
n_candidate_k = 4
topk_ious, _ = paddle.topk(ious_matrix, n_candidate_k, axis=0)
dynamic_ks = paddle.clip(x=topk_ious.sum(0).cast("int32"), min=1)
num_gt = cost.shape[1]
for gt_idx in range(num_gt):
_, pos_idx = paddle.topk(
x=cost[:, gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
matching_matrix[pos_idx, gt_idx] = 1.0
del topk_ious, dynamic_ks, pos_idx
matched_gt = matching_matrix.sum(axis=1)
if (matched_gt > 1).sum() > 0:
matched_gt_indices = paddle.nonzero(matched_gt > 1)[:, 0]
cost_argmin = paddle.argmin(
cost.index_select(matched_gt_indices), axis=1)
# zero out all matches of priors assigned to multiple gts, keeping only the min-cost one below
matching_matrix[matched_gt_indices] *= 0.0
matching_matrix[matched_gt_indices, cost_argmin] = 1.0
prior_idx = matching_matrix.sum(axis=1).nonzero()
gt_idx = matching_matrix[prior_idx].argmax(axis=-1)
return prior_idx.flatten(), gt_idx.flatten()
def cdist_paddle(x1, x2, p=2):
assert x1.shape[1] == x2.shape[1]
B, M = x1.shape
# if p == np.inf:
# dist = np.max(np.abs(x1[:, np.newaxis, :] - x2[np.newaxis, :, :]), axis=-1)
if p == 1:
dist = paddle.sum(
paddle.abs(x1.unsqueeze(axis=1) - x2.unsqueeze(axis=0)), axis=-1)
else:
dist = paddle.pow(paddle.sum(paddle.pow(
paddle.abs(x1.unsqueeze(axis=1) - x2.unsqueeze(axis=0)), p),
axis=-1),
1 / p)
return dist
def assign(predictions,
targets,
img_w,
img_h,
distance_cost_weight=3.0,
cls_cost_weight=1.0):
"""
Computes dynamic matching based on the cost, which includes the cls cost
and the lane similarity cost.
Args:
predictions (Tensor): predictions predicted by each stage, shape: (num_priors, 78)
targets (Tensor): lane targets, shape: (num_targets, 78)
Returns:
matched_row_inds (Tensor): matched predictions, shape: (num_targets)
matched_col_inds (Tensor): matched targets, shape: (num_targets)
"""
predictions = predictions.detach().clone()
predictions[:, 3] *= img_w - 1
predictions[:, 6:] *= img_w - 1
targets = targets.detach().clone()
distances_score = distance_cost(predictions, targets, img_w)
distances_score = 1 - distances_score / paddle.max(x=distances_score) + 0.01
cls_score = focal_cost(predictions[:, :2], targets[:, 1].cast('int64'))
num_priors = predictions.shape[0]
num_targets = targets.shape[0]
target_start_xys = targets[:, 2:4]
target_start_xys[..., 0] *= (img_h - 1)
prediction_start_xys = predictions[:, 2:4]
prediction_start_xys[..., 0] *= (img_h - 1)
start_xys_score = cdist_paddle(
prediction_start_xys, target_start_xys,
p=2).reshape([num_priors, num_targets])
start_xys_score = 1 - start_xys_score / paddle.max(x=start_xys_score) + 0.01
target_thetas = targets[:, 4].unsqueeze(axis=-1)
theta_score = cdist_paddle(
predictions[:, 4].unsqueeze(axis=-1), target_thetas,
p=1).reshape([num_priors, num_targets]) * 180
theta_score = 1 - theta_score / paddle.max(x=theta_score) + 0.01
cost = -(distances_score * start_xys_score * theta_score
)**2 * distance_cost_weight + cls_score * cls_cost_weight
iou = line_iou(predictions[..., 6:], targets[..., 6:], img_w, aligned=False)
matched_row_inds, matched_col_inds = dynamic_k_assign(cost, iou)
return matched_row_inds, matched_col_inds
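
Not part of the committed file above — a small sketch of the inputs dynamic_k_assign expects, assuming the functions defined above are in scope: a (num_priors, num_gt) assignment cost matrix and the matching IoU matrix (in assign() these come from the combined distance/start-point/theta/cls costs and line_iou).

import paddle

num_priors, num_gt = 20, 3
cost = paddle.rand([num_priors, num_gt])            # assignment cost, lower is better
pair_wise_ious = paddle.rand([num_priors, num_gt])  # IoU between priors and gt lanes

prior_idx, gt_idx = dynamic_k_assign(cost, pair_wise_ious)
print(prior_idx.numpy(), gt_idx.numpy())            # matched prior indices and their gts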

View File

@@ -0,0 +1,227 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.rbox_utils import box2corners, check_points_in_polys, paddle_gather
__all__ = ['FCOSRAssigner']
EPS = 1e-9
@register
class FCOSRAssigner(nn.Layer):
""" FCOSR Assigner, refer to https://arxiv.org/abs/2111.10780 for details
1. compute the normalized gaussian distribution score and the refined gaussian distribution score
2. following ellipse center sampling, sample the points whose normalized gaussian distribution score is greater than the threshold
3. following multi-level sampling, assign the ground truth to the feature map that satisfies two conditions:
i) the ratio between the short edge of the target and the stride of the feature map is less than 2;
ii) the long edge of the minimum bounding rectangle of the target is larger than the acceptance range of the feature map
4. following fuzzy sample label assignment, the points satisfying 2 and 3 will be assigned to the ground truth according to the gaussian distribution score
"""
__shared__ = ['num_classes']
def __init__(self,
num_classes=80,
factor=12,
threshold=0.23,
boundary=[[-1, 128], [128, 320], [320, 10000]],
score_type='iou'):
super(FCOSRAssigner, self).__init__()
self.num_classes = num_classes
self.factor = factor
self.threshold = threshold
self.boundary = [
paddle.to_tensor(
l, dtype=paddle.float32).reshape([1, 1, 2]) for l in boundary
]
self.score_type = score_type
def get_gaussian_distribution_score(self, points, gt_rboxes, gt_polys):
# projecting points to coordinate system defined by each rbox
# [B, N, 4, 2] -> 4 * [B, N, 1, 2]
a, b, c, d = gt_polys.split(4, axis=2)
# [1, L, 2] -> [1, 1, L, 2]
points = points.unsqueeze(0)
ab = b - a
ad = d - a
# [B, N, 5] -> [B, N, 2], [B, N, 2], [B, N, 1]
xy, wh, angle = gt_rboxes.split([2, 2, 1], axis=-1)
# [B, N, 2] -> [B, N, 1, 2]
xy = xy.unsqueeze(2)
# vector of points to center [B, N, L, 2]
vec = points - xy
# <ab, vec> = |ab| * |vec| * cos(theta) [B, N, L]
vec_dot_ab = paddle.sum(vec * ab, axis=-1)
# <ad, vec> = |ad| * |vec| * cos(theta) [B, N, L]
vec_dot_ad = paddle.sum(vec * ad, axis=-1)
# norm_ab [B, N, L]
norm_ab = paddle.sum(ab * ab, axis=-1).sqrt()
# norm_ad [B, N, L]
norm_ad = paddle.sum(ad * ad, axis=-1).sqrt()
# min(h, w), [B, N, 1]
min_edge = paddle.min(wh, axis=-1, keepdim=True)
# delta_x, delta_y [B, N, L]
delta_x = vec_dot_ab.pow(2) / (norm_ab.pow(3) * min_edge + EPS)
delta_y = vec_dot_ad.pow(2) / (norm_ad.pow(3) * min_edge + EPS)
# score [B, N, L]
norm_score = paddle.exp(-0.5 * self.factor * (delta_x + delta_y))
# simplified calculation
sigma = min_edge / self.factor
refined_score = norm_score / (2 * np.pi * sigma + EPS)
return norm_score, refined_score
def get_rotated_inside_mask(self, points, gt_polys, scores):
inside_mask = check_points_in_polys(points, gt_polys)
center_mask = scores >= self.threshold
return (inside_mask & center_mask).cast(paddle.float32)
def get_inside_range_mask(self, points, gt_bboxes, gt_rboxes, stride_tensor,
regress_range):
# [1, L, 2] -> [1, 1, L, 2]
points = points.unsqueeze(0)
# [B, n, 4] -> [B, n, 1, 4]
x1y1, x2y2 = gt_bboxes.unsqueeze(2).split(2, axis=-1)
# [B, n, L, 2]
lt = points - x1y1
rb = x2y2 - points
# [B, n, L, 4]
ltrb = paddle.concat([lt, rb], axis=-1)
# [B, n, L, 4] -> [B, n, L]
inside_mask = paddle.min(ltrb, axis=-1) > EPS
# regress_range [1, L, 2] -> [1, 1, L, 2]
regress_range = regress_range.unsqueeze(0)
# stride_tensor [1, L, 1] -> [1, 1, L]
stride_tensor = stride_tensor.transpose((0, 2, 1))
# fcos range
# [B, n, L, 4] -> [B, n, L]
ltrb_max = paddle.max(ltrb, axis=-1)
# [1, 1, L, 2] -> [1, 1, L]
low, high = regress_range[..., 0], regress_range[..., 1]
# [B, n, L]
regress_mask = (ltrb_max >= low) & (ltrb_max <= high)
# mask for rotated
# [B, n, 1]
min_edge = paddle.min(gt_rboxes[..., 2:4], axis=-1, keepdim=True)
# [B, n , L]
rotated_mask = ((min_edge / stride_tensor) < 2.0) & (ltrb_max > high)
mask = inside_mask & (regress_mask | rotated_mask)
return mask.cast(paddle.float32)
@paddle.no_grad()
def forward(self,
anchor_points,
stride_tensor,
num_anchors_list,
gt_labels,
gt_bboxes,
gt_rboxes,
pad_gt_mask,
bg_index,
pred_rboxes=None):
r"""
Args:
anchor_points (Tensor, float32): pre-defined anchor points, shape(1, L, 2),
"x, y" format
stride_tensor (Tensor, float32): stride tensor, shape (1, L, 1)
num_anchors_list (List): num of anchors in each level
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
gt_rboxes (Tensor, float32): Ground truth rotated bboxes, shape(B, n, 5)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
pred_rboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 5)
Returns:
assigned_labels (Tensor): (B, L)
assigned_rboxes (Tensor): (B, L, 5)
assigned_scores (Tensor): (B, L, C), if pred_rboxes is not None, then output ious
"""
_, num_anchors, _ = anchor_points.shape
batch_size, num_max_boxes, _ = gt_rboxes.shape
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
assigned_rboxes = paddle.zeros([batch_size, num_anchors, 5])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, self.num_classes])
return assigned_labels, assigned_rboxes, assigned_scores
# get normalized gaussian distribution score and refined distribution score
gt_polys = box2corners(gt_rboxes)
score, refined_score = self.get_gaussian_distribution_score(
anchor_points, gt_rboxes, gt_polys)
inside_mask = self.get_rotated_inside_mask(anchor_points, gt_polys,
score)
regress_ranges = []
for num, bound in zip(num_anchors_list, self.boundary):
regress_ranges.append(bound.tile((1, num, 1)))
regress_ranges = paddle.concat(regress_ranges, axis=1)
regress_mask = self.get_inside_range_mask(
anchor_points, gt_bboxes, gt_rboxes, stride_tensor, regress_ranges)
# [B, n, L]
mask_positive = inside_mask * regress_mask * pad_gt_mask
refined_score = refined_score * mask_positive - (1. - mask_positive)
argmax_refined_score = refined_score.argmax(axis=-2)
max_refined_score = refined_score.max(axis=-2)
assigned_gt_index = argmax_refined_score
# assigned target
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
max_refined_score > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_rboxes = paddle.gather(
gt_rboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
assigned_rboxes = assigned_rboxes.reshape([batch_size, num_anchors, 5])
assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
ind = list(range(self.num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
if self.score_type == 'gaussian':
selected_scores = paddle_gather(
score, 1, argmax_refined_score.unsqueeze(-2)).squeeze(-2)
assigned_scores = assigned_scores * selected_scores.unsqueeze(-1)
elif self.score_type == 'iou':
assert pred_rboxes is not None, 'If score type is iou, pred_rboxes should not be None'
from ext_op import matched_rbox_iou
b, l = pred_rboxes.shape[:2]
iou_score = matched_rbox_iou(
pred_rboxes.reshape((-1, 5)), assigned_rboxes.reshape(
(-1, 5))).reshape((b, l, 1))
assigned_scores = assigned_scores * iou_score
return assigned_labels, assigned_rboxes, assigned_scores
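
Not part of the committed file above — a minimal usage sketch of FCOSRAssigner with random rotated boxes, assuming the class is re-exported through ppdet.modeling.assigners. score_type='gaussian' is used so the sketch avoids the compiled ext_op rotated-IoU operator required by the default 'iou' branch; gt_bboxes here is only a rough axis-aligned version of the rotated boxes.

import paddle
from ppdet.modeling.assigners import FCOSRAssigner  # assumed import path

assigner = FCOSRAssigner(num_classes=15, score_type='gaussian')

num_anchors_list = [256, 64, 16]                    # one entry per boundary range
L = sum(num_anchors_list)
anchor_points = paddle.rand([1, L, 2]) * 640.       # (1, L, 2), "x, y"
stride_tensor = paddle.concat(
    [paddle.full([n, 1], s) for n, s in zip(num_anchors_list, [8., 16., 32.])]).unsqueeze(0)

B, n = 2, 3
cxy = paddle.rand([B, n, 2]) * 600.
wh = paddle.rand([B, n, 2]) * 80. + 8.
ang = paddle.rand([B, n, 1]) * 3.14
gt_rboxes = paddle.concat([cxy, wh, ang], axis=-1)                # (B, n, 5): cx, cy, w, h, angle
gt_bboxes = paddle.concat([cxy - wh / 2, cxy + wh / 2], axis=-1)  # rough axis-aligned boxes
gt_labels = paddle.randint(0, 15, [B, n, 1])
pad_gt_mask = paddle.ones([B, n, 1])

labels, rboxes, scores = assigner(
    anchor_points, stride_tensor, num_anchors_list, gt_labels,
    gt_bboxes, gt_rboxes, pad_gt_mask, bg_index=15)
print(labels.shape, rboxes.shape, scores.shape)     # (B, L) (B, L, 5) (B, L, 15)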

View File

@@ -0,0 +1,316 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from scipy.optimize import linear_sum_assignment
except ImportError:
linear_sum_assignment = None
import paddle
from ppdet.core.workspace import register
__all__ = ['PoseHungarianAssigner', 'PseudoSampler']
class AssignResult:
"""Stores assignments between predicted and truth boxes.
Attributes:
num_gts (int): the number of truth boxes considered when computing this
assignment
gt_inds (LongTensor): for each predicted box indicates the 1-based
index of the assigned truth box. 0 means unassigned and -1 means
ignore.
max_overlaps (FloatTensor): the iou between the predicted box and its
assigned truth box.
labels (None | LongTensor): If specified, for each predicted box
indicates the category label of the assigned truth box.
"""
def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
self.num_gts = num_gts
self.gt_inds = gt_inds
self.max_overlaps = max_overlaps
self.labels = labels
# Interface for possible user-defined properties
self._extra_properties = {}
@property
def num_preds(self):
"""int: the number of predictions in this assignment"""
return len(self.gt_inds)
def set_extra_property(self, key, value):
"""Set user-defined new property."""
assert key not in self.info
self._extra_properties[key] = value
def get_extra_property(self, key):
"""Get user-defined property."""
return self._extra_properties.get(key, None)
@property
def info(self):
"""dict: a dictionary of info about the object"""
basic_info = {
'num_gts': self.num_gts,
'num_preds': self.num_preds,
'gt_inds': self.gt_inds,
'max_overlaps': self.max_overlaps,
'labels': self.labels,
}
basic_info.update(self._extra_properties)
return basic_info
@register
class PoseHungarianAssigner:
"""Computes one-to-one matching between predictions and ground truth.
This class computes an assignment between the targets and the predictions
based on the costs. The costs are weighted sum of three components:
classification cost, regression L1 cost and regression oks cost. The
targets don't include the no_object, so generally there are more
predictions than targets. After the one-to-one matching, the un-matched
are treated as backgrounds. Thus each query prediction will be assigned
with `0` or a positive integer indicating the ground truth index:
- 0: negative sample, no assigned gt.
- positive integer: positive sample, index (1-based) of assigned gt.
Args:
cls_weight (int | float, optional): The scale factor for classification
cost. Default 1.0.
kpt_weight (int | float, optional): The scale factor for regression
L1 cost. Default 1.0.
oks_weight (int | float, optional): The scale factor for regression
oks cost. Default 1.0.
"""
__inject__ = ['cls_cost', 'kpt_cost', 'oks_cost']
def __init__(self,
cls_cost='ClassificationCost',
kpt_cost='KptL1Cost',
oks_cost='OksCost'):
self.cls_cost = cls_cost
self.kpt_cost = kpt_cost
self.oks_cost = oks_cost
def assign(self,
cls_pred,
kpt_pred,
gt_labels,
gt_keypoints,
gt_areas,
img_meta,
eps=1e-7):
"""Computes one-to-one matching based on the weighted costs.
This method assign each query prediction to a ground truth or
background. The `assigned_gt_inds` with -1 means don't care,
0 means negative sample, and positive number is the index (1-based)
of assigned gt.
The assignment is done in the following steps, the order matters.
1. assign every prediction to -1
2. compute the weighted costs
3. do Hungarian matching on CPU based on the costs
4. assign all to 0 (background) first, then for each matched pair
between predictions and gts, treat this prediction as foreground
and assign the corresponding gt index (plus 1) to it.
Args:
cls_pred (Tensor): Predicted classification logits, shape
[num_query, num_class].
kpt_pred (Tensor): Predicted keypoints with normalized coordinates
(x_{i}, y_{i}), which are all in range [0, 1]. Shape
[num_query, K*2].
gt_labels (Tensor): Label of `gt_keypoints`, shape (num_gt,).
gt_keypoints (Tensor): Ground truth keypoints with unnormalized
coordinates [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
p^{K}_x, p^{K}_y, p^{K}_v]. Shape [num_gt, K*3].
gt_areas (Tensor): Ground truth mask areas, shape (num_gt,).
img_meta (dict): Meta information for current image.
eps (int | float, optional): A value added to the denominator for
numerical stability. Default 1e-7.
Returns:
:obj:`AssignResult`: The assigned result.
"""
num_gts, num_kpts = gt_keypoints.shape[0], kpt_pred.shape[0]
if not gt_keypoints.astype('bool').any():
num_gts = 0
# 1. assign -1 by default
assigned_gt_inds = paddle.full((num_kpts, ), -1, dtype="int64")
assigned_labels = paddle.full((num_kpts, ), -1, dtype="int64")
if num_gts == 0 or num_kpts == 0:
# No ground truth or keypoints, return empty assignment
if num_gts == 0:
# No ground truth, assign all to background
assigned_gt_inds[:] = 0
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
img_h, img_w, _ = img_meta['img_shape']
factor = paddle.to_tensor(
[img_w, img_h, img_w, img_h], dtype=gt_keypoints.dtype).reshape(
(1, -1))
# 2. compute the weighted costs
# classification cost
cls_cost = self.cls_cost(cls_pred, gt_labels)
# keypoint regression L1 cost
gt_keypoints_reshape = gt_keypoints.reshape((gt_keypoints.shape[0], -1,
3))
valid_kpt_flag = gt_keypoints_reshape[..., -1]
kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
2))
normalize_gt_keypoints = gt_keypoints_reshape[
..., :2] / factor[:, :2].unsqueeze(0)
kpt_cost = self.kpt_cost(kpt_pred_tmp, normalize_gt_keypoints,
valid_kpt_flag)
# keypoint OKS cost
kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
2))
kpt_pred_tmp = kpt_pred_tmp * factor[:, :2].unsqueeze(0)
oks_cost = self.oks_cost(kpt_pred_tmp, gt_keypoints_reshape[..., :2],
valid_kpt_flag, gt_areas)
# weighted sum of above three costs
cost = cls_cost + kpt_cost + oks_cost
# 3. do Hungarian matching on CPU using linear_sum_assignment
cost = cost.detach().cpu()
if linear_sum_assignment is None:
raise ImportError('Please run "pip install scipy" '
'to install scipy first.')
matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
matched_row_inds = paddle.to_tensor(matched_row_inds)
matched_col_inds = paddle.to_tensor(matched_col_inds)
# 4. assign backgrounds and foregrounds
# assign all indices to backgrounds first
assigned_gt_inds[:] = 0
# assign foregrounds based on matching results
assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
assigned_labels[matched_row_inds] = gt_labels[matched_col_inds][
..., 0].astype("int64")
return AssignResult(
num_gts, assigned_gt_inds, None, labels=assigned_labels)
class SamplingResult:
"""Bbox sampling result.
"""
def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
gt_flags):
self.pos_inds = pos_inds
self.neg_inds = neg_inds
if pos_inds.size > 0:
self.pos_bboxes = bboxes[pos_inds]
self.neg_bboxes = bboxes[neg_inds]
self.pos_is_gt = gt_flags[pos_inds]
self.num_gts = gt_bboxes.shape[0]
self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
if gt_bboxes.numel() == 0:
# hack for index error case
assert self.pos_assigned_gt_inds.numel() == 0
self.pos_gt_bboxes = paddle.zeros(
gt_bboxes.shape, dtype=gt_bboxes.dtype).reshape((-1, 4))
else:
if len(gt_bboxes.shape) < 2:
gt_bboxes = gt_bboxes.reshape((-1, 4))
self.pos_gt_bboxes = paddle.index_select(
gt_bboxes,
self.pos_assigned_gt_inds.astype('int64'),
axis=0)
if assign_result.labels is not None:
self.pos_gt_labels = assign_result.labels[pos_inds]
else:
self.pos_gt_labels = None
@property
def bboxes(self):
"""paddle.Tensor: concatenated positive and negative boxes"""
return paddle.concat([self.pos_bboxes, self.neg_bboxes])
def __nice__(self):
data = self.info.copy()
data['pos_bboxes'] = data.pop('pos_bboxes').shape
data['neg_bboxes'] = data.pop('neg_bboxes').shape
parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
body = ' ' + ',\n '.join(parts)
return '{\n' + body + '\n}'
@property
def info(self):
"""Returns a dictionary of info about the object."""
return {
'pos_inds': self.pos_inds,
'neg_inds': self.neg_inds,
'pos_bboxes': self.pos_bboxes,
'neg_bboxes': self.neg_bboxes,
'pos_is_gt': self.pos_is_gt,
'num_gts': self.num_gts,
'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
}
@register
class PseudoSampler:
"""A pseudo sampler that does not do sampling actually."""
def __init__(self, **kwargs):
pass
def _sample_pos(self, **kwargs):
"""Sample positive samples."""
raise NotImplementedError
def _sample_neg(self, **kwargs):
"""Sample negative samples."""
raise NotImplementedError
def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):
"""Directly returns the positive and negative indices of samples.
Args:
assign_result (:obj:`AssignResult`): Assigned results
bboxes (paddle.Tensor): Bounding boxes
gt_bboxes (paddle.Tensor): Ground truth boxes
Returns:
:obj:`SamplingResult`: sampler results
"""
pos_inds = paddle.nonzero(
assign_result.gt_inds > 0, as_tuple=False).squeeze(-1)
neg_inds = paddle.nonzero(
assign_result.gt_inds == 0, as_tuple=False).squeeze(-1)
gt_flags = paddle.zeros([bboxes.shape[0]], dtype='int32')
sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
assign_result, gt_flags)
return sampling_result
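
Not part of the committed file above — a small sketch wiring AssignResult and PseudoSampler together by hand, assuming the classes defined above are in scope. gt_inds is 1-based: 0 marks background and a positive value i points to the i-th ground truth.

import paddle

# four predictions: boxes 1 and 3 are matched to gts 2 and 1 respectively
gt_inds = paddle.to_tensor([0, 2, 0, 1], dtype='int64')
labels = paddle.to_tensor([-1, 5, -1, 3], dtype='int64')
assign_result = AssignResult(num_gts=2, gt_inds=gt_inds, max_overlaps=None, labels=labels)

bboxes = paddle.rand([4, 4])        # predicted boxes, xyxy
gt_bboxes = paddle.rand([2, 4])     # ground truth boxes, xyxy

sampler = PseudoSampler()
result = sampler.sample(assign_result, bboxes, gt_bboxes)
print(result.pos_inds.numpy(), result.neg_inds.numpy())   # [1 3] [0 2]
print(result.pos_gt_labels.numpy())                       # [5 3]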

View File

@@ -0,0 +1,52 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register
from ppdet.modeling.proposal_generator.target import label_box
__all__ = ['MaxIoUAssigner']
@register
class MaxIoUAssigner(object):
"""a standard bbox assigner based on max IoU, use ppdet's label_box
as backend.
Args:
positive_overlap (float): threshold for defining positive samples
negative_overlap (float): threshold for denining negative samples
allow_low_quality (bool): whether to lower IoU thr if a GT poorly
overlaps with candidate bboxes
"""
def __init__(self,
positive_overlap,
negative_overlap,
allow_low_quality=True):
self.positive_overlap = positive_overlap
self.negative_overlap = negative_overlap
self.allow_low_quality = allow_low_quality
def __call__(self, bboxes, gt_bboxes):
matches, match_labels = label_box(
bboxes,
gt_bboxes,
positive_overlap=self.positive_overlap,
negative_overlap=self.negative_overlap,
allow_low_quality=self.allow_low_quality,
ignore_thresh=-1,
is_crowd=None,
assign_on_cpu=False)
return matches, match_labels
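
Not part of the committed file above — a minimal usage sketch of MaxIoUAssigner, assuming the class is re-exported through ppdet.modeling.assigners. Boxes are xyxy tensors; for every candidate box, label_box returns the index of its best-matching gt plus a positive (1) / negative (0) / ignore (-1) label.

import paddle
from ppdet.modeling.assigners import MaxIoUAssigner  # assumed import path

assigner = MaxIoUAssigner(positive_overlap=0.7, negative_overlap=0.3)

axy = paddle.rand([100, 2]) * 200.
awh = paddle.rand([100, 2]) * 50. + 1.
bboxes = paddle.concat([axy, axy + awh], axis=-1)       # candidate boxes (100, 4)
gxy = paddle.rand([5, 2]) * 200.
gwh = paddle.rand([5, 2]) * 50. + 10.
gt_bboxes = paddle.concat([gxy, gxy + gwh], axis=-1)    # ground truth boxes (5, 4)

matches, match_labels = assigner(bboxes, gt_bboxes)
print(matches.shape, match_labels.shape)                # (100,) (100,)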

View File

@@ -0,0 +1,275 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register
__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
def masked_fill(x, mask, value):
y = paddle.full(x.shape, value, x.dtype)
return paddle.where(mask, y, x)
@register
class KptL1Cost(object):
"""KptL1Cost.
this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
Args:
weight (int | float, optional): loss_weight.
"""
def __init__(self, weight=1.0):
self.weight = weight
def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
"""
Args:
kpt_pred (Tensor): Predicted keypoints with normalized coordinates
(x_{i}, y_{i}), which are all in range [0, 1]. Shape
[num_query, K, 2].
gt_keypoints (Tensor): Ground truth keypoints with normalized
coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
Shape [num_gt, K].
Returns:
paddle.Tensor: kpt_cost value with weight.
"""
kpt_cost = []
for i in range(len(gt_keypoints)):
if gt_keypoints[i].size == 0:
kpt_cost.append(kpt_pred.sum() * 0)
kpt_pred_tmp = kpt_pred.clone()
valid_flag = valid_kpt_flag[i] > 0
valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
kpt_pred_tmp)
if not valid_flag_expand.all():
kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
cost = F.pairwise_distance(
kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
gt_keypoints[i].reshape((-1, )).unsqueeze(0),
p=1,
keepdim=True)
avg_factor = paddle.clip(
valid_flag.astype('float32').sum() * 2, 1.0)
cost = cost / avg_factor
kpt_cost.append(cost)
kpt_cost = paddle.concat(kpt_cost, axis=1)
return kpt_cost * self.weight
@register
class OksCost(object):
"""OksCost.
this function is based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py
Args:
num_keypoints (int): number of keypoints
weight (int | float, optional): loss_weight.
"""
def __init__(self, num_keypoints=17, weight=1.0):
self.weight = weight
if num_keypoints == 17:
self.sigmas = np.array(
[
.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
1.07, .87, .87, .89, .89
],
dtype=np.float32) / 10.0
elif num_keypoints == 14:
self.sigmas = np.array(
[
.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
.89, .79, .79
],
dtype=np.float32) / 10.0
else:
raise ValueError(f'Unsupported keypoints number {num_keypoints}')
def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
"""
Args:
kpt_pred (Tensor): Predicted keypoints with unnormalized
coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
gt_keypoints (Tensor): Ground truth keypoints with unnormalized
coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
Shape [num_gt, K].
gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].
Returns:
paddle.Tensor: oks_cost value with weight.
"""
sigmas = paddle.to_tensor(self.sigmas)
variances = (sigmas * 2)**2
oks_cost = []
assert len(gt_keypoints) == len(gt_areas)
for i in range(len(gt_keypoints)):
if gt_keypoints[i].size == 0:
oks_cost.append(kpt_pred.sum() * 0)
squared_distance = \
(kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
(kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
vis_flag = (valid_kpt_flag[i] > 0).astype('int')
vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
num_vis_kpt = vis_ind.shape[0]
# assert num_vis_kpt > 0
if num_vis_kpt == 0:
oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
continue
area = gt_areas[i]
squared_distance0 = squared_distance / (area * variances * 2)
squared_distance0 = paddle.index_select(
squared_distance0, vis_ind, axis=1)
squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
keepdim=True)
oks = squared_distance1 / num_vis_kpt
# The 1 is a constant that doesn't change the matching, so omitted.
oks_cost.append(-oks)
oks_cost = paddle.concat(oks_cost, axis=1)
return oks_cost * self.weight
@register
class ClassificationCost:
"""ClsSoftmaxCost.
Args:
weight (int | float, optional): loss_weight
"""
def __init__(self, weight=1.):
self.weight = weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
(num_query, num_class).
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value with weight
"""
# Following the official DETR repo: in contrast to the loss, where
# NLL is used, here we approximate it by 1 - cls_score[gt_label].
# The 1 is a constant that doesn't change the matching,
# so it can be omitted.
cls_score = cls_pred.softmax(-1)
cls_cost = -cls_score[:, gt_labels]
return cls_cost * self.weight
@register
class FocalLossCost:
"""FocalLossCost.
Args:
weight (int | float, optional): loss_weight
alpha (int | float, optional): focal_loss alpha
gamma (int | float, optional): focal_loss gamma
eps (float, optional): default 1e-12
binary_input (bool, optional): Whether the input is binary,
default False.
"""
def __init__(self,
weight=1.,
alpha=0.25,
gamma=2,
eps=1e-12,
binary_input=False):
self.weight = weight
self.alpha = alpha
self.gamma = gamma
self.eps = eps
self.binary_input = binary_input
def _focal_loss_cost(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits, shape
(num_query, num_class).
gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
Returns:
paddle.Tensor: cls_cost value with weight
"""
if gt_labels.size == 0:
return cls_pred.sum() * 0
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + self.eps).log() * (
1 - self.alpha) * cls_pred.pow(self.gamma)
pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
1 - cls_pred).pow(self.gamma)
cls_cost = paddle.index_select(
pos_cost, gt_labels, axis=1) - paddle.index_select(
neg_cost, gt_labels, axis=1)
return cls_cost * self.weight
def _mask_focal_loss_cost(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits
in shape (num_query, d1, ..., dn), dtype=paddle.float32.
gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
dtype=paddle.long. Labels should be binary.
Returns:
Tensor: Focal cost matrix with weight in shape\
(num_query, num_gt).
"""
cls_pred = cls_pred.flatten(1)
gt_labels = gt_labels.flatten(1).float()
n = cls_pred.shape[1]
cls_pred = F.sigmoid(cls_pred)
neg_cost = -(1 - cls_pred + self.eps).log() * (
1 - self.alpha) * cls_pred.pow(self.gamma)
pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
1 - cls_pred).pow(self.gamma)
cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
return cls_cost / n * self.weight
def __call__(self, cls_pred, gt_labels):
"""
Args:
cls_pred (Tensor): Predicted classification logits.
gt_labels (Tensor): Labels.
Returns:
Tensor: Focal cost matrix with weight in shape\
(num_query, num_gt).
"""
if self.binary_input:
return self._mask_focal_loss_cost(cls_pred, gt_labels)
else:
return self._focal_loss_cost(cls_pred, gt_labels)
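
Not part of the committed file above — a small sketch of how the cost classes above combine into a (num_query, num_gt) cost matrix, assuming they are in scope. KptL1Cost expects keypoints normalized to [0, 1] while OksCost expects pixel coordinates, so the sketch scales by an assumed 640-pixel image size; the weights are illustrative only.

import paddle

num_query, num_gt, K = 8, 2, 17
cls_pred = paddle.randn([num_query, 2])               # classification logits
kpt_pred = paddle.rand([num_query, K, 2])             # normalized keypoints
gt_keypoints = paddle.rand([num_gt, K, 2])            # normalized keypoints
valid_kpt_flag = paddle.ones([num_gt, K])             # all keypoints visible
gt_areas = paddle.rand([num_gt]) * 100. + 1.
gt_labels = paddle.zeros([num_gt], dtype='int64')     # single "person" class

cost = (ClassificationCost(weight=1.0)(cls_pred, gt_labels) +
        KptL1Cost(weight=70.0)(kpt_pred, gt_keypoints, valid_kpt_flag) +
        OksCost(weight=7.0)(kpt_pred * 640., gt_keypoints * 640., valid_kpt_flag, gt_areas))
print(cost.shape)                                     # [num_query, num_gt]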

View File

@@ -0,0 +1,164 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes
from .utils import gather_topk_anchors, compute_max_iou_anchor
__all__ = ['RotatedTaskAlignedAssigner']
@register
class RotatedTaskAlignedAssigner(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection
"""
def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
super(RotatedTaskAlignedAssigner, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.eps = eps
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between all bboxes (bboxes of all pyramid levels) and gt
2. select the top-k bboxes as candidates for each gt
3. limit the positive sample's center in gt (because the anchor-free detector
can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou will be selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5)
anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format
num_anchors_list (List): num of anchors in each level, shape(L)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 5)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious)
ious.stop_gradient = True
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta)
# check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
assigned_bboxes.stop_gradient = True
assigned_scores.stop_gradient = True
assigned_labels.stop_gradient = True
return assigned_labels, assigned_bboxes, assigned_scores
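
Not part of the committed file above — a minimal usage sketch of RotatedTaskAlignedAssigner, assuming the class is re-exported through ppdet.modeling.assigners and that PaddleDetection's ext_op custom operators are compiled, since rotated_iou_similarity relies on them. Rotated boxes are (cx, cy, w, h, angle).

import paddle
from ppdet.modeling.assigners import RotatedTaskAlignedAssigner  # assumed import path

assigner = RotatedTaskAlignedAssigner(topk=13, alpha=1.0, beta=6.0)

B, L, C, n = 2, 336, 15, 3
num_anchors_list = [256, 64, 16]
pred_scores = paddle.rand([B, L, C])
pcxy = paddle.rand([B, L, 2]) * 640.
pwh = paddle.rand([B, L, 2]) * 60. + 8.
pang = paddle.rand([B, L, 1]) * 3.14
pred_bboxes = paddle.concat([pcxy, pwh, pang], axis=-1)     # (B, L, 5)
anchor_points = paddle.rand([1, L, 2]) * 640.
gcxy = paddle.rand([B, n, 2]) * 600.
gwh = paddle.rand([B, n, 2]) * 80. + 16.
gang = paddle.rand([B, n, 1]) * 3.14
gt_bboxes = paddle.concat([gcxy, gwh, gang], axis=-1)       # (B, n, 5)
gt_labels = paddle.randint(0, C, [B, n, 1])
pad_gt_mask = paddle.ones([B, n, 1])

labels, rboxes, scores = assigner(
    pred_scores, pred_bboxes, anchor_points, num_anchors_list,
    gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)
print(labels.shape, rboxes.shape, scores.shape)             # (B, L) (B, L, 5) (B, L, C)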

View File

@@ -0,0 +1,265 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py
import paddle
import numpy as np
import paddle.nn.functional as F
from ppdet.modeling.losses.varifocal_loss import varifocal_loss
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
from ppdet.core.workspace import register
@register
class SimOTAAssigner(object):
"""Computes matching between predictions and ground truth.
Args:
center_radius (int | float, optional): Ground truth center size
to judge whether a prior is in center. Default 2.5.
candidate_topk (int, optional): The candidate top-k which used to
get top-k ious to calculate dynamic-k. Default 10.
iou_weight (int | float, optional): The scale factor for regression
iou cost. Default 3.0.
cls_weight (int | float, optional): The scale factor for classification
cost. Default 1.0.
num_classes (int): The num_classes of dataset.
use_vfl (bool): Whether to use varifocal_loss when calculating the cost matrix.
"""
__shared__ = ['num_classes']
def __init__(self,
center_radius=2.5,
candidate_topk=10,
iou_weight=3.0,
cls_weight=1.0,
num_classes=80,
use_vfl=True):
self.center_radius = center_radius
self.candidate_topk = candidate_topk
self.iou_weight = iou_weight
self.cls_weight = cls_weight
self.num_classes = num_classes
self.use_vfl = use_vfl
def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
gt_bboxes):
num_gt = gt_bboxes.shape[0]
flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
[1, num_gt])
flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
[1, num_gt])
flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
[1, num_gt])
# is prior centers in gt bboxes, shape: [n_center, n_gt]
l_ = flatten_x - gt_bboxes[:, 0]
t_ = flatten_y - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - flatten_x
b_ = gt_bboxes[:, 3] - flatten_y
deltas = paddle.stack([l_, t_, r_, b_], axis=1)
is_in_gts = deltas.min(axis=1) > 0
is_in_gts_all = is_in_gts.sum(axis=1) > 0
# is prior centers in gt centers
gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y
cl_ = flatten_x - ct_bound_l
ct_ = flatten_y - ct_bound_t
cr_ = ct_bound_r - flatten_x
cb_ = ct_bound_b - flatten_y
ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
is_in_cts = ct_deltas.min(axis=1) > 0
is_in_cts_all = is_in_cts.sum(axis=1) > 0
# in any of gts or gt centers, shape: [n_center]
is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
is_in_cts_all)
is_in_gts_or_centers_all_inds = paddle.nonzero(
is_in_gts_or_centers_all).squeeze(1)
# both in gts and gt centers, shape: [num_fg, num_gt]
is_in_gts_and_centers = paddle.logical_and(
paddle.gather(
is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'),
paddle.gather(
is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
axis=0).cast('bool'))
return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
match_matrix = np.zeros_like(cost_matrix.numpy())
# select candidate topk ious for dynamic-k calculation
topk_ious, _ = paddle.topk(
pairwise_ious,
min(self.candidate_topk, pairwise_ious.shape[0]),
axis=0)
# calculate dynamic k for each gt
dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
for gt_idx in range(num_gt):
_, pos_idx = paddle.topk(
cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0
del topk_ious, dynamic_ks, pos_idx
# priors matched to more than one gt
extra_match_gts_mask = match_matrix.sum(1) > 1
if extra_match_gts_mask.sum() > 0:
cost_matrix = cost_matrix.numpy()
cost_argmin = np.argmin(
cost_matrix[extra_match_gts_mask, :], axis=1)
match_matrix[extra_match_gts_mask, :] *= 0.0
match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
# get foreground mask
match_fg_mask_inmatrix = match_matrix.sum(1) > 0
match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)
return match_gt_inds_to_fg, match_fg_mask_inmatrix
def get_sample(self, assign_gt_inds, gt_bboxes):
pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
if gt_bboxes.size == 0:
# hack for index error case
assert pos_assigned_gt_inds.size == 0
pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
else:
if len(gt_bboxes.shape) < 2:
gt_bboxes = gt_bboxes.reshape(-1, 4)
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
def __call__(self,
flatten_cls_pred_scores,
flatten_center_and_stride,
flatten_bboxes,
gt_bboxes,
gt_labels,
eps=1e-7):
"""Assign gt to priors using SimOTA.
TODO: add comment.
Returns:
assign_result: The assigned result.
"""
num_gt = gt_bboxes.shape[0]
num_bboxes = flatten_bboxes.shape[0]
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.ones([num_bboxes], dtype=np.float32)
bbox_target = np.zeros_like(flatten_center_and_stride)
return 0, label, label_weight, bbox_target
is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
flatten_center_and_stride, gt_bboxes)
# bboxes and scores to calculate matrix
valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
valid_cls_pred_scores = flatten_cls_pred_scores[
is_in_gts_or_centers_all_inds]
num_valid_bboxes = valid_flatten_bboxes.shape[0]
pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
gt_bboxes) # [num_points,num_gts]
if self.use_vfl:
gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
[num_valid_bboxes, 1]).reshape([-1])
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1]).reshape([-1, self.num_classes])
vfl_score = np.zeros(valid_pred_scores.shape)
vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
)] = pairwise_ious.reshape([-1])
vfl_score = paddle.to_tensor(vfl_score)
losses_vfl = varifocal_loss(
valid_pred_scores, vfl_score,
use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
losses_giou = batch_bbox_overlaps(
valid_flatten_bboxes, gt_bboxes, mode='giou')
cost_matrix = (
losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
else:
iou_cost = -paddle.log(pairwise_ious + eps)
gt_onehot_label = (F.one_hot(
gt_labels.squeeze(-1).cast(paddle.int64),
flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
.tile([num_valid_bboxes, 1, 1]))
valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
[1, num_gt, 1])
cls_cost = F.binary_cross_entropy(
valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
cost_matrix = (
cls_cost * self.cls_weight + iou_cost * self.iou_weight +
paddle.logical_not(is_in_boxes_and_center).cast('float32') *
100000000)
match_gt_inds_to_fg, match_fg_mask_inmatrix = \
self.dynamic_k_matching(
cost_matrix, pairwise_ious, num_gt)
# sample and assign results
assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
)] = match_fg_mask_inmatrix
assigned_gt_inds[match_fg_mask_inall.astype(
np.bool_)] = match_gt_inds_to_fg + 1
pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
= self.get_sample(assigned_gt_inds, gt_bboxes.numpy())
bbox_target = np.zeros(flatten_bboxes.shape, paddle.common_ops_import.convert_dtype(flatten_bboxes.dtype))
bbox_weight = np.zeros_like(bbox_target)
label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
label_weight = np.zeros([num_bboxes], dtype=np.float32)
if len(pos_inds) > 0:
gt_labels = gt_labels.numpy()
pos_bbox_targets = pos_gt_bboxes
bbox_target[pos_inds, :] = pos_bbox_targets
bbox_weight[pos_inds, :] = 1.0
if not np.any(gt_labels):
label[pos_inds] = 0
else:
label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]
label_weight[pos_inds] = 1.0
if len(neg_inds) > 0:
label_weight[neg_inds] = 1.0
pos_num = max(pos_inds.size, 1)
return pos_num, label, label_weight, bbox_target
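def _example_simota_call(assigner, cls_scores, centers_and_strides, decoded_bboxes,
                         gt_bboxes, gt_labels):
    # Illustrative usage sketch, not part of the original file. `assigner` is an
    # instance of the enclosing SimOTA-style assigner class; the tensor shapes
    # below are assumptions inferred from the code above, not a documented API:
    #   cls_scores:          [num_priors, num_classes]
    #   centers_and_strides: [num_priors, 4] (cx, cy, stride_w, stride_h)
    #   decoded_bboxes:      [num_priors, 4] xyxy
    #   gt_bboxes:           [num_gt, 4] xyxy,  gt_labels: [num_gt, 1]
    pos_num, label, label_weight, bbox_target = assigner(
        cls_scores, centers_and_strides, decoded_bboxes, gt_bboxes, gt_labels)
    # label == num_classes marks background priors; bbox_target holds the
    # matched gt box for each positive prior.
    return pos_num, label, label_weight, bbox_target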

View File

@@ -0,0 +1,193 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import batch_iou_similarity
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
__all__ = ['TaskAlignedAssigner']
def is_close_gt(anchor, gt, stride_lst, max_dist=2.0, alpha=2.):
"""Calculate distance ratio of box1 and box2 in batch for larger stride
anchors dist/stride to promote the survive of large distance match
Args:
anchor (Tensor): box with the shape [L, 2]
gt (Tensor): box with the shape [N, M2, 4]
Return:
dist (Tensor): dist ratio between box1 and box2 with the shape [N, M1, M2]
"""
center1 = anchor.unsqueeze(0)
center2 = (gt[..., :2] + gt[..., -2:]) / 2.
center1 = center1.unsqueeze(1)  # [1, L, 2] -> [1, 1, L, 2]
center2 = center2.unsqueeze(2) # [N, M2, 2] -> [N, M2, 1, 2]
stride = paddle.concat([
paddle.full([x], 32 / pow(2, idx)) for idx, x in enumerate(stride_lst)
]).unsqueeze(0).unsqueeze(0)
dist = paddle.linalg.norm(center1 - center2, p=2, axis=-1) / stride
# build the mask without aliasing `dist` in place: 1. where the normalised
# distance is below max_dist, 0. otherwise
dist_ratio = (dist < max_dist).astype(dist.dtype)
return dist_ratio
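def _example_is_close_gt():
    # Illustrative sketch, not part of the original file. Assumes three FPN
    # levels holding [4, 2, 1] anchors, so the formula above yields strides
    # 32, 16 and 8. All anchors sit at the origin and the single gt box is
    # centered at (24, 24), i.e. about 33.9 px away.
    anchor = paddle.zeros([7, 2])
    gt = paddle.to_tensor([[[0., 0., 48., 48.]]])
    mask = is_close_gt(anchor, gt, [4, 2, 1])
    # The stride-32 anchors get 1. (33.9 / 32 < max_dist), while the stride-16
    # and stride-8 anchors get 0. (their normalised distances are >= max_dist).
    return mask  # shape [1, 1, 7]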
@register
class TaskAlignedAssigner(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection
"""
def __init__(self,
topk=13,
alpha=1.0,
beta=6.0,
eps=1e-9,
is_close_gt=False):
super(TaskAlignedAssigner, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.eps = eps
self.is_close_gt = is_close_gt
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
num_anchors_list,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between every predicted bbox (across all pyramid levels) and each gt
2. select the top-k bboxes as candidates for each gt
3. restrict positive samples to anchors whose centers lie inside the gt (because the
anchor-free detector can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou is selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
num_anchors_list (List): num of anchors in each level, shape(L)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta)
# check the positive sample's center in gt, [B, n, L]
if self.is_close_gt:
is_in_gts = is_close_gt(anchor_points, gt_bboxes, num_anchors_list)
else:
is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
is_in_topk = gather_topk_anchors(
alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)
# select positive sample, [B, n, L]
mask_positive = is_in_topk * is_in_gts * pad_gt_mask
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
return assigned_labels, assigned_bboxes, assigned_scores
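def _example_task_aligned_assign():
    # Illustrative sketch, not part of the original file: one image, 8 anchors
    # on a single level, 80 classes, 2 padded gt slots of which only the first
    # is real. Shapes follow the docstring above; values are random.
    B, L, C = 1, 8, 80
    assigner = TaskAlignedAssigner(topk=4)
    pred_scores = paddle.rand([B, L, C])
    pred_bboxes = paddle.rand([B, L, 4]) * 32.
    anchor_points = paddle.rand([L, 2]) * 32.
    gt_labels = paddle.to_tensor([[[3], [0]]], dtype='int64')
    gt_bboxes = paddle.to_tensor([[[0., 0., 32., 32.], [0., 0., 0., 0.]]])
    pad_gt_mask = paddle.to_tensor([[[1.], [0.]]])
    labels, bboxes, scores = assigner(
        pred_scores, pred_bboxes, anchor_points, [L],
        gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)
    # labels: [B, L] with C (= 80) marking background anchors; scores: [B, L, C]
    # soft targets scaled by the alignment metric t = s**alpha * u**beta,
    # renormalised per gt instance.
    return labels, bboxes, scores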

View File

@@ -0,0 +1,181 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import batch_iou_similarity
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor)
__all__ = ['TaskAlignedAssigner_CR']
@register
class TaskAlignedAssigner_CR(nn.Layer):
"""TOOD: Task-aligned One-stage Object Detection with Center R
"""
def __init__(self,
topk=13,
alpha=1.0,
beta=6.0,
center_radius=None,
eps=1e-9):
super(TaskAlignedAssigner_CR, self).__init__()
self.topk = topk
self.alpha = alpha
self.beta = beta
self.center_radius = center_radius
self.eps = eps
@paddle.no_grad()
def forward(self,
pred_scores,
pred_bboxes,
anchor_points,
stride_tensor,
gt_labels,
gt_bboxes,
pad_gt_mask,
bg_index,
gt_scores=None):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
The assignment is done in the following steps:
1. compute the alignment metric between every predicted bbox (across all pyramid levels) and each gt
2. select the top-k bboxes as candidates for each gt
3. restrict positive samples to anchors whose centers lie inside the gt (because the
anchor-free detector can only predict positive distances)
4. if an anchor box is assigned to multiple gts, the one with the
highest iou is selected.
Args:
pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
stride_tensor (Tensor, float32): stride of feature map, shape(L, 1)
gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
bg_index (int): background index
gt_scores (Tensor|None, float32): Score of gt_bboxes, shape(B, n, 1)
Returns:
assigned_labels (Tensor): (B, L)
assigned_bboxes (Tensor): (B, L, 4)
assigned_scores (Tensor): (B, L, C)
"""
assert pred_scores.ndim == pred_bboxes.ndim
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
batch_size, num_anchors, num_classes = pred_scores.shape
_, num_max_boxes, _ = gt_bboxes.shape
# negative batch
if num_max_boxes == 0:
assigned_labels = paddle.full(
[batch_size, num_anchors], bg_index, dtype='int32')
assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
assigned_scores = paddle.zeros(
[batch_size, num_anchors, num_classes])
return assigned_labels, assigned_bboxes, assigned_scores
# compute iou between gt and pred bbox, [B, n, L]
ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
# gather pred bboxes class score
pred_scores = pred_scores.transpose([0, 2, 1])
batch_ind = paddle.arange(
end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
gt_labels_ind = paddle.stack(
[batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
axis=-1)
bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
# compute alignment metrics, [B, n, L]
alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
self.beta) * pad_gt_mask
# select positive sample, [B, n, L]
if self.center_radius is None:
# check the positive sample's center in gt, [B, n, L]
is_in_gts = check_points_inside_bboxes(
anchor_points, gt_bboxes, sm_use=True)
# select topk largest alignment metrics pred bbox as candidates
# for each gt, [B, n, L]
mask_positive = gather_topk_anchors(
alignment_metrics, self.topk, topk_mask=pad_gt_mask) * is_in_gts
else:
is_in_gts, is_in_center = check_points_inside_bboxes(
anchor_points,
gt_bboxes,
stride_tensor * self.center_radius,
sm_use=True)
is_in_gts *= pad_gt_mask
is_in_center *= pad_gt_mask
candidate_metrics = paddle.where(
is_in_gts.sum(-1, keepdim=True) == 0,
alignment_metrics + is_in_center,
alignment_metrics)
mask_positive = gather_topk_anchors(
candidate_metrics, self.topk,
topk_mask=pad_gt_mask) * paddle.cast((is_in_center > 0) |
(is_in_gts > 0), 'float32')
# if an anchor box is assigned to multiple gts,
# the one with the highest iou will be selected, [B, n, L]
mask_positive_sum = mask_positive.sum(axis=-2)
if mask_positive_sum.max() > 1:
mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
[1, num_max_boxes, 1])
is_max_iou = compute_max_iou_anchor(ious * mask_positive)
mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
mask_positive)
mask_positive_sum = mask_positive.sum(axis=-2)
assigned_gt_index = mask_positive.argmax(axis=-2)
# assigned target
assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
assigned_labels = paddle.gather(
gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
assigned_labels = paddle.where(
mask_positive_sum > 0, assigned_labels,
paddle.full_like(assigned_labels, bg_index))
assigned_bboxes = paddle.gather(
gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
ind = list(range(num_classes + 1))
ind.remove(bg_index)
assigned_scores = paddle.index_select(
assigned_scores, paddle.to_tensor(ind), axis=-1)
# rescale alignment metrics
alignment_metrics *= mask_positive
max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
max_ious_per_instance = (ious * mask_positive).max(axis=-1,
keepdim=True)
alignment_metrics = alignment_metrics / (
max_metrics_per_instance + self.eps) * max_ious_per_instance
alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
assigned_scores = assigned_scores * alignment_metrics
return assigned_labels, assigned_bboxes, assigned_scores
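def _example_task_aligned_assign_cr():
    # Illustrative sketch, not part of the original file: same toy setup as a
    # plain task-aligned assignment, plus the per-anchor stride tensor this
    # variant needs. With center_radius set, anchors whose centers lie within
    # center_radius * stride of a gt center remain candidates even when no
    # anchor center falls inside that gt box (the paddle.where fallback above).
    B, L, C = 1, 8, 80
    assigner = TaskAlignedAssigner_CR(topk=4, center_radius=2.5)
    pred_scores = paddle.rand([B, L, C])
    pred_bboxes = paddle.rand([B, L, 4]) * 32.
    anchor_points = paddle.rand([L, 2]) * 32.
    stride_tensor = paddle.full([L, 1], 8.)
    gt_labels = paddle.to_tensor([[[3]]], dtype='int64')
    gt_bboxes = paddle.to_tensor([[[0., 0., 32., 32.]]])
    pad_gt_mask = paddle.ones([1, 1, 1])
    return assigner(pred_scores, pred_bboxes, anchor_points, stride_tensor,
                    gt_labels, gt_bboxes, pad_gt_mask, bg_index=C)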

View File

@@ -0,0 +1,93 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
from ppdet.modeling.transformers import bbox_xyxy_to_cxcywh
__all__ = ['UniformAssigner']
def batch_p_dist(x, y, p=2):
"""
Calculate the pairwise p-norm distance between x and y; the first axes of x
and y are treated as batch dimensions.
Returns a tensor of shape [x.shape[0], y.shape[0]].
"""
x = x.unsqueeze(1)
diff = x - y
return paddle.norm(diff, p=p, axis=list(range(2, diff.dim())))
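def _example_batch_p_dist():
    # Illustrative sketch, not part of the original file: the pairwise L1
    # distance between 3 predicted boxes and 2 gt boxes (both in cxcywh space
    # here) yields the [3, 2] cost matrix used by the top-k matching below.
    preds = paddle.rand([3, 4])
    gts = paddle.rand([2, 4])
    return batch_p_dist(preds, gts, p=1)  # shape [3, 2]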
@register
class UniformAssigner(nn.Layer):
def __init__(self, pos_ignore_thr, neg_ignore_thr, match_times=4):
super(UniformAssigner, self).__init__()
self.pos_ignore_thr = pos_ignore_thr
self.neg_ignore_thr = neg_ignore_thr
self.match_times = match_times
def forward(self, bbox_pred, anchor, gt_bboxes, gt_labels=None):
num_bboxes = bbox_pred.shape[0]
num_gts = gt_bboxes.shape[0]
match_labels = paddle.full([num_bboxes], -1, dtype=paddle.int32)
pred_ious = batch_bbox_overlaps(bbox_pred, gt_bboxes)
pred_max_iou = pred_ious.max(axis=1)
neg_ignore = pred_max_iou > self.neg_ignore_thr
# exclude potential ignored neg samples first, deal with pos samples later
#match_labels: -2(ignore), -1(neg) or >=0(pos_inds)
match_labels = paddle.where(neg_ignore,
paddle.full_like(match_labels, -2),
match_labels)
bbox_pred_c = bbox_xyxy_to_cxcywh(bbox_pred)
anchor_c = bbox_xyxy_to_cxcywh(anchor)
gt_bboxes_c = bbox_xyxy_to_cxcywh(gt_bboxes)
bbox_pred_dist = batch_p_dist(bbox_pred_c, gt_bboxes_c, p=1)
anchor_dist = batch_p_dist(anchor_c, gt_bboxes_c, p=1)
top_pred = bbox_pred_dist.topk(
k=self.match_times, axis=0, largest=False)[1]
top_anchor = anchor_dist.topk(
k=self.match_times, axis=0, largest=False)[1]
tar_pred = paddle.arange(num_gts).expand([self.match_times, num_gts])
tar_anchor = paddle.arange(num_gts).expand([self.match_times, num_gts])
pos_places = paddle.concat([top_pred, top_anchor]).reshape([-1])
pos_inds = paddle.concat([tar_pred, tar_anchor]).reshape([-1])
pos_anchor = anchor[pos_places]
pos_tar_bbox = gt_bboxes[pos_inds]
pos_ious = batch_bbox_overlaps(
pos_anchor, pos_tar_bbox, is_aligned=True)
pos_ignore = pos_ious < self.pos_ignore_thr
pos_inds = paddle.where(pos_ignore,
paddle.full_like(pos_inds, -2), pos_inds)
match_labels[pos_places] = pos_inds
match_labels.stop_gradient = True
pos_keep = ~pos_ignore
if pos_keep.sum() > 0:
pos_places_keep = pos_places[pos_keep]
pos_bbox_pred = bbox_pred[pos_places_keep].reshape([-1, 4])
pos_bbox_tar = pos_tar_bbox[pos_keep].reshape([-1, 4]).detach()
else:
pos_bbox_pred = None
pos_bbox_tar = None
return match_labels, pos_bbox_pred, pos_bbox_tar
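def _example_uniform_assign():
    # Illustrative sketch, not part of the original file: 16 well-formed
    # predicted boxes and anchors matched against 2 gt boxes. The thresholds
    # and match_times are assumptions (YOLOF-style values), not this file's
    # configuration.
    assigner = UniformAssigner(pos_ignore_thr=0.15, neg_ignore_thr=0.7,
                               match_times=4)
    xy = paddle.rand([16, 2]) * 48.
    wh = paddle.rand([16, 2]) * 16. + 4.
    bbox_pred = paddle.concat([xy, xy + wh], axis=-1)    # valid xyxy boxes
    anchor = paddle.concat([xy, xy + wh + 2.], axis=-1)  # valid xyxy boxes
    gt_bboxes = paddle.to_tensor([[0., 0., 32., 32.], [16., 16., 48., 48.]])
    match_labels, pos_bbox_pred, pos_bbox_tar = assigner(
        bbox_pred, anchor, gt_bboxes)
    # match_labels: [16] with -2 = ignore, -1 = negative, >= 0 = matched gt index
    return match_labels, pos_bbox_pred, pos_bbox_tar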

View File

@@ -0,0 +1,230 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
__all__ = [
'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
'compute_max_iou_anchor', 'compute_max_iou_gt',
'generate_anchors_for_grid_cell'
]
def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
r""" Pad 0 in gt_labels and gt_bboxes.
Args:
gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
Returns:
pad_gt_labels (Tensor, int64): shape[B, n, 1]
pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
pad_gt_scores (Tensor, float32): shape[B, n, 1]
pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
"""
if isinstance(gt_labels, paddle.Tensor) and isinstance(gt_bboxes,
paddle.Tensor):
assert gt_labels.ndim == gt_bboxes.ndim and \
gt_bboxes.ndim == 3
pad_gt_mask = (
gt_bboxes.sum(axis=-1, keepdim=True) > 0).astype(gt_bboxes.dtype)
if gt_scores is None:
gt_scores = pad_gt_mask.clone()
assert gt_labels.ndim == gt_scores.ndim
return gt_labels, gt_bboxes, gt_scores, pad_gt_mask
elif isinstance(gt_labels, list) and isinstance(gt_bboxes, list):
assert len(gt_labels) == len(gt_bboxes), \
'The number of `gt_labels` and `gt_bboxes` is not equal. '
num_max_boxes = max([len(a) for a in gt_bboxes])
batch_size = len(gt_bboxes)
# pad label and bbox
pad_gt_labels = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_labels[0].dtype)
pad_gt_bboxes = paddle.zeros(
[batch_size, num_max_boxes, 4], dtype=gt_bboxes[0].dtype)
pad_gt_scores = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
pad_gt_mask = paddle.zeros(
[batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
if len(label) > 0 and len(bbox) > 0:
pad_gt_labels[i, :len(label)] = label
pad_gt_bboxes[i, :len(bbox)] = bbox
pad_gt_mask[i, :len(bbox)] = 1.
if gt_scores is not None:
pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
if gt_scores is None:
pad_gt_scores = pad_gt_mask.clone()
return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
else:
raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')
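def _example_pad_gt():
    # Illustrative sketch, not part of the original file: two images with 2 and
    # 1 gt boxes are padded to a fixed-size batch; pad_gt_mask marks the real
    # boxes with 1. and the padded slot with 0.
    gt_labels = [paddle.to_tensor([[1], [3]], dtype='int64'),
                 paddle.to_tensor([[0]], dtype='int64')]
    gt_bboxes = [paddle.to_tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]]),
                 paddle.to_tensor([[2., 2., 8., 8.]])]
    labels, bboxes, scores, mask = pad_gt(gt_labels, gt_bboxes)
    # labels: [2, 2, 1], bboxes: [2, 2, 4], scores and mask: [2, 2, 1]
    return labels, bboxes, scores, mask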
def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
r"""
Args:
metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
topk (int): The number of top elements to look for along the axis.
largest (bool) : largest is a flag, if set to true,
algorithm will sort by descending order, otherwise sort by
ascending order. Default: True
topk_mask (Tensor, float32): shape[B, n, 1], ignore bbox mask,
Default: None
eps (float): Default: 1e-9
Returns:
is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = metrics.shape[-1]
topk_metrics, topk_idxs = paddle.topk(
metrics, topk, axis=-1, largest=largest)
if topk_mask is None:
topk_mask = (
topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
axis=-2).astype(metrics.dtype)
return is_in_topk * topk_mask
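def _example_gather_topk_anchors():
    # Illustrative sketch, not part of the original file: one image, one gt and
    # five anchors; the two largest metrics (0.9 and 0.7) are selected, so the
    # returned mask is [[[0., 1., 0., 1., 0.]]].
    metrics = paddle.to_tensor([[[0.1, 0.9, 0.3, 0.7, 0.2]]])
    return gather_topk_anchors(metrics, topk=2)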
def check_points_inside_bboxes(points,
bboxes,
center_radius_tensor=None,
eps=1e-9,
sm_use=False):
r"""
Args:
    points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
    bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
    center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
    eps (float): Default: 1e-9
    sm_use (bool): if True and center_radius_tensor is given, return the
        in-box mask and the in-center mask separately. Default: False
Returns:
    is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected.
    When center_radius_tensor is given and sm_use is False, a pair of bool
    masks (inside box and center, inside box or center) is returned instead.
"""
points = points.unsqueeze([0, 1])
x, y = points.chunk(2, axis=-1)
xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
# check whether `points` is in `bboxes`
l = x - xmin
t = y - ymin
r = xmax - x
b = ymax - y
delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
if center_radius_tensor is not None:
# check whether `points` is in `center_radius`
center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
cx = (xmin + xmax) * 0.5
cy = (ymin + ymax) * 0.5
l = x - (cx - center_radius_tensor)
t = y - (cy - center_radius_tensor)
r = (cx + center_radius_tensor) - x
b = (cy + center_radius_tensor) - y
delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
if sm_use:
return is_in_bboxes.astype(bboxes.dtype), is_in_center.astype(
bboxes.dtype)
else:
return (paddle.logical_and(is_in_bboxes, is_in_center),
paddle.logical_or(is_in_bboxes, is_in_center))
return is_in_bboxes.astype(bboxes.dtype)
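def _example_check_points_inside_bboxes():
    # Illustrative sketch, not part of the original file: two anchor centers
    # tested against one gt box; only (5, 5) lies strictly inside
    # [0, 0, 10, 10], so the returned mask is [[[1., 0.]]].
    points = paddle.to_tensor([[5., 5.], [20., 20.]])
    bboxes = paddle.to_tensor([[[0., 0., 10., 10.]]])
    return check_points_inside_bboxes(points, bboxes)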
def compute_max_iou_anchor(ious):
r"""
For each anchor, find the GT with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_max_boxes = ious.shape[-2]
max_iou_index = ious.argmax(axis=-2)
is_max_iou = F.one_hot(max_iou_index, num_max_boxes).transpose([0, 2, 1])
return is_max_iou.astype(ious.dtype)
def compute_max_iou_gt(ious):
r"""
For each GT, find the anchor with the largest IOU.
Args:
ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
Returns:
is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
"""
num_anchors = ious.shape[-1]
max_iou_index = ious.argmax(axis=-1)
is_max_iou = F.one_hot(max_iou_index, num_anchors)
return is_max_iou.astype(ious.dtype)
def generate_anchors_for_grid_cell(feats,
fpn_strides,
grid_cell_size=5.0,
grid_cell_offset=0.5,
dtype='float32'):
r"""
Like ATSS, generate anchors based on grid size.
Args:
feats (List[Tensor]): shape[s, (b, c, h, w)]
fpn_strides (tuple|list): shape[s], stride for each scale feature
grid_cell_size (float): anchor size
grid_cell_offset (float): The range is between 0 and 1.
Returns:
anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
anchor_points (Tensor): shape[l, 2], "x, y" format.
num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
"""
assert len(feats) == len(fpn_strides)
anchors = []
anchor_points = []
num_anchors_list = []
stride_tensor = []
for feat, stride in zip(feats, fpn_strides):
_, _, h, w = feat.shape
cell_half_size = grid_cell_size * stride * 0.5
shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
anchor = paddle.stack(
[
shift_x - cell_half_size, shift_y - cell_half_size,
shift_x + cell_half_size, shift_y + cell_half_size
],
axis=-1).astype(dtype)
anchor_point = paddle.stack([shift_x, shift_y], axis=-1).astype(dtype)
anchors.append(anchor.reshape([-1, 4]))
anchor_points.append(anchor_point.reshape([-1, 2]))
num_anchors_list.append(len(anchors[-1]))
stride_tensor.append(
paddle.full(
[num_anchors_list[-1], 1], stride, dtype=dtype))
anchors = paddle.concat(anchors)
anchors.stop_gradient = True
anchor_points = paddle.concat(anchor_points)
anchor_points.stop_gradient = True
stride_tensor = paddle.concat(stride_tensor)
stride_tensor.stop_gradient = True
return anchors, anchor_points, num_anchors_list, stride_tensor
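def _example_generate_anchors_for_grid_cell():
    # Illustrative sketch, not part of the original file: three FPN feature
    # maps from a 256 x 256 input give 32*32 + 16*16 + 8*8 = 1344 grid cells,
    # so anchors is [1344, 4], anchor_points is [1344, 2] and stride_tensor is
    # [1344, 1]. Only the spatial shapes of `feats` are used.
    feats = [paddle.rand([1, 64, 32, 32]),
             paddle.rand([1, 128, 16, 16]),
             paddle.rand([1, 256, 8, 8])]
    return generate_anchors_for_grid_cell(feats, fpn_strides=[8, 16, 32])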