更换文档检测模型
This commit is contained in:
35
paddle_detection/ppdet/modeling/assigners/__init__.py
Normal file
35
paddle_detection/ppdet/modeling/assigners/__init__.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import utils
|
||||
from . import task_aligned_assigner
|
||||
from . import atss_assigner
|
||||
from . import simota_assigner
|
||||
from . import max_iou_assigner
|
||||
from . import fcosr_assigner
|
||||
from . import rotated_task_aligned_assigner
|
||||
from . import task_aligned_assigner_cr
|
||||
from . import uniform_assigner
|
||||
|
||||
from .utils import *
|
||||
from .task_aligned_assigner import *
|
||||
from .atss_assigner import *
|
||||
from .simota_assigner import *
|
||||
from .max_iou_assigner import *
|
||||
from .fcosr_assigner import *
|
||||
from .rotated_task_aligned_assigner import *
|
||||
from .task_aligned_assigner_cr import *
|
||||
from .uniform_assigner import *
|
||||
from .hungarian_assigner import *
|
||||
from .pose_utils import *
|
||||
225
paddle_detection/ppdet/modeling/assigners/atss_assigner.py
Normal file
225
paddle_detection/ppdet/modeling/assigners/atss_assigner.py
Normal file
@@ -0,0 +1,225 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..bbox_utils import iou_similarity, batch_iou_similarity
|
||||
from ..bbox_utils import bbox_center
|
||||
from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
|
||||
compute_max_iou_gt)
|
||||
|
||||
__all__ = ['ATSSAssigner']
|
||||
|
||||
|
||||
@register
class ATSSAssigner(nn.Layer):
    """Bridging the Gap Between Anchor-based and Anchor-free Detection
    via Adaptive Training Sample Selection
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 topk=9,
                 num_classes=80,
                 force_gt_matching=False,
                 eps=1e-9,
                 sm_use=False):
        # topk: number of candidate anchors selected per pyramid level for
        #   each gt (step 3 of the assignment procedure in `forward`).
        # force_gt_matching: if True, step 8 forces every gt to keep its
        #   max-iou anchor even if the statistical threshold rejected it.
        # eps: stored numeric-stability constant (not used in this class's
        #   visible code; kept for config compatibility).
        # sm_use: switches to the alternative center check / conflict
        #   resolution branches in steps 6 and 7.
        super(ATSSAssigner, self).__init__()
        self.topk = topk
        self.num_classes = num_classes
        self.force_gt_matching = force_gt_matching
        self.eps = eps
        self.sm_use = sm_use

    def _gather_topk_pyramid(self, gt2anchor_distances, num_anchors_list,
                             pad_gt_mask):
        """Select the topk closest anchors to each gt on every pyramid level.

        Splits the [B, n, L] center-distance tensor into per-level chunks,
        takes the `self.topk` smallest distances per gt on each level, and
        re-offsets the per-level indices into global anchor indices.

        Returns:
            is_in_topk_list (Tensor): [B, n, L] one-hot-sum membership mask,
                zeroed for padded gts via `pad_gt_mask`.
            topk_idxs_list (Tensor): [B, n, topk * num_levels] global indices
                of the selected candidate anchors.
        """
        gt2anchor_distances_list = paddle.split(
            gt2anchor_distances, num_anchors_list, axis=-1)
        # Start offset of each level inside the flattened anchor dimension.
        num_anchors_index = np.cumsum(num_anchors_list).tolist()
        num_anchors_index = [0, ] + num_anchors_index[:-1]
        is_in_topk_list = []
        topk_idxs_list = []
        for distances, anchors_index in zip(gt2anchor_distances_list,
                                            num_anchors_index):
            num_anchors = distances.shape[-1]
            # largest=False: the *closest* anchors are the candidates.
            _, topk_idxs = paddle.topk(
                distances, self.topk, axis=-1, largest=False)
            topk_idxs_list.append(topk_idxs + anchors_index)
            # one_hot(...).sum(-2) turns the index list into a per-anchor
            # membership mask for this level.
            is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
                axis=-2).astype(gt2anchor_distances.dtype)
            is_in_topk_list.append(is_in_topk * pad_gt_mask)
        is_in_topk_list = paddle.concat(is_in_topk_list, axis=-1)
        topk_idxs_list = paddle.concat(topk_idxs_list, axis=-1)
        return is_in_topk_list, topk_idxs_list

    @paddle.no_grad()
    def forward(self,
                anchor_bboxes,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None,
                pred_bboxes=None):
        r"""This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

        The assignment is done in following steps
        1. compute iou between all bbox (bbox of all pyramid levels) and gt
        2. compute center distance between all bbox and gt
        3. on each pyramid level, for each gt, select k bbox whose center
           are closest to the gt center, so we total select k*l bbox as
           candidates for each gt
        4. get corresponding iou for the these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select these candidates whose iou are greater than or equal to
           the threshold as positive
        6. limit the positive sample's center in gt
        7. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        Args:
            anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
                "xmin, xmax, ymin, ymax" format
            num_anchors_list (List): num of anchors in each level
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            gt_scores (Tensor|None, float32) Score of gt_bboxes,
                shape(B, n, 1), if None, then it will initialize with one_hot label
            pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
        """
        assert gt_labels.ndim == gt_bboxes.ndim and \
            gt_bboxes.ndim == 3

        num_anchors, _ = anchor_bboxes.shape
        batch_size, num_max_boxes, _ = gt_bboxes.shape

        # negative batch: no gt at all -> everything is background.
        if num_max_boxes == 0:
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype='int32')
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, self.num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # 1. compute iou between gt and anchor bbox, [B, n, L]
        ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
        ious = ious.reshape([batch_size, -1, num_anchors])

        # 2. compute center distance between all anchors and gt, [B, n, L]
        gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
        anchor_centers = bbox_center(anchor_bboxes)
        gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
            .norm(2, axis=-1).reshape([batch_size, -1, num_anchors])

        # 3. on each pyramid level, selecting topk closest candidates
        # based on the center distance, [B, n, L]
        is_in_topk, topk_idxs = self._gather_topk_pyramid(
            gt2anchor_distances, num_anchors_list, pad_gt_mask)

        # 4. get corresponding iou for the these candidates, and compute the
        # mean and std, 5. set mean + std as the iou threshold
        iou_candidates = ious * is_in_topk
        # index_sample gathers the candidates' ious per gt row.
        iou_threshold = paddle.index_sample(
            iou_candidates.flatten(stop_axis=-2),
            topk_idxs.flatten(stop_axis=-2))
        iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
        iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
            iou_threshold.std(axis=-1, keepdim=True)
        is_in_topk = paddle.where(iou_candidates > iou_threshold, is_in_topk,
                                  paddle.zeros_like(is_in_topk))

        # 6. check the positive sample's center in gt, [B, n, L]
        if self.sm_use:
            is_in_gts = check_points_inside_bboxes(
                anchor_centers, gt_bboxes, sm_use=True)
        else:
            is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # 7. if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected.
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (
                mask_positive_sum.unsqueeze(1) > 1).astype('int32').tile(
                    [1, num_max_boxes, 1]).astype('bool')
            if self.sm_use:
                # restrict the argmax to current positives in sm_use mode
                is_max_iou = compute_max_iou_anchor(ious * mask_positive)
            else:
                is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        # 8. make sure every gt_bbox matches the anchor
        if self.force_gt_matching:
            is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
            # only force-match gts whose max-iou anchor is unambiguous
            mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
                [1, num_max_boxes, 1])
            mask_positive = paddle.where(mask_max_iou, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)

        # assigned target: flatten the (batch, gt) axes so a single gather
        # resolves each anchor's gt across the whole batch.
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # one_hot over (num_classes + 1) then drop the bg column so scores
        # are [B, L, num_classes] with background rows all-zero.
        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
        ind = list(range(self.num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(
            assigned_scores, paddle.to_tensor(ind), axis=-1)
        if pred_bboxes is not None:
            # assigned iou: scale the one-hot scores by the iou between each
            # anchor's prediction and its assigned gt.
            ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
            ious = ious.max(axis=-2).unsqueeze(-1)
            assigned_scores *= ious
        elif gt_scores is not None:
            gather_scores = paddle.gather(
                gt_scores.flatten(), assigned_gt_index.flatten(), axis=0)
            gather_scores = gather_scores.reshape([batch_size, num_anchors])
            gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
                                         paddle.zeros_like(gather_scores))
            assigned_scores *= gather_scores.unsqueeze(-1)

        return assigned_labels, assigned_bboxes, assigned_scores
|
||||
147
paddle_detection/ppdet/modeling/assigners/clrnet_assigner.py
Normal file
147
paddle_detection/ppdet/modeling/assigners/clrnet_assigner.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.modeling.losses.clrnet_line_iou_loss import line_iou
|
||||
|
||||
|
||||
def distance_cost(predictions, targets, img_w):
    """
    repeat predictions and targets to generate all combinations
    use the abs distance as the new distance cost

    Builds the full (num_priors x num_targets) pairing by repeating each
    prediction `num_targets` times and tiling the target set `num_priors`
    times, then averages the per-coordinate |target - prediction| over the
    valid target coordinates only.

    Args:
        predictions (Tensor): prior predictions; columns 6: hold the x
            coordinates compared here.
        targets (Tensor): lane targets, same column layout as predictions.
        img_w (int): image width; target x values outside [0, img_w) are
            treated as invalid and excluded from the cost.

    Returns:
        Tensor: distance cost of shape [num_priors, num_targets].
    """
    num_priors = predictions.shape[0]
    num_targets = targets.shape[0]
    # Row layout after this pairing: index i * num_targets + j is
    # (prediction i, target j).
    predictions = paddle.repeat_interleave(
        predictions, num_targets, axis=0)[..., 6:]
    targets = paddle.concat(x=num_priors * [targets])[..., 6:]
    # Coordinates outside the image are padding/invisible points.
    invalid_masks = (targets < 0) | (targets >= img_w)
    lengths = (~invalid_masks).sum(axis=1)
    distances = paddle.abs(x=targets - predictions)
    # Zero out invalid coordinates so they don't contribute to the sum.
    distances[invalid_masks] = 0.0
    # Mean over valid coordinates; +1e-09 guards all-invalid rows.
    distances = distances.sum(axis=1) / (lengths.cast("float32") + 1e-09)
    distances = distances.reshape([num_priors, num_targets])
    return distances
|
||||
|
||||
|
||||
def focal_cost(cls_pred, gt_labels, alpha=0.25, gamma=2, eps=1e-12):
    """Focal-loss style classification cost.

    For every (query, gt) pair, the cost is the focal positive cost minus
    the focal negative cost of the gt's class, computed from the sigmoid
    probabilities of the predicted logits.

    Args:
        cls_pred (Tensor): Predicted classification logits, shape
            [num_query, num_class].
        gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).
        alpha (float): focal balancing factor for the positive term.
        gamma (int|float): focal modulating exponent.
        eps (float): small constant guarding log(0).

    Returns:
        Tensor: cls_cost value of shape [num_query, num_gt].
    """
    prob = F.sigmoid(cls_pred)
    # Standard focal decomposition on the probability map.
    pos_cost = -(prob + eps).log() * alpha * (1 - prob).pow(gamma)
    neg_cost = -(1 - prob + eps).log() * (1 - alpha) * prob.pow(gamma)
    # Pick each gt's class column from both terms.
    pos_term = pos_cost.index_select(gt_labels, axis=1)
    neg_term = neg_cost.index_select(gt_labels, axis=1)
    return pos_term - neg_term
|
||||
|
||||
|
||||
def dynamic_k_assign(cost, pair_wise_ious):
    """
    Assign grouth truths with priors dynamically.

    Each gt gets a dynamic number k of priors (k = clipped sum of its top-4
    ious), chosen as the k lowest-cost priors. Priors matched to more than
    one gt are then reduced to their single minimum-cost gt.

    Args:
        cost: the assign cost, shape [num_priors, num_gt].
        pair_wise_ious: iou of grouth truth and priors, shape
            [num_priors, num_gt]; modified in place (negatives clamped to 0).

    Returns:
        prior_idx: the index of assigned prior.
        gt_idx: the corresponding ground truth index.
    """
    matching_matrix = paddle.zeros_like(cost)
    ious_matrix = pair_wise_ious
    ious_matrix[ious_matrix < 0] = 0.0
    # Dynamic-k estimation: k_j = clip(sum of top-4 ious for gt j, min=1).
    n_candidate_k = 4
    topk_ious, _ = paddle.topk(ious_matrix, n_candidate_k, axis=0)
    dynamic_ks = paddle.clip(x=topk_ious.sum(0).cast("int32"), min=1)
    num_gt = cost.shape[1]

    for gt_idx in range(num_gt):
        # Lowest-cost priors are the best candidates (largest=False).
        _, pos_idx = paddle.topk(
            x=cost[:, gt_idx], k=dynamic_ks[gt_idx].item(), largest=False)
        matching_matrix[pos_idx, gt_idx] = 1.0
    del topk_ious, dynamic_ks, pos_idx
    matched_gt = matching_matrix.sum(axis=1)

    if (matched_gt > 1).sum() > 0:
        # Priors claimed by several gts keep only their minimum-cost gt.
        matched_gt_indices = paddle.nonzero(matched_gt > 1)[:, 0]
        cost_argmin = paddle.argmin(
            cost.index_select(matched_gt_indices), axis=1)
        # BUGFIX: clear ALL previous assignments of the over-matched priors
        # before re-assigning. The old chained indexing
        # `matching_matrix[matched_gt_indices][0] *= 0.0` operated on the
        # copy returned by fancy-index __getitem__ (and only on its first
        # row), so nothing was written back to matching_matrix.
        matching_matrix[matched_gt_indices] = 0.0
        matching_matrix[matched_gt_indices, cost_argmin] = 1.0

    prior_idx = matching_matrix.sum(axis=1).nonzero()
    gt_idx = matching_matrix[prior_idx].argmax(axis=-1)
    return prior_idx.flatten(), gt_idx.flatten()
|
||||
|
||||
|
||||
def cdist_paddle(x1, x2, p=2):
    """Pairwise Minkowski distance between two sets of row vectors.

    Paddle substitute for `torch.cdist` restricted to 2-D inputs.

    Args:
        x1 (Tensor): shape [M, D].
        x2 (Tensor): shape [N, D]; D must match x1.
        p (int|float): distance order; p == 1 takes the plain absolute sum
            (avoiding a needless pow/root), any other p computes
            (sum |diff|^p)^(1/p).

    Returns:
        Tensor: distance matrix of shape [M, N].
    """
    assert x1.shape[1] == x2.shape[1]
    # Broadcast to [M, N, D] once; both branches reduce over the last axis.
    diff = paddle.abs(x1.unsqueeze(axis=1) - x2.unsqueeze(axis=0))
    if p == 1:
        dist = paddle.sum(diff, axis=-1)
    else:
        dist = paddle.pow(paddle.sum(paddle.pow(diff, p), axis=-1), 1 / p)
    return dist
|
||||
|
||||
|
||||
def assign(predictions,
           targets,
           img_w,
           img_h,
           distance_cost_weight=3.0,
           cls_cost_weight=1.0):
    """
    Computes dynamic matching based on the cost, including cls cost and
    lane similarity cost (x-distance, start point distance and angle).

    Args:
        predictions (Tensor): predictions predicted by each stage, shape: (num_priors, 78)
        targets (Tensor): lane targets, shape: (num_targets, 78)
        img_w (int): image width used to denormalize x coordinates.
        img_h (int): image height used to denormalize start-point y.
        distance_cost_weight (float): weight of the combined geometric score.
        cls_cost_weight (float): weight of the focal classification cost.
    return:
        matched_row_inds (Tensor): matched predictions, shape: (num_targets)
        matched_col_inds (Tensor): matched targets, shape: (num_targets)
    """
    # Work on detached clones: the slice assignments below mutate them
    # in place, and the originals must stay untouched (and gradient-free).
    predictions = predictions.detach().clone()
    predictions[:, 3] *= img_w - 1
    predictions[:, 6:] *= img_w - 1

    targets = targets.detach().clone()
    distances_score = distance_cost(predictions, targets, img_w)
    # Convert distance to a similarity in (0, 1.01]: 1 for the farthest
    # pair's complement, +0.01 keeps every score strictly positive.
    distances_score = 1 - distances_score / paddle.max(x=distances_score) + 0.01

    # Column layout assumed here: [:, :2] class logits, [:, 2:4] start xy,
    # [:, 4] theta, [:, 6:] x coordinates — TODO confirm against the
    # CLRNet head that produces these 78-dim rows.
    cls_score = focal_cost(predictions[:, :2], targets[:, 1].cast('int64'))

    num_priors = predictions.shape[0]
    num_targets = targets.shape[0]
    # NOTE: these slices are views-like in-place edits of the clones made
    # above, so scaling y by (img_h - 1) is intentional and local.
    target_start_xys = targets[:, 2:4]
    target_start_xys[..., 0] *= (img_h - 1)
    prediction_start_xys = predictions[:, 2:4]
    prediction_start_xys[..., 0] *= (img_h - 1)
    start_xys_score = cdist_paddle(
        prediction_start_xys, target_start_xys,
        p=2).reshape([num_priors, num_targets])

    start_xys_score = 1 - start_xys_score / paddle.max(x=start_xys_score) + 0.01

    target_thetas = targets[:, 4].unsqueeze(axis=-1)
    theta_score = cdist_paddle(
        predictions[:, 4].unsqueeze(axis=-1), target_thetas,
        p=1).reshape([num_priors, num_targets]) * 180
    theta_score = 1 - theta_score / paddle.max(x=theta_score) + 0.01

    # Lower cost = better match: geometric similarity is negated (squared
    # to sharpen), classification cost added positively.
    cost = -(distances_score * start_xys_score * theta_score
             )**2 * distance_cost_weight + cls_score * cls_cost_weight
    iou = line_iou(predictions[..., 6:], targets[..., 6:], img_w, aligned=False)

    matched_row_inds, matched_col_inds = dynamic_k_assign(cost, iou)
    return matched_row_inds, matched_col_inds
|
||||
227
paddle_detection/ppdet/modeling/assigners/fcosr_assigner.py
Normal file
227
paddle_detection/ppdet/modeling/assigners/fcosr_assigner.py
Normal file
@@ -0,0 +1,227 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.rbox_utils import box2corners, check_points_in_polys, paddle_gather
|
||||
|
||||
__all__ = ['FCOSRAssigner']
|
||||
|
||||
EPS = 1e-9
|
||||
|
||||
|
||||
@register
class FCOSRAssigner(nn.Layer):
    """ FCOSR Assigner, refer to https://arxiv.org/abs/2111.10780 for details

    1. compute normalized gaussian distribution score and refined gaussian distribution score
    2. refer to ellipse center sampling, sample points whose normalized gaussian distribution score is greater than threshold
    3. refer to multi-level sampling, assign ground truth to feature map which follows two conditions.
        i). first, the ratio between the short edge of the target and the stride of the feature map is less than 2.
        ii). second, the long edge of minimum bounding rectangle of the target is larger than the acceptance range of feature map
    4. refer to fuzzy sample label assignment, the points satisfying 2 and 3 will be assigned to the ground truth according to gaussian distribution score
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 num_classes=80,
                 factor=12,
                 threshold=0.23,
                 boundary=[[-1, 128], [128, 320], [320, 10000]],
                 score_type='iou'):
        # NOTE(review): `boundary` is a mutable default argument; it is only
        # read (converted to tensors below), never mutated, so the sharing
        # across instances is benign here.
        # factor: gaussian sharpness factor (sigma = min_edge / factor).
        # threshold: minimum normalized gaussian score for center sampling.
        # boundary: per-level regression ranges, one [low, high] per level.
        # score_type: 'iou' or 'gaussian' — selects how assigned_scores are
        #   weighted in `forward`.
        super(FCOSRAssigner, self).__init__()
        self.num_classes = num_classes
        self.factor = factor
        self.threshold = threshold
        self.boundary = [
            paddle.to_tensor(
                l, dtype=paddle.float32).reshape([1, 1, 2]) for l in boundary
        ]
        self.score_type = score_type

    def get_gaussian_distribution_score(self, points, gt_rboxes, gt_polys):
        """Gaussian distribution score of each point w.r.t. each rbox.

        Returns:
            norm_score (Tensor): [B, N, L] normalized score in (0, 1].
            refined_score (Tensor): [B, N, L] score divided by the gaussian
                normalization constant, used for fuzzy label assignment.
        """
        # projecting points to coordinate system defined by each rbox
        # [B, N, 4, 2] -> 4 * [B, N, 1, 2]
        # (c and angle are unused; kept to document the corner/param order)
        a, b, c, d = gt_polys.split(4, axis=2)
        # [1, L, 2] -> [1, 1, L, 2]
        points = points.unsqueeze(0)
        ab = b - a
        ad = d - a
        # [B, N, 5] -> [B, N, 2], [B, N, 2], [B, N, 1]
        xy, wh, angle = gt_rboxes.split([2, 2, 1], axis=-1)
        # [B, N, 2] -> [B, N, 1, 2]
        xy = xy.unsqueeze(2)
        # vector of points to center [B, N, L, 2]
        vec = points - xy
        # <ab, vec> = |ab| * |vec| * cos(theta) [B, N, L]
        vec_dot_ab = paddle.sum(vec * ab, axis=-1)
        # <ad, vec> = |ad| * |vec| * cos(theta) [B, N, L]
        vec_dot_ad = paddle.sum(vec * ad, axis=-1)
        # norm_ab [B, N, L]
        norm_ab = paddle.sum(ab * ab, axis=-1).sqrt()
        # norm_ad [B, N, L]
        norm_ad = paddle.sum(ad * ad, axis=-1).sqrt()
        # min(h, w), [B, N, 1]
        min_edge = paddle.min(wh, axis=-1, keepdim=True)
        # delta_x, delta_y [B, N, L]
        delta_x = vec_dot_ab.pow(2) / (norm_ab.pow(3) * min_edge + EPS)
        delta_y = vec_dot_ad.pow(2) / (norm_ad.pow(3) * min_edge + EPS)
        # score [B, N, L]
        norm_score = paddle.exp(-0.5 * self.factor * (delta_x + delta_y))

        # simplified calculation
        sigma = min_edge / self.factor
        refined_score = norm_score / (2 * np.pi * sigma + EPS)
        return norm_score, refined_score

    def get_rotated_inside_mask(self, points, gt_polys, scores):
        """Ellipse center sampling mask: point must lie inside the rotated
        polygon AND have gaussian score >= threshold. Returns float32 mask."""
        inside_mask = check_points_in_polys(points, gt_polys)
        center_mask = scores >= self.threshold
        return (inside_mask & center_mask).cast(paddle.float32)

    def get_inside_range_mask(self, points, gt_bboxes, gt_rboxes, stride_tensor,
                              regress_range):
        """Multi-level sampling mask ([B, n, L], float32): point is inside
        the gt's aabb and either its FCOS regression range matches the
        level, or the rotated fallback condition holds."""
        # [1, L, 2] -> [1, 1, L, 2]
        points = points.unsqueeze(0)
        # [B, n, 4] -> [B, n, 1, 4]
        x1y1, x2y2 = gt_bboxes.unsqueeze(2).split(2, axis=-1)
        # [B, n, L, 2]
        lt = points - x1y1
        rb = x2y2 - points
        # [B, n, L, 4]
        ltrb = paddle.concat([lt, rb], axis=-1)
        # [B, n, L, 4] -> [B, n, L]; all four offsets positive => inside aabb
        inside_mask = paddle.min(ltrb, axis=-1) > EPS
        # regress_range [1, L, 2] -> [1, 1, L, 2]
        regress_range = regress_range.unsqueeze(0)
        # stride_tensor [1, L, 1] -> [1, 1, L]
        stride_tensor = stride_tensor.transpose((0, 2, 1))
        # fcos range
        # [B, n, L, 4] -> [B, n, L]
        ltrb_max = paddle.max(ltrb, axis=-1)
        # [1, 1, L, 2] -> [1, 1, L]
        low, high = regress_range[..., 0], regress_range[..., 1]
        # [B, n, L]
        regress_mask = (ltrb_max >= low) & (ltrb_max <= high)
        # mask for rotated: accept out-of-range points when the rbox's short
        # edge is small relative to this level's stride.
        # [B, n, 1]
        min_edge = paddle.min(gt_rboxes[..., 2:4], axis=-1, keepdim=True)
        # [B, n , L]
        rotated_mask = ((min_edge / stride_tensor) < 2.0) & (ltrb_max > high)
        mask = inside_mask & (regress_mask | rotated_mask)
        return mask.cast(paddle.float32)

    @paddle.no_grad()
    def forward(self,
                anchor_points,
                stride_tensor,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                gt_rboxes,
                pad_gt_mask,
                bg_index,
                pred_rboxes=None):
        r"""

        Args:
            anchor_points (Tensor, float32): pre-defined anchor points, shape(1, L, 2),
                    "x, y" format
            stride_tensor (Tensor, float32): stride tensor, shape (1, L, 1)
            num_anchors_list (List): num of anchors in each level
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
            gt_rboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            pred_rboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 5)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_rboxes (Tensor): (B, L, 5)
            assigned_scores (Tensor): (B, L, C), if pred_rboxes is not None, then output ious
        """

        _, num_anchors, _ = anchor_points.shape
        batch_size, num_max_boxes, _ = gt_rboxes.shape
        # negative batch: no gt at all -> everything is background.
        if num_max_boxes == 0:
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
            assigned_rboxes = paddle.zeros([batch_size, num_anchors, 5])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, self.num_classes])
            return assigned_labels, assigned_rboxes, assigned_scores

        # get normalized gaussian distribution score and refined distribution score
        gt_polys = box2corners(gt_rboxes)
        score, refined_score = self.get_gaussian_distribution_score(
            anchor_points, gt_rboxes, gt_polys)
        inside_mask = self.get_rotated_inside_mask(anchor_points, gt_polys,
                                                   score)
        # Tile each level's [low, high] range over its anchors -> [1, L, 2].
        regress_ranges = []
        for num, bound in zip(num_anchors_list, self.boundary):
            regress_ranges.append(bound.tile((1, num, 1)))
        regress_ranges = paddle.concat(regress_ranges, axis=1)
        regress_mask = self.get_inside_range_mask(
            anchor_points, gt_bboxes, gt_rboxes, stride_tensor, regress_ranges)
        # [B, n, L]
        mask_positive = inside_mask * regress_mask * pad_gt_mask
        # Negatives are pushed to -1 so argmax/max below only pick positives.
        refined_score = refined_score * mask_positive - (1. - mask_positive)

        argmax_refined_score = refined_score.argmax(axis=-2)
        max_refined_score = refined_score.max(axis=-2)
        assigned_gt_index = argmax_refined_score

        # assigned target: flatten (batch, gt) axes for a single gather.
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            max_refined_score > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_rboxes = paddle.gather(
            gt_rboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
        assigned_rboxes = assigned_rboxes.reshape([batch_size, num_anchors, 5])

        # one_hot over (num_classes + 1) then drop the bg column so scores
        # are [B, L, num_classes] with background rows all-zero.
        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
        ind = list(range(self.num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(
            assigned_scores, paddle.to_tensor(ind), axis=-1)

        if self.score_type == 'gaussian':
            # Weight scores by each anchor's gaussian score w.r.t. its gt.
            selected_scores = paddle_gather(
                score, 1, argmax_refined_score.unsqueeze(-2)).squeeze(-2)
            assigned_scores = assigned_scores * selected_scores.unsqueeze(-1)
        elif self.score_type == 'iou':
            assert pred_rboxes is not None, 'If score type is iou, pred_rboxes should not be None'
            # ext_op is the compiled rotated-box custom-op extension;
            # imported lazily so 'gaussian' mode works without it.
            from ext_op import matched_rbox_iou
            b, l = pred_rboxes.shape[:2]
            iou_score = matched_rbox_iou(
                pred_rboxes.reshape((-1, 5)), assigned_rboxes.reshape(
                    (-1, 5))).reshape((b, l, 1))
            assigned_scores = assigned_scores * iou_score

        return assigned_labels, assigned_rboxes, assigned_scores
|
||||
316
paddle_detection/ppdet/modeling/assigners/hungarian_assigner.py
Normal file
316
paddle_detection/ppdet/modeling/assigners/hungarian_assigner.py
Normal file
@@ -0,0 +1,316 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
try:
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
except ImportError:
|
||||
linear_sum_assignment = None
|
||||
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
__all__ = ['PoseHungarianAssigner', 'PseudoSampler']
|
||||
|
||||
|
||||
class AssignResult:
    """Stores assignments between predicted and truth boxes.

    Attributes:
        num_gts (int): number of truth boxes considered when computing this
            assignment.
        gt_inds (LongTensor): for each predicted box, the 1-based index of
            the assigned truth box; 0 means unassigned and -1 means ignore.
        max_overlaps (FloatTensor): iou between each predicted box and its
            assigned truth box.
        labels (None | LongTensor): if specified, the category label of the
            assigned truth box for each predicted box.
    """

    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
        # Core assignment results.
        self.num_gts = num_gts
        self.gt_inds = gt_inds
        self.max_overlaps = max_overlaps
        self.labels = labels
        # Interface for possible user-defined properties.
        self._extra_properties = {}

    @property
    def num_preds(self):
        """int: the number of predictions in this assignment"""
        return len(self.gt_inds)

    def set_extra_property(self, key, value):
        """Set user-defined new property (key must not collide with info)."""
        assert key not in self.info
        self._extra_properties[key] = value

    def get_extra_property(self, key):
        """Get user-defined property."""
        return self._extra_properties.get(key)

    @property
    def info(self):
        """dict: a dictionary of info about the object"""
        summary = dict(
            num_gts=self.num_gts,
            num_preds=self.num_preds,
            gt_inds=self.gt_inds,
            max_overlaps=self.max_overlaps,
            labels=self.labels)
        summary.update(self._extra_properties)
        return summary
|
||||
|
||||
|
||||
@register
class PoseHungarianAssigner:
    """Computes one-to-one matching between predictions and ground truth.

    This class computes an assignment between the targets and the predictions
    based on the costs. The costs are weighted sum of three components:
    classification cost, regression L1 cost and regression oks cost. The
    targets don't include the no_object, so generally there are more
    predictions than targets. After the one-to-one matching, the un-matched
    are treated as backgrounds. Thus each query prediction will be assigned
    with `0` or a positive integer indicating the ground truth index:

    - 0: negative sample, no assigned gt.
    - positive integer: positive sample, index (1-based) of assigned gt.

    Args:
        cls_cost (object): classification cost component (injected by name).
        kpt_cost (object): keypoint L1 cost component (injected by name).
        oks_cost (object): keypoint OKS cost component (injected by name).
    """
    # Cost callables are resolved from the ppdet workspace registry by name.
    __inject__ = ['cls_cost', 'kpt_cost', 'oks_cost']

    def __init__(self,
                 cls_cost='ClassificationCost',
                 kpt_cost='KptL1Cost',
                 oks_cost='OksCost'):
        self.cls_cost = cls_cost
        self.kpt_cost = kpt_cost
        self.oks_cost = oks_cost

    def assign(self,
               cls_pred,
               kpt_pred,
               gt_labels,
               gt_keypoints,
               gt_areas,
               img_meta,
               eps=1e-7):
        """Computes one-to-one matching based on the weighted costs.

        This method assign each query prediction to a ground truth or
        background. The `assigned_gt_inds` with -1 means don't care,
        0 means negative sample, and positive number is the index (1-based)
        of assigned gt.
        The assignment is done in the following steps, the order matters.

        1. assign every prediction to -1
        2. compute the weighted costs
        3. do Hungarian matching on CPU based on the costs
        4. assign all to 0 (background) first, then for each matched pair
           between predictions and gts, treat this prediction as foreground
           and assign the corresponding gt index (plus 1) to it.

        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                [num_query, num_class].
            kpt_pred (Tensor): Predicted keypoints with normalized coordinates
                (x_{i}, y_{i}), which are all in range [0, 1]. Shape
                [num_query, K*2].
            gt_labels (Tensor): Label of `gt_keypoints`, shape (num_gt,).
            gt_keypoints (Tensor): Ground truth keypoints with unnormalized
                coordinates [p^{1}_x, p^{1}_y, p^{1}_v, ..., \
                p^{K}_x, p^{K}_y, p^{K}_v]. Shape [num_gt, K*3].
            gt_areas (Tensor): Ground truth mask areas, shape (num_gt,).
            img_meta (dict): Meta information for current image.
            eps (int | float, optional): A value added to the denominator for
                numerical stability. Default 1e-7.

        Returns:
            :obj:`AssignResult`: The assigned result.
        """
        num_gts, num_kpts = gt_keypoints.shape[0], kpt_pred.shape[0]
        # All-zero keypoints are treated as "no ground truth" for this image.
        if not gt_keypoints.astype('bool').any():
            num_gts = 0

        # 1. assign -1 by default
        assigned_gt_inds = paddle.full((num_kpts, ), -1, dtype="int64")
        assigned_labels = paddle.full((num_kpts, ), -1, dtype="int64")
        if num_gts == 0 or num_kpts == 0:
            # No ground truth or keypoints, return empty assignment
            if num_gts == 0:
                # No ground truth, assign all to background
                assigned_gt_inds[:] = 0
            return AssignResult(
                num_gts, assigned_gt_inds, None, labels=assigned_labels)
        # factor converts normalized [0, 1] predictions to pixel coordinates.
        img_h, img_w, _ = img_meta['img_shape']
        factor = paddle.to_tensor(
            [img_w, img_h, img_w, img_h], dtype=gt_keypoints.dtype).reshape(
                (1, -1))

        # 2. compute the weighted costs
        # classification cost
        cls_cost = self.cls_cost(cls_pred, gt_labels)

        # keypoint regression L1 cost
        # gt is stored flat as (x, y, v) triplets; last channel is visibility.
        gt_keypoints_reshape = gt_keypoints.reshape((gt_keypoints.shape[0], -1,
                                                     3))
        valid_kpt_flag = gt_keypoints_reshape[..., -1]
        kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
                                                          2))
        # L1 cost is computed in normalized coordinates, so gt is scaled down.
        normalize_gt_keypoints = gt_keypoints_reshape[
            ..., :2] / factor[:, :2].unsqueeze(0)
        kpt_cost = self.kpt_cost(kpt_pred_tmp, normalize_gt_keypoints,
                                 valid_kpt_flag)
        # keypoint OKS cost
        kpt_pred_tmp = kpt_pred.clone().detach().reshape((kpt_pred.shape[0], -1,
                                                          2))
        # OKS cost is computed in pixel coordinates, so predictions scale up.
        kpt_pred_tmp = kpt_pred_tmp * factor[:, :2].unsqueeze(0)
        oks_cost = self.oks_cost(kpt_pred_tmp, gt_keypoints_reshape[..., :2],
                                 valid_kpt_flag, gt_areas)
        # weighted sum of above three costs
        # NOTE(review): the per-component weights are carried inside each cost
        # object (its `weight` attribute), not applied here.
        cost = cls_cost + kpt_cost + oks_cost

        # 3. do Hungarian matching on CPU using linear_sum_assignment
        cost = cost.detach().cpu()
        if linear_sum_assignment is None:
            raise ImportError('Please run "pip install scipy" '
                              'to install scipy first.')
        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
        matched_row_inds = paddle.to_tensor(matched_row_inds)
        matched_col_inds = paddle.to_tensor(matched_col_inds)

        # 4. assign backgrounds and foregrounds
        # assign all indices to backgrounds first
        assigned_gt_inds[:] = 0
        # assign foregrounds based on matching results (1-based gt indices)
        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds][
            ..., 0].astype("int64")
        return AssignResult(
            num_gts, assigned_gt_inds, None, labels=assigned_labels)
||||
|
||||
|
||||
class SamplingResult:
    """Bbox sampling result.

    Bundles the positive/negative split produced by a sampler together with
    the gt boxes/labels gathered for the positive samples.

    NOTE(review): several attributes (``pos_bboxes``, ``neg_bboxes``,
    ``pos_is_gt``) are only set when there is at least one positive index;
    with zero positives the ``bboxes`` property and ``info`` would raise
    AttributeError — confirm callers never hit that path.
    """

    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
                 gt_flags):
        # pos_inds / neg_inds: indices into `bboxes` for fg / bg samples.
        self.pos_inds = pos_inds
        self.neg_inds = neg_inds
        # Guard: indexing with an empty index tensor is avoided here.
        if pos_inds.size > 0:
            self.pos_bboxes = bboxes[pos_inds]
            self.neg_bboxes = bboxes[neg_inds]
            self.pos_is_gt = gt_flags[pos_inds]

        self.num_gts = gt_bboxes.shape[0]
        # assign_result.gt_inds is 1-based for foreground; shift to 0-based.
        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1

        if gt_bboxes.numel() == 0:
            # hack for index error case
            assert self.pos_assigned_gt_inds.numel() == 0
            self.pos_gt_bboxes = paddle.zeros(
                gt_bboxes.shape, dtype=gt_bboxes.dtype).reshape((-1, 4))
        else:
            if len(gt_bboxes.shape) < 2:
                gt_bboxes = gt_bboxes.reshape((-1, 4))

            # gather the gt box matched to each positive sample
            self.pos_gt_bboxes = paddle.index_select(
                gt_bboxes,
                self.pos_assigned_gt_inds.astype('int64'),
                axis=0)

        if assign_result.labels is not None:
            self.pos_gt_labels = assign_result.labels[pos_inds]
        else:
            self.pos_gt_labels = None

    @property
    def bboxes(self):
        """paddle.Tensor: concatenated positive and negative boxes"""
        return paddle.concat([self.pos_bboxes, self.neg_bboxes])

    def __nice__(self):
        # Compact repr: replace large tensors with their shapes.
        data = self.info.copy()
        data['pos_bboxes'] = data.pop('pos_bboxes').shape
        data['neg_bboxes'] = data.pop('neg_bboxes').shape
        parts = [f"'{k}': {v!r}" for k, v in sorted(data.items())]
        body = '    ' + ',\n    '.join(parts)
        return '{\n' + body + '\n}'

    @property
    def info(self):
        """Returns a dictionary of info about the object."""
        return {
            'pos_inds': self.pos_inds,
            'neg_inds': self.neg_inds,
            'pos_bboxes': self.pos_bboxes,
            'neg_bboxes': self.neg_bboxes,
            'pos_is_gt': self.pos_is_gt,
            'num_gts': self.num_gts,
            'pos_assigned_gt_inds': self.pos_assigned_gt_inds,
        }
||||
|
||||
|
||||
@register
class PseudoSampler:
    """A sampler stub: every proposal keeps its assigned role unchanged.

    No subsampling is performed — positives and negatives are taken
    directly from the assignment result.
    """

    def __init__(self, **kwargs):
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):
        """Directly returns the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Assigned results
            bboxes (paddle.Tensor): Bounding boxes
            gt_bboxes (paddle.Tensor): Ground truth boxes

        Returns:
            :obj:`SamplingResult`: sampler results
        """
        # gt_inds > 0 marks foreground, == 0 marks background, -1 is ignored.
        positive = paddle.nonzero(
            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1)
        negative = paddle.nonzero(
            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1)
        # No proposal here is an injected gt box, so all flags stay zero.
        no_gt_flags = paddle.zeros([bboxes.shape[0]], dtype='int32')
        return SamplingResult(positive, negative, bboxes, gt_bboxes,
                              assign_result, no_gt_flags)
||||
@@ -0,0 +1,52 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.proposal_generator.target import label_box
|
||||
|
||||
__all__ = ['MaxIoUAssigner']
|
||||
|
||||
@register
class MaxIoUAssigner(object):
    """A standard max-IoU bbox assigner backed by ppdet's ``label_box``.

    Args:
        positive_overlap (float): threshold for defining positive samples
        negative_overlap (float): threshold for defining negative samples
        allow_low_quality (bool): whether to lower IoU thr if a GT poorly
            overlaps with candidate bboxes
    """

    def __init__(self,
                 positive_overlap,
                 negative_overlap,
                 allow_low_quality=True):
        self.positive_overlap = positive_overlap
        self.negative_overlap = negative_overlap
        self.allow_low_quality = allow_low_quality

    def __call__(self, bboxes, gt_bboxes):
        # Delegate the whole assignment to label_box; ignore-region and
        # crowd handling are disabled, assignment stays on device.
        return label_box(
            bboxes,
            gt_bboxes,
            positive_overlap=self.positive_overlap,
            negative_overlap=self.negative_overlap,
            allow_low_quality=self.allow_low_quality,
            ignore_thresh=-1,
            is_crowd=None,
            assign_on_cpu=False)
||||
275
paddle_detection/ppdet/modeling/assigners/pose_utils.py
Normal file
275
paddle_detection/ppdet/modeling/assigners/pose_utils.py
Normal file
@@ -0,0 +1,275 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
|
||||
|
||||
|
||||
def masked_fill(x, mask, value):
    """Return a copy of ``x`` where positions flagged by ``mask`` are ``value``."""
    fill = paddle.full(x.shape, value, x.dtype)
    return paddle.where(mask, fill, x)
||||
|
||||
|
||||
@register
class KptL1Cost(object):
    """KptL1Cost: per-gt L1 matching cost over visible keypoints.

    this function based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py

    Args:
        weight (int | float, optional): loss_weight.
    """

    def __init__(self, weight=1.0):
        self.weight = weight

    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
        """
        Args:
            kpt_pred (Tensor): Predicted keypoints with normalized coordinates
                (x_{i}, y_{i}), which are all in range [0, 1]. Shape
                [num_query, K, 2].
            gt_keypoints (Tensor): Ground truth keypoints with normalized
                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
                Shape [num_gt, K].

        Returns:
            paddle.Tensor: kpt_cost value with weight, shape
            [num_query, num_gt].
        """
        kpt_cost = []
        for i in range(len(gt_keypoints)):
            if gt_keypoints[i].size == 0:
                # Empty gt: contribute a zero cost entry and skip the regular
                # computation. BUGFIX: without this `continue` the loop fell
                # through and appended a second cost entry for the same gt,
                # corrupting the cost matrix.
                kpt_cost.append(kpt_pred.sum() * 0)
                continue
            kpt_pred_tmp = kpt_pred.clone()
            valid_flag = valid_kpt_flag[i] > 0
            valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
                kpt_pred_tmp)
            # Zero out predictions at invisible keypoints so they do not
            # contribute to the L1 distance.
            if not valid_flag_expand.all():
                kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
            cost = F.pairwise_distance(
                kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
                gt_keypoints[i].reshape((-1, )).unsqueeze(0),
                p=1,
                keepdim=True)
            # Normalize by 2 * (#visible keypoints), floored at 1 to avoid
            # division by zero when nothing is visible.
            avg_factor = paddle.clip(
                valid_flag.astype('float32').sum() * 2, 1.0)
            cost = cost / avg_factor
            kpt_cost.append(cost)
        kpt_cost = paddle.concat(kpt_cost, axis=1)
        return kpt_cost * self.weight
||||
|
||||
|
||||
@register
class OksCost(object):
    """OksCost: negated object-keypoint-similarity matching cost.

    this function based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py

    Args:
        num_keypoints (int): number of keypoints (17 = COCO, 14 = CrowdPose).
        weight (int | float, optional): loss_weight.
    """

    def __init__(self, num_keypoints=17, weight=1.0):
        self.weight = weight
        # Per-keypoint falloff constants from the COCO OKS definition.
        if num_keypoints == 17:
            self.sigmas = np.array(
                [
                    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
                    1.07, .87, .87, .89, .89
                ],
                dtype=np.float32) / 10.0
        elif num_keypoints == 14:
            self.sigmas = np.array(
                [
                    .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
                    .89, .79, .79
                ],
                dtype=np.float32) / 10.0
        else:
            raise ValueError(f'Unsupported keypoints number {num_keypoints}')

    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
        """
        Args:
            kpt_pred (Tensor): Predicted keypoints with unnormalized
                coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
            gt_keypoints (Tensor): Ground truth keypoints with unnormalized
                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
                Shape [num_gt, K].
            gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].

        Returns:
            paddle.Tensor: oks_cost value with weight, shape
            [num_query, num_gt].
        """
        sigmas = paddle.to_tensor(self.sigmas)
        variances = (sigmas * 2)**2

        oks_cost = []
        assert len(gt_keypoints) == len(gt_areas)
        for i in range(len(gt_keypoints)):
            if gt_keypoints[i].size == 0:
                # Empty gt: contribute a zero cost entry and skip the regular
                # computation. BUGFIX: without this `continue` the loop fell
                # through and appended a second cost entry for the same gt,
                # corrupting the cost matrix.
                oks_cost.append(kpt_pred.sum() * 0)
                continue
            squared_distance = \
                (kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
                (kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
            vis_flag = (valid_kpt_flag[i] > 0).astype('int')
            vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
            num_vis_kpt = vis_ind.shape[0]
            # assert num_vis_kpt > 0
            if num_vis_kpt == 0:
                # No visible keypoint for this gt: zero cost column.
                oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
                continue
            area = gt_areas[i]

            # OKS kernel restricted to visible keypoints.
            squared_distance0 = squared_distance / (area * variances * 2)
            squared_distance0 = paddle.index_select(
                squared_distance0, vis_ind, axis=1)
            squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
                                                                   keepdim=True)
            oks = squared_distance1 / num_vis_kpt
            # The 1 is a constant that doesn't change the matching, so omitted.
            oks_cost.append(-oks)
        oks_cost = paddle.concat(oks_cost, axis=1)
        return oks_cost * self.weight
||||
|
||||
|
||||
@register
class ClassificationCost:
    """Softmax classification matching cost (ClsSoftmaxCost).

    Args:
        weight (int | float, optional): loss_weight
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                (num_query, num_class).
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            paddle.Tensor: cls_cost value with weight
        """
        # Following the official DETR repo, contrary to the loss that
        # NLL is used, we approximate it in 1 - cls_score[gt_label].
        # The 1 is a constant that doesn't change the matching,
        # so it can be omitted.
        probs = cls_pred.softmax(-1)
        return -probs[:, gt_labels] * self.weight
||||
|
||||
|
||||
@register
class FocalLossCost:
    """FocalLossCost.

    Args:
        weight (int | float, optional): loss_weight
        alpha (int | float, optional): focal_loss alpha
        gamma (int | float, optional): focal_loss gamma
        eps (float, optional): default 1e-12
        binary_input (bool, optional): Whether the input is binary,
            default False.
    """

    def __init__(self,
                 weight=1.,
                 alpha=0.25,
                 gamma=2,
                 eps=1e-12,
                 binary_input=False):
        self.weight = weight
        self.alpha = alpha
        self.gamma = gamma
        self.eps = eps
        self.binary_input = binary_input

    def _focal_loss_cost(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                (num_query, num_class).
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            paddle.Tensor: cls_cost value with weight
        """
        if gt_labels.size == 0:
            # No gt: zero-valued cost that keeps the graph connected.
            return cls_pred.sum() * 0
        cls_pred = F.sigmoid(cls_pred)
        # Per-class focal terms; the cost of assigning query q to class c is
        # pos_cost[q, c] - neg_cost[q, c].
        neg_cost = -(1 - cls_pred + self.eps).log() * (
            1 - self.alpha) * cls_pred.pow(self.gamma)
        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
            1 - cls_pred).pow(self.gamma)

        cls_cost = paddle.index_select(
            pos_cost, gt_labels, axis=1) - paddle.index_select(
                neg_cost, gt_labels, axis=1)
        return cls_cost * self.weight

    def _mask_focal_loss_cost(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classfication logits
                in shape (num_query, d1, ..., dn), dtype=paddle.float32.
            gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
                dtype=paddle.long. Labels should be binary.

        Returns:
            Tensor: Focal cost matrix with weight in shape
            (num_query, num_gt).
        """
        cls_pred = cls_pred.flatten(1)
        # BUGFIX: `.float()` is a torch idiom not provided by paddle.Tensor;
        # use astype for the dtype conversion instead.
        gt_labels = gt_labels.flatten(1).astype('float32')
        n = cls_pred.shape[1]
        cls_pred = F.sigmoid(cls_pred)
        neg_cost = -(1 - cls_pred + self.eps).log() * (
            1 - self.alpha) * cls_pred.pow(self.gamma)
        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
            1 - cls_pred).pow(self.gamma)

        # Accumulate focal terms over the flattened mask positions, averaged
        # by the number of positions n.
        cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
            paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
        return cls_cost / n * self.weight

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classfication logits.
            gt_labels (Tensor)): Labels.

        Returns:
            Tensor: Focal cost matrix with weight in shape
            (num_query, num_gt).
        """
        if self.binary_input:
            return self._mask_focal_loss_cost(cls_pred, gt_labels)
        else:
            return self._focal_loss_cost(cls_pred, gt_labels)
||||
@@ -0,0 +1,164 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..rbox_utils import rotated_iou_similarity, check_points_in_rotated_boxes
|
||||
from .utils import gather_topk_anchors, compute_max_iou_anchor
|
||||
|
||||
__all__ = ['RotatedTaskAlignedAssigner']
|
||||
|
||||
|
||||
@register
class RotatedTaskAlignedAssigner(nn.Layer):
    """TOOD: Task-aligned One-stage Object Detection

    Task-aligned assigner adapted to rotated boxes (5-dof: cx, cy, w, h, angle).
    """

    def __init__(self, topk=13, alpha=1.0, beta=6.0, eps=1e-9):
        # topk: candidates kept per gt; alpha/beta: exponents of the
        # classification score and IoU in the alignment metric.
        super(RotatedTaskAlignedAssigner, self).__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.eps = eps

    @paddle.no_grad()
    def forward(self,
                pred_scores,
                pred_bboxes,
                anchor_points,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None):
        r"""This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        The assignment is done in following steps
        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
        2. select top-k bbox as candidates for each gt
        3. limit the positive sample's center in gt (because the anchor-free detector
           only can predict positive distance)
        4. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        Args:
            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 5)
            anchor_points (Tensor, float32): pre-defined anchors, shape(1, L, 2), "cxcy" format
            num_anchors_list (List): num of anchors in each level, shape(L)
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 5)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 5)
            assigned_scores (Tensor): (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim
        assert gt_labels.ndim == gt_bboxes.ndim and \
            gt_bboxes.ndim == 3

        batch_size, num_anchors, num_classes = pred_scores.shape
        _, num_max_boxes, _ = gt_bboxes.shape

        # negative batch: no gt at all, everything is background
        if num_max_boxes == 0:
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype=gt_labels.dtype)
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 5])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # compute iou between gt and pred bbox, [B, n, L]
        ious = rotated_iou_similarity(gt_bboxes, pred_bboxes)
        # numerical guard: rotated IoU can spill slightly above 1; zero it out
        ious = paddle.where(ious > 1 + self.eps, paddle.zeros_like(ious), ious)
        ious.stop_gradient = True
        # gather pred bboxes class score for each gt's label, [B, n, L]
        pred_scores = pred_scores.transpose([0, 2, 1])
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        gt_labels_ind = paddle.stack(
            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
            axis=-1)
        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
        # compute alignment metrics s^alpha * iou^beta, [B, n, L]
        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
            self.beta)

        # check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_in_rotated_boxes(anchor_points, gt_bboxes)

        # select topk largest alignment metrics pred bbox as candidates
        # for each gt, [B, n, L]
        is_in_topk = gather_topk_anchors(
            alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected, [B, n, L]
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)

        # assigned target: flatten (batch, gt) into a single index space
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 5]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 5])

        # one-hot over C+1 classes, then drop the background column
        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
        ind = list(range(num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(
            assigned_scores, paddle.to_tensor(ind), axis=-1)
        # rescale alignment metrics so each gt's best metric equals its best IoU
        alignment_metrics *= mask_positive
        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
                                                           keepdim=True)
        alignment_metrics = alignment_metrics / (
            max_metrics_per_instance + self.eps) * max_ious_per_instance
        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
        assigned_scores = assigned_scores * alignment_metrics

        assigned_bboxes.stop_gradient = True
        assigned_scores.stop_gradient = True
        assigned_labels.stop_gradient = True
        return assigned_labels, assigned_bboxes, assigned_scores
||||
265
paddle_detection/ppdet/modeling/assigners/simota_assigner.py
Normal file
265
paddle_detection/ppdet/modeling/assigners/simota_assigner.py
Normal file
@@ -0,0 +1,265 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# The code is based on:
|
||||
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/sim_ota_assigner.py
|
||||
|
||||
import paddle
|
||||
import numpy as np
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.modeling.losses.varifocal_loss import varifocal_loss
|
||||
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
|
||||
@register
|
||||
class SimOTAAssigner(object):
|
||||
"""Computes matching between predictions and ground truth.
|
||||
Args:
|
||||
center_radius (int | float, optional): Ground truth center size
|
||||
to judge whether a prior is in center. Default 2.5.
|
||||
candidate_topk (int, optional): The candidate top-k which used to
|
||||
get top-k ious to calculate dynamic-k. Default 10.
|
||||
iou_weight (int | float, optional): The scale factor for regression
|
||||
iou cost. Default 3.0.
|
||||
cls_weight (int | float, optional): The scale factor for classification
|
||||
cost. Default 1.0.
|
||||
num_classes (int): The num_classes of dataset.
|
||||
use_vfl (int): Whether to use varifocal_loss when calculating the cost matrix.
|
||||
"""
|
||||
__shared__ = ['num_classes']
|
||||
|
||||
def __init__(self,
             center_radius=2.5,
             candidate_topk=10,
             iou_weight=3.0,
             cls_weight=1.0,
             num_classes=80,
             use_vfl=True):
    """Store the SimOTA matching hyper-parameters; holds no other state."""
    # candidate selection
    self.center_radius = center_radius
    self.candidate_topk = candidate_topk
    # cost-matrix weights
    self.iou_weight = iou_weight
    self.cls_weight = cls_weight
    # dataset / loss options
    self.num_classes = num_classes
    self.use_vfl = use_vfl
|
||||
def get_in_gt_and_in_center_info(self, flatten_center_and_stride,
                                 gt_bboxes):
    """Classify each prior center w.r.t. the gt boxes and their center regions.

    Args:
        flatten_center_and_stride (Tensor): rows of
            (center_x, center_y, stride_x, stride_y) per prior.
        gt_bboxes (Tensor): gt boxes, xyxy format.

    Returns:
        tuple:
            - is_in_gts_or_centers_all (Tensor[bool], [n_center]): prior
              center lies inside any gt box OR any gt center region.
            - is_in_gts_or_centers_all_inds (Tensor): indices of the above.
            - is_in_gts_and_centers (Tensor[bool], [num_fg, num_gt]): for
              those selected priors, inside-box AND inside-center per gt.
    """
    num_gt = gt_bboxes.shape[0]

    # broadcast every prior's center/stride against every gt
    flatten_x = flatten_center_and_stride[:, 0].unsqueeze(1).tile(
        [1, num_gt])
    flatten_y = flatten_center_and_stride[:, 1].unsqueeze(1).tile(
        [1, num_gt])
    flatten_stride_x = flatten_center_and_stride[:, 2].unsqueeze(1).tile(
        [1, num_gt])
    flatten_stride_y = flatten_center_and_stride[:, 3].unsqueeze(1).tile(
        [1, num_gt])

    # is prior centers in gt bboxes, shape: [n_center, n_gt]
    l_ = flatten_x - gt_bboxes[:, 0]
    t_ = flatten_y - gt_bboxes[:, 1]
    r_ = gt_bboxes[:, 2] - flatten_x
    b_ = gt_bboxes[:, 3] - flatten_y

    # all four signed distances positive <=> center strictly inside the box
    deltas = paddle.stack([l_, t_, r_, b_], axis=1)
    is_in_gts = deltas.min(axis=1) > 0
    is_in_gts_all = is_in_gts.sum(axis=1) > 0

    # is prior centers in gt centers: a (center_radius * stride)-sized
    # window around each gt center
    gt_center_xs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
    gt_center_ys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
    ct_bound_l = gt_center_xs - self.center_radius * flatten_stride_x
    ct_bound_t = gt_center_ys - self.center_radius * flatten_stride_y
    ct_bound_r = gt_center_xs + self.center_radius * flatten_stride_x
    ct_bound_b = gt_center_ys + self.center_radius * flatten_stride_y

    cl_ = flatten_x - ct_bound_l
    ct_ = flatten_y - ct_bound_t
    cr_ = ct_bound_r - flatten_x
    cb_ = ct_bound_b - flatten_y

    ct_deltas = paddle.stack([cl_, ct_, cr_, cb_], axis=1)
    is_in_cts = ct_deltas.min(axis=1) > 0
    is_in_cts_all = is_in_cts.sum(axis=1) > 0

    # in any of gts or gt centers, shape: [n_center]
    is_in_gts_or_centers_all = paddle.logical_or(is_in_gts_all,
                                                 is_in_cts_all)

    is_in_gts_or_centers_all_inds = paddle.nonzero(
        is_in_gts_or_centers_all).squeeze(1)

    # both in gts and gt centers, shape: [num_fg, num_gt]
    # cast bool -> int for paddle.gather, then back to bool
    is_in_gts_and_centers = paddle.logical_and(
        paddle.gather(
            is_in_gts.cast('int'), is_in_gts_or_centers_all_inds,
            axis=0).cast('bool'),
        paddle.gather(
            is_in_cts.cast('int'), is_in_gts_or_centers_all_inds,
            axis=0).cast('bool'))
    return is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_gts_and_centers
||||
|
||||
    def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
        """Assign a dynamic number (k) of lowest-cost priors to each gt (SimOTA).

        Args:
            cost_matrix (Tensor): matching cost, shape [num_priors, num_gt].
            pairwise_ious (Tensor): IoU between priors and gts,
                shape [num_priors, num_gt].
            num_gt (int): number of ground-truth boxes.

        Returns:
            match_gt_inds_to_fg (ndarray): gt index assigned to each
                foreground prior (0-based, over rows of the cost matrix).
            match_fg_mask_inmatrix (ndarray, bool): foreground mask over the
                priors that entered the cost matrix.
        """
        match_matrix = np.zeros_like(cost_matrix.numpy())
        # select candidate topk ious for dynamic-k calculation
        topk_ious, _ = paddle.topk(
            pairwise_ious,
            min(self.candidate_topk, pairwise_ious.shape[0]),
            axis=0)
        # calculate dynamic k for each gt: sum of its top-k IoUs, at least 1
        dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
        for gt_idx in range(num_gt):
            # take the dynamic_k priors with the lowest cost for this gt
            _, pos_idx = paddle.topk(
                cost_matrix[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
            match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0

        del topk_ious, dynamic_ks, pos_idx

        # match points more than two gts: keep only the min-cost gt per prior
        extra_match_gts_mask = match_matrix.sum(1) > 1
        if extra_match_gts_mask.sum() > 0:
            cost_matrix = cost_matrix.numpy()
            cost_argmin = np.argmin(
                cost_matrix[extra_match_gts_mask, :], axis=1)
            match_matrix[extra_match_gts_mask, :] *= 0.0
            match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
        # get foreground mask
        match_fg_mask_inmatrix = match_matrix.sum(1) > 0
        match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)

        return match_gt_inds_to_fg, match_fg_mask_inmatrix
|
||||
|
||||
def get_sample(self, assign_gt_inds, gt_bboxes):
|
||||
pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
|
||||
neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
|
||||
pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
|
||||
|
||||
if gt_bboxes.size == 0:
|
||||
# hack for index error case
|
||||
assert pos_assigned_gt_inds.size == 0
|
||||
pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
|
||||
else:
|
||||
if len(gt_bboxes.shape) < 2:
|
||||
gt_bboxes = gt_bboxes.resize(-1, 4)
|
||||
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
|
||||
return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
|
||||
|
||||
    def __call__(self,
                 flatten_cls_pred_scores,
                 flatten_center_and_stride,
                 flatten_bboxes,
                 gt_bboxes,
                 gt_labels,
                 eps=1e-7):
        """Assign gt to priors using SimOTA.

        Args:
            flatten_cls_pred_scores (Tensor): class scores per prior.
            flatten_center_and_stride (Tensor): per-prior (cx, cy, sx, sy).
            flatten_bboxes (Tensor): predicted boxes per prior, [L, 4].
            gt_bboxes (Tensor): ground-truth boxes, [num_gt, 4].
            gt_labels (Tensor): ground-truth labels, [num_gt, 1].
            eps (float): numerical-stability constant for the IoU log cost.

        Returns:
            pos_num (int): number of positives (at least 1).
            label (ndarray): per-prior class target; self.num_classes = bg.
            label_weight (ndarray): per-prior classification weight.
            bbox_target (ndarray): per-prior regression target, [L, 4].
        """
        num_gt = gt_bboxes.shape[0]
        num_bboxes = flatten_bboxes.shape[0]

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes: everything is background
            label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
            label_weight = np.ones([num_bboxes], dtype=np.float32)
            bbox_target = np.zeros_like(flatten_center_and_stride)
            return 0, label, label_weight, bbox_target

        is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
            flatten_center_and_stride, gt_bboxes)

        # bboxes and scores to calculate matrix (candidate priors only)
        valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
        valid_cls_pred_scores = flatten_cls_pred_scores[
            is_in_gts_or_centers_all_inds]
        num_valid_bboxes = valid_flatten_bboxes.shape[0]

        pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
                                            gt_bboxes)  # [num_points,num_gts]
        if self.use_vfl:
            # varifocal-loss classification cost: the IoU with the matching
            # gt acts as the soft target score for that gt's class
            gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
                [num_valid_bboxes, 1]).reshape([-1])
            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
                [1, num_gt, 1]).reshape([-1, self.num_classes])
            vfl_score = np.zeros(valid_pred_scores.shape)
            vfl_score[np.arange(0, vfl_score.shape[0]), gt_vfl_labels.numpy(
            )] = pairwise_ious.reshape([-1])
            vfl_score = paddle.to_tensor(vfl_score)
            losses_vfl = varifocal_loss(
                valid_pred_scores, vfl_score,
                use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
            losses_giou = batch_bbox_overlaps(
                valid_flatten_bboxes, gt_bboxes, mode='giou')
            # huge constant pushes priors outside gt&center out of matching
            cost_matrix = (
                losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
                100000000)
        else:
            # standard SimOTA cost: BCE classification cost + -log(IoU)
            iou_cost = -paddle.log(pairwise_ious + eps)
            gt_onehot_label = (F.one_hot(
                gt_labels.squeeze(-1).cast(paddle.int64),
                flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
                               .tile([num_valid_bboxes, 1, 1]))

            valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
                [1, num_gt, 1])
            cls_cost = F.binary_cross_entropy(
                valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)

            cost_matrix = (
                cls_cost * self.cls_weight + iou_cost * self.iou_weight +
                paddle.logical_not(is_in_boxes_and_center).cast('float32') *
                100000000)

        match_gt_inds_to_fg, match_fg_mask_inmatrix = \
            self.dynamic_k_matching(
                cost_matrix, pairwise_ious, num_gt)

        # sample and assign results: scatter matrix-space matches back to
        # the full prior set
        assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
        match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
        match_fg_mask_inall[is_in_gts_or_centers_all.numpy(
        )] = match_fg_mask_inmatrix

        # store gt indices 1-based so 0 can mean "background"
        assigned_gt_inds[match_fg_mask_inall.astype(
            np.bool_)] = match_gt_inds_to_fg + 1

        pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
            = self.get_sample(assigned_gt_inds, gt_bboxes.numpy())

        bbox_target = np.zeros(flatten_bboxes.shape, paddle.common_ops_import.convert_dtype(flatten_bboxes.dtype))
        bbox_weight = np.zeros_like(bbox_target)
        label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
        label_weight = np.zeros([num_bboxes], dtype=np.float32)

        if len(pos_inds) > 0:
            gt_labels = gt_labels.numpy()
            pos_bbox_targets = pos_gt_bboxes
            bbox_target[pos_inds, :] = pos_bbox_targets
            bbox_weight[pos_inds, :] = 1.0
            # single-class corner case: all-zero labels map to class 0
            if not np.any(gt_labels):
                label[pos_inds] = 0
            else:
                label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]

            label_weight[pos_inds] = 1.0
        if len(neg_inds) > 0:
            label_weight[neg_inds] = 1.0

        # avoid division by zero in downstream loss normalization
        pos_num = max(pos_inds.size, 1)

        return pos_num, label, label_weight, bbox_target
|
||||
@@ -0,0 +1,193 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..bbox_utils import batch_iou_similarity
|
||||
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
|
||||
compute_max_iou_anchor)
|
||||
|
||||
__all__ = ['TaskAlignedAssigner']
|
||||
|
||||
|
||||
def is_close_gt(anchor, gt, stride_lst, max_dist=2.0, alpha=2.):
    """Binary closeness mask between anchors and gt centers, measured in
    stride units so large-stride (coarse) levels tolerate larger distances.

    Args:
        anchor (Tensor): anchor centers with the shape [L, 2]
        gt (Tensor): gt boxes ("xmin, ymin, xmax, ymax") with shape [N, M2, 4]
        stride_lst (List[int]): number of anchors per FPN level; level idx
            maps to stride 32 / 2**idx (hard-coded descending pyramid).
        max_dist (float): threshold in stride units.
        alpha (float): unused, kept for interface compatibility.
    Return:
        dist_ratio (Tensor): 1. where dist/stride < max_dist else 0.,
            shape [N, M2, L] after broadcasting.
    """
    center1 = anchor.unsqueeze(0)
    center2 = (gt[..., :2] + gt[..., -2:]) / 2.
    center1 = center1.unsqueeze(1)  # [N, M1, 2] -> [N, 1, M1, 2]
    center2 = center2.unsqueeze(2)  # [N, M2, 2] -> [N, M2, 1, 2]

    stride = paddle.concat([
        paddle.full([x], 32 / pow(2, idx)) for idx, x in enumerate(stride_lst)
    ]).unsqueeze(0).unsqueeze(0)
    dist = paddle.linalg.norm(center1 - center2, p=2, axis=-1) / stride
    # fix: the original aliased dist_ratio to dist and thresholded with two
    # in-place mask assignments, so the second mask was computed on already
    # mutated data (it only worked because max_dist > 1). A single cast of
    # the comparison is equivalent and has no aliasing hazard.
    dist_ratio = paddle.cast(dist < max_dist, dist.dtype)
    return dist_ratio
|
||||
|
||||
|
||||
@register
class TaskAlignedAssigner(nn.Layer):
    """TOOD: Task-aligned One-stage Object Detection

    Assigns gts to anchors by a task-alignment metric that combines the
    predicted class score and the predicted-box IoU.

    Args:
        topk (int): number of candidate anchors kept per gt.
        alpha (float): exponent on the classification score in the metric.
        beta (float): exponent on the IoU in the metric.
        eps (float): numerical-stability constant.
        is_close_gt (bool): if True, use the stride-normalized center
            distance test (`is_close_gt`) instead of the inside-gt-box test.
    """

    def __init__(self,
                 topk=13,
                 alpha=1.0,
                 beta=6.0,
                 eps=1e-9,
                 is_close_gt=False):
        super(TaskAlignedAssigner, self).__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.eps = eps
        self.is_close_gt = is_close_gt

    @paddle.no_grad()
    def forward(self,
                pred_scores,
                pred_bboxes,
                anchor_points,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None):
        r"""This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        The assignment is done in following steps
        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
        2. select top-k bbox as candidates for each gt
        3. limit the positive sample's center in gt (because the anchor-free detector
           only can predict positive distance)
        4. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        Args:
            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
            anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
            num_anchors_list (List): num of anchors in each level, shape(L)
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3

        batch_size, num_anchors, num_classes = pred_scores.shape
        _, num_max_boxes, _ = gt_bboxes.shape

        # negative batch: no gt at all, everything is background
        if num_max_boxes == 0:
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype='int32')
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # compute iou between gt and pred bbox, [B, n, L]
        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
        # gather pred bboxes class score at each gt's class, [B, n, L]
        pred_scores = pred_scores.transpose([0, 2, 1])
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        gt_labels_ind = paddle.stack(
            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
            axis=-1)
        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
        # compute alignment metrics, [B, n, L]
        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
            self.beta)

        # check the positive sample's center in gt, [B, n, L]
        if self.is_close_gt:
            is_in_gts = is_close_gt(anchor_points, gt_bboxes, num_anchors_list)
        else:
            is_in_gts = check_points_inside_bboxes(anchor_points, gt_bboxes)

        # select topk largest alignment metrics pred bbox as candidates
        # for each gt, [B, n, L]
        is_in_topk = gather_topk_anchors(
            alignment_metrics * is_in_gts, self.topk, topk_mask=pad_gt_mask)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected, [B, n, L]
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)

        # assigned target: flatten (batch, gt) indexing for gather
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # one-hot over num_classes + 1 then drop the background column
        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
        ind = list(range(num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(
            assigned_scores, paddle.to_tensor(ind), axis=-1)
        # rescale alignment metrics so each gt's best metric equals its best IoU
        alignment_metrics *= mask_positive
        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
                                                           keepdim=True)
        alignment_metrics = alignment_metrics / (
            max_metrics_per_instance + self.eps) * max_ious_per_instance
        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
        assigned_scores = assigned_scores * alignment_metrics

        return assigned_labels, assigned_bboxes, assigned_scores
|
||||
@@ -0,0 +1,181 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ..bbox_utils import batch_iou_similarity
|
||||
from .utils import (gather_topk_anchors, check_points_inside_bboxes,
|
||||
compute_max_iou_anchor)
|
||||
|
||||
__all__ = ['TaskAlignedAssigner_CR']
|
||||
|
||||
|
||||
@register
class TaskAlignedAssigner_CR(nn.Layer):
    """TOOD: Task-aligned One-stage Object Detection with Center R

    Variant of the task-aligned assigner that can additionally use a
    stride-scaled center-radius region as a fallback candidate area.

    Args:
        topk (int): number of candidate anchors kept per gt.
        alpha (float): exponent on the classification score in the metric.
        beta (float): exponent on the IoU in the metric.
        center_radius (float|None): radius in stride units; None disables
            the center-region fallback.
        eps (float): numerical-stability constant.
    """

    def __init__(self,
                 topk=13,
                 alpha=1.0,
                 beta=6.0,
                 center_radius=None,
                 eps=1e-9):
        super(TaskAlignedAssigner_CR, self).__init__()
        self.topk = topk
        self.alpha = alpha
        self.beta = beta
        self.center_radius = center_radius
        self.eps = eps

    @paddle.no_grad()
    def forward(self,
                pred_scores,
                pred_bboxes,
                anchor_points,
                stride_tensor,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None):
        r"""This code is based on
        https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py

        The assignment is done in following steps
        1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
        2. select top-k bbox as candidates for each gt
        3. limit the positive sample's center in gt (because the anchor-free detector
           only can predict positive distance)
        4. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        Args:
            pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
            pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
            anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
            stride_tensor (Tensor, float32): stride of feature map, shape(L, 1)
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C)
        """
        assert pred_scores.ndim == pred_bboxes.ndim
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3

        batch_size, num_anchors, num_classes = pred_scores.shape
        _, num_max_boxes, _ = gt_bboxes.shape

        # negative batch: no gt at all, everything is background
        if num_max_boxes == 0:
            assigned_labels = paddle.full(
                [batch_size, num_anchors], bg_index, dtype='int32')
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # compute iou between gt and pred bbox, [B, n, L]
        ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
        # gather pred bboxes class score at each gt's class, [B, n, L]
        pred_scores = pred_scores.transpose([0, 2, 1])
        batch_ind = paddle.arange(
            end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
        gt_labels_ind = paddle.stack(
            [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
            axis=-1)
        bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
        # compute alignment metrics, [B, n, L]
        alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
            self.beta) * pad_gt_mask

        # select positive sample, [B, n, L]
        if self.center_radius is None:
            # check the positive sample's center in gt, [B, n, L]
            is_in_gts = check_points_inside_bboxes(
                anchor_points, gt_bboxes, sm_use=True)
            # select topk largest alignment metrics pred bbox as candidates
            # for each gt, [B, n, L]
            mask_positive = gather_topk_anchors(
                alignment_metrics, self.topk, topk_mask=pad_gt_mask) * is_in_gts
        else:
            is_in_gts, is_in_center = check_points_inside_bboxes(
                anchor_points,
                gt_bboxes,
                stride_tensor * self.center_radius,
                sm_use=True)
            is_in_gts *= pad_gt_mask
            is_in_center *= pad_gt_mask
            # gts with no anchor inside them fall back to the center region:
            # boost those candidates' metric by +1 so topk can pick them
            candidate_metrics = paddle.where(
                is_in_gts.sum(-1, keepdim=True) == 0,
                alignment_metrics + is_in_center,
                alignment_metrics)
            mask_positive = gather_topk_anchors(
                candidate_metrics, self.topk,
                topk_mask=pad_gt_mask) * paddle.cast((is_in_center > 0) |
                                                     (is_in_gts > 0), 'float32')

        # if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected, [B, n, L]
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious * mask_positive)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        assigned_gt_index = mask_positive.argmax(axis=-2)

        # assigned target: flatten (batch, gt) indexing for gather
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(
            gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(
            gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # one-hot over num_classes + 1 then drop the background column
        assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
        ind = list(range(num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(
            assigned_scores, paddle.to_tensor(ind), axis=-1)
        # rescale alignment metrics so each gt's best metric equals its best IoU
        alignment_metrics *= mask_positive
        max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
        max_ious_per_instance = (ious * mask_positive).max(axis=-1,
                                                           keepdim=True)
        alignment_metrics = alignment_metrics / (
            max_metrics_per_instance + self.eps) * max_ious_per_instance
        alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
        assigned_scores = assigned_scores * alignment_metrics

        return assigned_labels, assigned_bboxes, assigned_scores
|
||||
@@ -0,0 +1,93 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
from ppdet.modeling.bbox_utils import batch_bbox_overlaps
|
||||
from ppdet.modeling.transformers import bbox_xyxy_to_cxcywh
|
||||
|
||||
__all__ = ['UniformAssigner']
|
||||
|
||||
|
||||
def batch_p_dist(x, y, p=2):
    """Pairwise p-norm distance between the leading (batch) indices of x and y.

    All trailing dimensions are reduced, so the result has shape
    [x.shape[0], y.shape[0]].
    """
    delta = x.unsqueeze(1) - y
    reduce_axes = list(range(2, delta.dim()))
    return paddle.norm(delta, p=p, axis=reduce_axes)
|
||||
|
||||
|
||||
@register
class UniformAssigner(nn.Layer):
    """Uniform matching assigner (YOLOF-style): each gt is matched to the
    `match_times` closest predictions and the `match_times` closest anchors
    by center L1 distance, then low-IoU positives and high-IoU negatives
    are ignored.

    Args:
        pos_ignore_thr (float): positives with anchor-gt IoU below this are
            ignored.
        neg_ignore_thr (float): negatives whose best pred-gt IoU exceeds
            this are ignored.
        match_times (int): number of matches taken per gt from each of the
            prediction-distance and anchor-distance rankings.
    """

    def __init__(self, pos_ignore_thr, neg_ignore_thr, match_times=4):
        super(UniformAssigner, self).__init__()
        self.pos_ignore_thr = pos_ignore_thr
        self.neg_ignore_thr = neg_ignore_thr
        self.match_times = match_times

    def forward(self, bbox_pred, anchor, gt_bboxes, gt_labels=None):
        """Compute the assignment.

        Args:
            bbox_pred (Tensor): predicted boxes, shape [num_priors, 4].
            anchor (Tensor): anchor boxes, shape [num_priors, 4].
            gt_bboxes (Tensor): gt boxes, shape [num_gt, 4].
            gt_labels (Tensor|None): unused here; kept for interface parity.

        Returns:
            match_labels (Tensor, int32): per-prior assignment,
                -2 = ignore, -1 = negative, >= 0 = matched gt index.
            pos_bbox_pred (Tensor|None): predictions of kept positives.
            pos_bbox_tar (Tensor|None): matching gt boxes (detached).
        """
        num_bboxes = bbox_pred.shape[0]
        num_gts = gt_bboxes.shape[0]
        match_labels = paddle.full([num_bboxes], -1, dtype=paddle.int32)

        pred_ious = batch_bbox_overlaps(bbox_pred, gt_bboxes)
        pred_max_iou = pred_ious.max(axis=1)
        neg_ignore = pred_max_iou > self.neg_ignore_thr
        # exclude potential ignored neg samples first, deal with pos samples later
        #match_labels: -2(ignore), -1(neg) or >=0(pos_inds)
        match_labels = paddle.where(neg_ignore,
                                    paddle.full_like(match_labels, -2),
                                    match_labels)

        # rank by L1 distance between box centers (cxcywh space)
        bbox_pred_c = bbox_xyxy_to_cxcywh(bbox_pred)
        anchor_c = bbox_xyxy_to_cxcywh(anchor)
        gt_bboxes_c = bbox_xyxy_to_cxcywh(gt_bboxes)
        bbox_pred_dist = batch_p_dist(bbox_pred_c, gt_bboxes_c, p=1)
        anchor_dist = batch_p_dist(anchor_c, gt_bboxes_c, p=1)

        top_pred = bbox_pred_dist.topk(
            k=self.match_times, axis=0, largest=False)[1]
        top_anchor = anchor_dist.topk(
            k=self.match_times, axis=0, largest=False)[1]

        # target gt index for each of the match_times x num_gts candidates
        tar_pred = paddle.arange(num_gts).expand([self.match_times, num_gts])
        tar_anchor = paddle.arange(num_gts).expand([self.match_times, num_gts])
        pos_places = paddle.concat([top_pred, top_anchor]).reshape([-1])
        pos_inds = paddle.concat([tar_pred, tar_anchor]).reshape([-1])

        # drop candidate positives whose anchor-gt IoU is too low
        pos_anchor = anchor[pos_places]
        pos_tar_bbox = gt_bboxes[pos_inds]
        pos_ious = batch_bbox_overlaps(
            pos_anchor, pos_tar_bbox, is_aligned=True)
        pos_ignore = pos_ious < self.pos_ignore_thr
        pos_inds = paddle.where(pos_ignore,
                                paddle.full_like(pos_inds, -2), pos_inds)
        match_labels[pos_places] = pos_inds
        match_labels.stop_gradient = True
        pos_keep = ~pos_ignore

        if pos_keep.sum() > 0:
            pos_places_keep = pos_places[pos_keep]
            pos_bbox_pred = bbox_pred[pos_places_keep].reshape([-1, 4])
            pos_bbox_tar = pos_tar_bbox[pos_keep].reshape([-1, 4]).detach()
        else:
            pos_bbox_pred = None
            pos_bbox_tar = None

        return match_labels, pos_bbox_pred, pos_bbox_tar
|
||||
230
paddle_detection/ppdet/modeling/assigners/utils.py
Normal file
230
paddle_detection/ppdet/modeling/assigners/utils.py
Normal file
@@ -0,0 +1,230 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
__all__ = [
|
||||
'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
|
||||
'compute_max_iou_anchor', 'compute_max_iou_gt',
|
||||
'generate_anchors_for_grid_cell'
|
||||
]
|
||||
|
||||
|
||||
def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
    r""" Pad 0 in gt_labels and gt_bboxes.
    Args:
        gt_labels (Tensor|List[Tensor], int64): Label of gt_bboxes,
            shape is [B, n, 1] or [[n_1, 1], [n_2, 1], ...], here n = sum(n_i)
        gt_bboxes (Tensor|List[Tensor], float32): Ground truth bboxes,
            shape is [B, n, 4] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
        gt_scores (Tensor|List[Tensor]|None, float32): Score of gt_bboxes,
            shape is [B, n, 1] or [[n_1, 4], [n_2, 4], ...], here n = sum(n_i)
    Returns:
        pad_gt_labels (Tensor, int64): shape[B, n, 1]
        pad_gt_bboxes (Tensor, float32): shape[B, n, 4]
        pad_gt_scores (Tensor, float32): shape[B, n, 1]
        pad_gt_mask (Tensor, float32): shape[B, n, 1], 1 means bbox, 0 means no bbox
    """
    if isinstance(gt_labels, paddle.Tensor) and isinstance(gt_bboxes,
                                                           paddle.Tensor):
        # already batched: derive the validity mask from non-zero boxes
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3
        pad_gt_mask = (
            gt_bboxes.sum(axis=-1, keepdim=True) > 0).astype(gt_bboxes.dtype)
        if gt_scores is None:
            gt_scores = pad_gt_mask.clone()
        assert gt_labels.ndim == gt_scores.ndim

        return gt_labels, gt_bboxes, gt_scores, pad_gt_mask
    elif isinstance(gt_labels, list) and isinstance(gt_bboxes, list):
        # per-image lists: pad every image up to the largest gt count
        assert len(gt_labels) == len(gt_bboxes), \
            'The number of `gt_labels` and `gt_bboxes` is not equal. '
        num_max_boxes = max([len(a) for a in gt_bboxes])
        batch_size = len(gt_bboxes)
        # pad label and bbox
        pad_gt_labels = paddle.zeros(
            [batch_size, num_max_boxes, 1], dtype=gt_labels[0].dtype)
        pad_gt_bboxes = paddle.zeros(
            [batch_size, num_max_boxes, 4], dtype=gt_bboxes[0].dtype)
        pad_gt_scores = paddle.zeros(
            [batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
        pad_gt_mask = paddle.zeros(
            [batch_size, num_max_boxes, 1], dtype=gt_bboxes[0].dtype)
        for i, (label, bbox) in enumerate(zip(gt_labels, gt_bboxes)):
            if len(label) > 0 and len(bbox) > 0:
                pad_gt_labels[i, :len(label)] = label
                pad_gt_bboxes[i, :len(bbox)] = bbox
                pad_gt_mask[i, :len(bbox)] = 1.
                if gt_scores is not None:
                    pad_gt_scores[i, :len(gt_scores[i])] = gt_scores[i]
        if gt_scores is None:
            # default score is the mask itself (1 for every real box)
            pad_gt_scores = pad_gt_mask.clone()
        return pad_gt_labels, pad_gt_bboxes, pad_gt_scores, pad_gt_mask
    else:
        raise ValueError('The input `gt_labels` or `gt_bboxes` is invalid! ')
|
||||
|
||||
|
||||
def gather_topk_anchors(metrics, topk, largest=True, topk_mask=None, eps=1e-9):
    r"""
    Args:
        metrics (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
        topk (int): The number of top elements to look for along the axis.
        largest (bool) : largest is a flag, if set to true,
            algorithm will sort by descending order, otherwise sort by
            ascending order. Default: True
        topk_mask (Tensor, float32): shape[B, n, 1], ignore bbox mask,
            Default: None
        eps (float): Default: 1e-9
    Returns:
        is_in_topk (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    num_anchors = metrics.shape[-1]
    topk_metrics, topk_idxs = paddle.topk(
        metrics, topk, axis=-1, largest=largest)
    if topk_mask is None:
        # no explicit mask: keep rows whose best metric is meaningfully > 0
        topk_mask = (
            topk_metrics.max(axis=-1, keepdim=True) > eps).astype(metrics.dtype)
    # scatter topk indices to a per-anchor {0,1} map via one-hot + sum
    is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(
        axis=-2).astype(metrics.dtype)
    return is_in_topk * topk_mask
|
||||
|
||||
|
||||
def check_points_inside_bboxes(points,
                               bboxes,
                               center_radius_tensor=None,
                               eps=1e-9,
                               sm_use=False):
    r"""
    Args:
        points (Tensor, float32): shape[L, 2], "xy" format, L: num_anchors
        bboxes (Tensor, float32): shape[B, n, 4], "xmin, ymin, xmax, ymax" format
        center_radius_tensor (Tensor, float32): shape [L, 1]. Default: None.
        eps (float): Default: 1e-9
        sm_use (bool): with `center_radius_tensor`, return the two masks
            separately (cast to bboxes.dtype) instead of and/or combinations.
    Returns:
        is_in_bboxes (Tensor, float32): shape[B, n, L], value=1. means selected
        NOTE: when `center_radius_tensor` is given and sm_use=False the
        return is a (logical_and, logical_or) pair of *bool* tensors — an
        inconsistency with the other branches that callers must handle.
    """
    points = points.unsqueeze([0, 1])
    x, y = points.chunk(2, axis=-1)
    xmin, ymin, xmax, ymax = bboxes.unsqueeze(2).chunk(4, axis=-1)
    # check whether `points` is in `bboxes`: all four signed offsets positive
    l = x - xmin
    t = y - ymin
    r = xmax - x
    b = ymax - y
    delta_ltrb = paddle.concat([l, t, r, b], axis=-1)
    is_in_bboxes = (delta_ltrb.min(axis=-1) > eps)
    if center_radius_tensor is not None:
        # check whether `points` is in `center_radius` box around gt center
        center_radius_tensor = center_radius_tensor.unsqueeze([0, 1])
        cx = (xmin + xmax) * 0.5
        cy = (ymin + ymax) * 0.5
        l = x - (cx - center_radius_tensor)
        t = y - (cy - center_radius_tensor)
        r = (cx + center_radius_tensor) - x
        b = (cy + center_radius_tensor) - y
        delta_ltrb_c = paddle.concat([l, t, r, b], axis=-1)
        is_in_center = (delta_ltrb_c.min(axis=-1) > eps)
        if sm_use:
            return is_in_bboxes.astype(bboxes.dtype), is_in_center.astype(
                bboxes.dtype)
        else:
            return (paddle.logical_and(is_in_bboxes, is_in_center),
                    paddle.logical_or(is_in_bboxes, is_in_center))

    return is_in_bboxes.astype(bboxes.dtype)
|
||||
|
||||
|
||||
def compute_max_iou_anchor(ious):
    r"""
    For each anchor, mark the single GT it overlaps most.

    Args:
        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
    Returns:
        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    num_gts = ious.shape[-2]
    # Index of the best-matching GT per anchor: [B, L].
    best_gt_idx = ious.argmax(axis=-2)
    # One-hot over GTs gives [B, L, n]; transpose back to [B, n, L].
    one_hot = F.one_hot(best_gt_idx, num_gts)
    return one_hot.transpose([0, 2, 1]).astype(ious.dtype)
|
||||
|
||||
|
||||
def compute_max_iou_gt(ious):
    r"""
    For each GT, mark the single anchor it overlaps most.

    Args:
        ious (Tensor, float32): shape[B, n, L], n: num_gts, L: num_anchors
    Returns:
        is_max_iou (Tensor, float32): shape[B, n, L], value=1. means selected
    """
    num_anchors = ious.shape[-1]
    # Index of the best-matching anchor per GT: [B, n].
    best_anchor_idx = ious.argmax(axis=-1)
    # One-hot over anchors already has the output layout [B, n, L].
    return F.one_hot(best_anchor_idx, num_anchors).astype(ious.dtype)
|
||||
|
||||
|
||||
def generate_anchors_for_grid_cell(feats,
                                   fpn_strides,
                                   grid_cell_size=5.0,
                                   grid_cell_offset=0.5,
                                   dtype='float32'):
    r"""
    Like ATSS, generate anchors based on grid size.

    Args:
        feats (List[Tensor]): shape[s, (b, c, h, w)]
        fpn_strides (tuple|list): shape[s], stride for each scale feature
        grid_cell_size (float): anchor size, in units of the stride
        grid_cell_offset (float): The range is between 0 and 1.
        dtype (str): dtype of the returned tensors. Default: 'float32'.
    Returns:
        anchors (Tensor): shape[l, 4], "xmin, ymin, xmax, ymax" format.
        anchor_points (Tensor): shape[l, 2], "x, y" format.
        num_anchors_list (List[int]): shape[s], contains [s_1, s_2, ...].
        stride_tensor (Tensor): shape[l, 1], contains the stride for each scale.
    """
    assert len(feats) == len(fpn_strides)
    anchor_list = []
    point_list = []
    stride_list = []
    num_anchors_list = []
    for feat, stride in zip(feats, fpn_strides):
        _, _, feat_h, feat_w = feat.shape
        half = grid_cell_size * stride * 0.5
        # Cell-center coordinates in input-image pixels.
        xs = (paddle.arange(end=feat_w) + grid_cell_offset) * stride
        ys = (paddle.arange(end=feat_h) + grid_cell_offset) * stride
        grid_y, grid_x = paddle.meshgrid(ys, xs)
        # Boxes of side grid_cell_size * stride centered on each cell.
        corners = paddle.stack(
            [
                grid_x - half, grid_y - half,
                grid_x + half, grid_y + half
            ],
            axis=-1).astype(dtype)
        centers = paddle.stack([grid_x, grid_y], axis=-1).astype(dtype)

        anchor_list.append(corners.reshape([-1, 4]))
        point_list.append(centers.reshape([-1, 2]))
        num_anchors_list.append(len(anchor_list[-1]))
        stride_list.append(
            paddle.full(
                [num_anchors_list[-1], 1], stride, dtype=dtype))

    # Anchors are fixed geometry: never backpropagate through them.
    anchors = paddle.concat(anchor_list)
    anchors.stop_gradient = True
    anchor_points = paddle.concat(point_list)
    anchor_points.stop_gradient = True
    stride_tensor = paddle.concat(stride_list)
    stride_tensor.stop_gradient = True
    return anchors, anchor_points, num_anchors_list, stride_tensor
|
||||
Reference in New Issue
Block a user