Replace document detection model

58
paddle_detection/ppdet/modeling/losses/__init__.py
Normal file
@@ -0,0 +1,58 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import yolo_loss
from . import iou_aware_loss
from . import iou_loss
from . import ssd_loss
from . import fcos_loss
from . import solov2_loss
from . import ctfocal_loss
from . import keypoint_loss
from . import jde_loss
from . import fairmot_loss
from . import gfocal_loss
from . import detr_loss
from . import sparsercnn_loss
from . import focal_loss
from . import smooth_l1_loss
from . import probiou_loss
from . import cot_loss
from . import supcontrast
from . import queryinst_loss
from . import clrnet_loss
from . import clrnet_line_iou_loss

from .yolo_loss import *
from .iou_aware_loss import *
from .iou_loss import *
from .ssd_loss import *
from .fcos_loss import *
from .solov2_loss import *
from .ctfocal_loss import *
from .keypoint_loss import *
from .jde_loss import *
from .fairmot_loss import *
from .gfocal_loss import *
from .detr_loss import *
from .sparsercnn_loss import *
from .focal_loss import *
from .smooth_l1_loss import *
from .pose3d_loss import *
from .probiou_loss import *
from .cot_loss import *
from .supcontrast import *
from .queryinst_loss import *
from .clrnet_loss import *
from .clrnet_line_iou_loss import *
41
paddle_detection/ppdet/modeling/losses/clrnet_line_iou_loss.py
Normal file
@@ -0,0 +1,41 @@
import paddle


def line_iou(pred, target, img_w, length=15, aligned=True):
    '''
    Calculate the line iou value between predictions and targets
    Args:
        pred: lane predictions, shape: (num_pred, 72)
        target: ground truth, shape: (num_target, 72)
        img_w: image width
        length: extended radius
        aligned: True for iou loss calculation, False for pair-wise ious in assign
    '''
    px1 = pred - length
    px2 = pred + length
    tx1 = target - length
    tx2 = target + length

    if aligned:
        invalid_mask = target
        ovr = paddle.minimum(px2, tx2) - paddle.maximum(px1, tx1)
        union = paddle.maximum(px2, tx2) - paddle.minimum(px1, tx1)
    else:
        num_pred = pred.shape[0]
        invalid_mask = target.tile([num_pred, 1, 1])

        ovr = (paddle.minimum(px2[:, None, :], tx2[None, ...]) -
               paddle.maximum(px1[:, None, :], tx1[None, ...]))
        union = (paddle.maximum(px2[:, None, :], tx2[None, ...]) -
                 paddle.minimum(px1[:, None, :], tx1[None, ...]))

    invalid_masks = (invalid_mask < 0) | (invalid_mask >= img_w)

    ovr[invalid_masks] = 0.
    union[invalid_masks] = 0.
    iou = ovr.sum(axis=-1) / (union.sum(axis=-1) + 1e-9)
    return iou


def liou_loss(pred, target, img_w, length=15):
    return (1 - line_iou(pred, target, img_w, length)).mean()
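A minimal usage sketch of line_iou / liou_loss (the tensor values and img_w below are illustrative, not from this commit):

import paddle
from ppdet.modeling.losses.clrnet_line_iou_loss import line_iou, liou_loss

# Two lanes, each given as 72 x-coordinates (one per row anchor). Every x is
# dilated to a segment [x - length, x + length]; line IoU is the ratio of the
# summed per-point overlaps to the summed per-point unions.
pred = paddle.to_tensor([[100.0] * 72, [300.0] * 72])
target = paddle.to_tensor([[110.0] * 72, [280.0] * 72])

iou = line_iou(pred, target, img_w=800, length=15, aligned=True)  # shape [2]
loss = liou_loss(pred, target, img_w=800, length=15)              # 1 - mean IoU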
283
paddle_detection/ppdet/modeling/losses/clrnet_loss.py
Normal file
@@ -0,0 +1,283 @@
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.clrnet_utils import accuracy
from ppdet.modeling.assigners.clrnet_assigner import assign
from ppdet.modeling.losses.clrnet_line_iou_loss import liou_loss

__all__ = ['CLRNetLoss']


class SoftmaxFocalLoss(nn.Layer):
    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.nll = nn.NLLLoss(ignore_index=ignore_lb)

    def forward(self, logits, labels):
        scores = F.softmax(logits, axis=1)
        factor = paddle.pow(1. - scores, self.gamma)
        log_score = F.log_softmax(logits, axis=1)
        log_score = factor * log_score
        loss = self.nll(log_score, labels)
        return loss


def focal_loss(input: paddle.Tensor,
               target: paddle.Tensor,
               alpha: float,
               gamma: float=2.0,
               reduction: str='none',
               eps: float=1e-8) -> paddle.Tensor:
    r"""Function that computes Focal loss.

    See :class:`FocalLoss` for details.
    """
    if not paddle.is_tensor(input):
        raise TypeError("Input type is not a paddle.Tensor. Got {}".format(
            type(input)))

    if not len(input.shape) >= 2:
        raise ValueError("Invalid input shape, we expect BxCx*. Got: {}".format(
            input.shape))

    if input.shape[0] != target.shape[0]:
        raise ValueError(
            'Expected input batch_size ({}) to match target batch_size ({}).'.
            format(input.shape[0], target.shape[0]))

    n = input.shape[0]
    out_size = (n, ) + tuple(input.shape[2:])
    if target.shape[1:] != input.shape[2:]:
        raise ValueError('Expected target size {}, got {}'.format(out_size,
                                                                  target.shape))
    if (isinstance(input.place, paddle.CUDAPlace) and
            isinstance(target.place, paddle.CPUPlace)) | (isinstance(
                input.place, paddle.CPUPlace) and isinstance(target.place,
                                                             paddle.CUDAPlace)):
        raise ValueError(
            "input and target must be in the same device. Got: {} and {}".
            format(input.place, target.place))

    # compute softmax over the classes axis
    input_soft: paddle.Tensor = F.softmax(input, axis=1) + eps

    # create the labels one hot tensor
    target_one_hot: paddle.Tensor = paddle.to_tensor(
        F.one_hot(
            target, num_classes=input.shape[1]).cast(input.dtype),
        place=input.place)

    # compute the actual focal loss
    weight = paddle.pow(-input_soft + 1., gamma)

    focal = -alpha * weight * paddle.log(input_soft)
    loss_tmp = paddle.sum(target_one_hot * focal, axis=1)

    if reduction == 'none':
        loss = loss_tmp
    elif reduction == 'mean':
        loss = paddle.mean(loss_tmp)
    elif reduction == 'sum':
        loss = paddle.sum(loss_tmp)
    else:
        raise NotImplementedError("Invalid reduction mode: {}".format(
            reduction))
    return loss


class FocalLoss(nn.Layer):
    r"""Criterion that computes Focal loss.

    According to [1], the Focal loss is computed as follows:

    .. math::

        \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t)

    where:
       - :math:`p_t` is the model's estimated probability for each class.

    Arguments:
        alpha (float): Weighting factor :math:`\alpha \in [0, 1]`.
        gamma (float): Focusing parameter :math:`\gamma >= 0`.
        reduction (str, optional): Specifies the reduction to apply to the
            output: 'none' | 'mean' | 'sum'. 'none': no reduction will be applied,
            'mean': the sum of the output will be divided by the number of elements
            in the output, 'sum': the output will be summed. Default: 'none'.

    Shape:
        - Input: :math:`(N, C, *)` where C = number of classes.
        - Target: :math:`(N, *)` where each value is
          :math:`0 ≤ targets[i] ≤ C−1`.

    Examples:
        >>> N = 5  # num_classes
        >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'}
        >>> loss = FocalLoss(**kwargs)
        >>> input = paddle.randn([1, N, 3, 5])
        >>> input.stop_gradient = False
        >>> target = paddle.randint(0, N, [1, 3, 5])
        >>> output = loss(input, target)
        >>> output.backward()

    References:
        [1] https://arxiv.org/abs/1708.02002
    """

    def __init__(self, alpha: float, gamma: float=2.0,
                 reduction: str='none') -> None:
        super(FocalLoss, self).__init__()
        self.alpha: float = alpha
        self.gamma: float = gamma
        self.reduction: str = reduction
        self.eps: float = 1e-6

    def forward(  # type: ignore
            self, input: paddle.Tensor, target: paddle.Tensor) -> paddle.Tensor:
        return focal_loss(input, target, self.alpha, self.gamma, self.reduction,
                          self.eps)


@register
class CLRNetLoss(nn.Layer):
    __shared__ = ['img_w', 'img_h', 'num_classes', 'num_points']

    def __init__(self,
                 cls_loss_weight=2.0,
                 xyt_loss_weight=0.2,
                 iou_loss_weight=2.0,
                 seg_loss_weight=1.0,
                 refine_layers=3,
                 num_points=72,
                 img_w=800,
                 img_h=320,
                 num_classes=5,
                 ignore_label=255,
                 bg_weight=0.4):
        super(CLRNetLoss, self).__init__()
        self.cls_loss_weight = cls_loss_weight
        self.xyt_loss_weight = xyt_loss_weight
        self.iou_loss_weight = iou_loss_weight
        self.seg_loss_weight = seg_loss_weight
        self.refine_layers = refine_layers
        self.img_w = img_w
        self.img_h = img_h
        self.n_strips = num_points - 1
        self.num_classes = num_classes
        self.ignore_label = ignore_label
        weights = paddle.ones(shape=[self.num_classes])
        weights[0] = bg_weight
        self.criterion = nn.NLLLoss(
            ignore_index=self.ignore_label, weight=weights)

    def forward(self, output, batch):
        predictions_lists = output['predictions_lists']
        targets = batch['lane_line'].clone()
        cls_criterion = FocalLoss(alpha=0.25, gamma=2.0)
        cls_loss = paddle.to_tensor(0.0)
        reg_xytl_loss = paddle.to_tensor(0.0)
        iou_loss = paddle.to_tensor(0.0)
        cls_acc = []
        cls_acc_stage = []
        for stage in range(self.refine_layers):
            predictions_list = predictions_lists[stage]
            for predictions, target in zip(predictions_list, targets):
                target = target[target[:, 1] == 1]

                if len(target) == 0:
                    # If there are no targets, all predictions have to be negatives (i.e., 0 confidence)
                    cls_target = paddle.zeros(
                        [predictions.shape[0]], dtype='int64')
                    cls_pred = predictions[:, :2]
                    cls_loss = cls_loss + cls_criterion(cls_pred,
                                                        cls_target).sum()
                    continue

                with paddle.no_grad():
                    matched_row_inds, matched_col_inds = assign(
                        predictions, target, self.img_w, self.img_h)

                # classification targets
                cls_target = paddle.zeros([predictions.shape[0]], dtype='int64')
                cls_target[matched_row_inds] = 1
                cls_pred = predictions[:, :2]

                # regression targets -> [start_y, start_x, theta] (all transformed to absolute values), only on matched pairs
                reg_yxtl = predictions.index_select(matched_row_inds)[..., 2:6]

                reg_yxtl[:, 0] *= self.n_strips
                reg_yxtl[:, 1] *= (self.img_w - 1)
                reg_yxtl[:, 2] *= 180
                reg_yxtl[:, 3] *= self.n_strips

                target_yxtl = target.index_select(matched_col_inds)[..., 2:6].clone()

                # regression targets -> S coordinates (all transformed to absolute values)
                reg_pred = predictions.index_select(matched_row_inds)[..., 6:]
                reg_pred *= (self.img_w - 1)
                reg_targets = target.index_select(matched_col_inds)[..., 6:].clone()

                with paddle.no_grad():
                    predictions_starts = paddle.clip(
                        (predictions.index_select(matched_row_inds)[..., 2] *
                         self.n_strips).round().cast("int64"),
                        min=0,
                        max=self.n_strips)  # ensure the predicted starts are valid

                    target_starts = (
                        target.index_select(matched_col_inds)[..., 2] *
                        self.n_strips).round().cast("int64")
                    target_yxtl[:, -1] -= (
                        predictions_starts - target_starts)  # reg length

                # Loss calculation
                cls_loss = cls_loss + cls_criterion(
                    cls_pred, cls_target).sum() / target.shape[0]

                target_yxtl[:, 0] *= self.n_strips
                target_yxtl[:, 2] *= 180

                reg_xytl_loss = reg_xytl_loss + F.smooth_l1_loss(
                    input=reg_yxtl, label=target_yxtl, reduction='none').mean()

                iou_loss = iou_loss + liou_loss(
                    reg_pred, reg_targets, self.img_w, length=15)

                cls_accuracy = accuracy(cls_pred, cls_target)
                cls_acc_stage.append(cls_accuracy)

            cls_acc.append(sum(cls_acc_stage) / (len(cls_acc_stage) + 1e-5))

        # extra segmentation loss
        seg_loss = self.criterion(
            F.log_softmax(
                output['seg'], axis=1), batch['seg'].cast('int64'))

        cls_loss /= (len(targets) * self.refine_layers)
        reg_xytl_loss /= (len(targets) * self.refine_layers)
        iou_loss /= (len(targets) * self.refine_layers)

        loss = cls_loss * self.cls_loss_weight \
            + reg_xytl_loss * self.xyt_loss_weight \
            + seg_loss * self.seg_loss_weight \
            + iou_loss * self.iou_loss_weight

        return_value = {
            'loss': loss,
            'cls_loss': cls_loss * self.cls_loss_weight,
            'reg_xytl_loss': reg_xytl_loss * self.xyt_loss_weight,
            'seg_loss': seg_loss * self.seg_loss_weight,
            'iou_loss': iou_loss * self.iou_loss_weight
        }

        for i in range(self.refine_layers):
            if not isinstance(cls_acc[i], paddle.Tensor):
                cls_acc[i] = paddle.to_tensor(cls_acc[i])
            return_value['stage_{}_acc'.format(i)] = cls_acc[i]

        return return_value
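A quick sanity check of the standalone focal_loss defined above (shapes and values are illustrative):

import paddle
from ppdet.modeling.losses.clrnet_loss import focal_loss

logits = paddle.randn([4, 2])       # [N, C]: background/lane scores
labels = paddle.randint(0, 2, [4])  # [N] integer class ids
loss = focal_loss(logits, labels, alpha=0.25, gamma=2.0, reduction='none')
# loss has shape [4]; CLRNetLoss sums it via cls_criterion(cls_pred, cls_target).sum()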
61
paddle_detection/ppdet/modeling/losses/cot_loss.py
Normal file
@@ -0,0 +1,61 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from ppdet.core.workspace import register

__all__ = ['COTLoss']


@register
class COTLoss(nn.Layer):
    __shared__ = ['num_classes']

    def __init__(self,
                 num_classes=80,
                 cot_scale=1,
                 cot_lambda=1):
        super(COTLoss, self).__init__()
        self.cot_scale = cot_scale
        self.cot_lambda = cot_lambda
        self.num_classes = num_classes

    def forward(self, scores, targets, cot_relation):
        cls_name = 'loss_bbox_cls_cot'
        loss_bbox = {}

        tgt_labels, tgt_bboxes, tgt_gt_inds = targets
        tgt_labels = paddle.concat(tgt_labels) if len(
            tgt_labels) > 1 else tgt_labels[0]
        mask = (tgt_labels < self.num_classes)
        valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
        if valid_inds.shape[0] == 0:
            loss_bbox[cls_name] = paddle.zeros([1], dtype='float32')
        else:
            tgt_labels = tgt_labels.cast('int64')
            valid_cot_targets = []
            for i in range(tgt_labels.shape[0]):
                train_label = tgt_labels[i]
                if train_label < self.num_classes:
                    valid_cot_targets.append(cot_relation[train_label])
            coco_targets = paddle.to_tensor(valid_cot_targets)
            coco_targets.stop_gradient = True
            coco_loss = -coco_targets * F.log_softmax(scores[mask][:, :-1] * self.cot_scale)
            loss_bbox[cls_name] = self.cot_lambda * paddle.mean(paddle.sum(coco_loss, axis=-1))
        return loss_bbox
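The co-tuning term above is a soft cross-entropy: each foreground RoI with label y is supervised by the soft distribution cot_relation[y] over the foreground classes. A hand-rolled sketch of the same computation (values are illustrative):

import paddle
import paddle.nn.functional as F

scores = paddle.randn([4, 3])  # foreground logits, background column already dropped
soft_targets = paddle.to_tensor([[0.8, 0.1, 0.1],
                                 [0.1, 0.8, 0.1],
                                 [0.1, 0.1, 0.8],
                                 [0.8, 0.1, 0.1]])  # cot_relation rows of the GT labels
cot = paddle.mean(paddle.sum(-soft_targets * F.log_softmax(scores, axis=-1), axis=-1))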
68
paddle_detection/ppdet/modeling/losses/ctfocal_loss.py
Normal file
@@ -0,0 +1,68 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle

from ppdet.core.workspace import register, serializable

__all__ = ['CTFocalLoss']


@register
@serializable
class CTFocalLoss(object):
    """
    CTFocalLoss: CornerNet & CenterNet Focal Loss
    Args:
        loss_weight (float): loss weight
        gamma (float): gamma parameter for Focal Loss
    """

    def __init__(self, loss_weight=1., gamma=2.0):
        self.loss_weight = loss_weight
        self.gamma = gamma

    def __call__(self, pred, target):
        """
        Calculate the loss
        Args:
            pred (Tensor): heatmap prediction
            target (Tensor): target for positive samples
        Return:
            ct_focal_loss (Tensor): Focal Loss used in CornerNet & CenterNet.
                Note that the values in target are in [0, 1] since gaussian is
                used to reduce the punishment and we treat [0, 1) as neg example.
        """
        fg_map = paddle.cast(target == 1, 'float32')
        fg_map.stop_gradient = True
        bg_map = paddle.cast(target < 1, 'float32')
        bg_map.stop_gradient = True

        neg_weights = paddle.pow(1 - target, 4)
        pos_loss = 0 - paddle.log(pred) * paddle.pow(1 - pred,
                                                     self.gamma) * fg_map

        neg_loss = 0 - paddle.log(1 - pred) * paddle.pow(
            pred, self.gamma) * neg_weights * bg_map
        pos_loss = paddle.sum(pos_loss)
        neg_loss = paddle.sum(neg_loss)

        fg_num = paddle.sum(fg_map)
        ct_focal_loss = (pos_loss + neg_loss) / (
            fg_num + paddle.cast(fg_num == 0, 'float32'))
        return ct_focal_loss * self.loss_weight
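A toy call to CTFocalLoss (the heatmaps are illustrative; pred is expected to hold post-sigmoid probabilities in (0, 1) and target a Gaussian-splatted heatmap with 1.0 at object centers):

import paddle
from ppdet.modeling.losses.ctfocal_loss import CTFocalLoss

loss_fn = CTFocalLoss(loss_weight=1., gamma=2.0)
pred = paddle.to_tensor([[0.9, 0.1], [0.2, 0.05]])   # predicted center heatmap
target = paddle.to_tensor([[1.0, 0.0], [0.3, 0.0]])  # 1.0 marks a positive
loss = loss_fn(pred, target)  # normalized by the number of positives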
631
paddle_detection/ppdet/modeling/losses/detr_loss.py
Normal file
@@ -0,0 +1,631 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from .iou_loss import GIoULoss
from ..transformers import bbox_cxcywh_to_xyxy, sigmoid_focal_loss, varifocal_loss_with_logits
from ..bbox_utils import bbox_iou

__all__ = ['DETRLoss', 'DINOLoss']


@register
class DETRLoss(nn.Layer):
    __shared__ = ['num_classes', 'use_focal_loss']
    __inject__ = ['matcher']

    def __init__(self,
                 num_classes=80,
                 matcher='HungarianMatcher',
                 loss_coeff={
                     'class': 1,
                     'bbox': 5,
                     'giou': 2,
                     'no_object': 0.1,
                     'mask': 1,
                     'dice': 1
                 },
                 aux_loss=True,
                 use_focal_loss=False,
                 use_vfl=False,
                 use_uni_match=False,
                 uni_match_ind=0):
        r"""
        Args:
            num_classes (int): The number of classes.
            matcher (HungarianMatcher): It computes an assignment between the targets
                and the predictions of the network.
            loss_coeff (dict): The coefficient of loss.
            aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used.
            use_focal_loss (bool): Use focal loss or not.
        """
        super(DETRLoss, self).__init__()

        self.num_classes = num_classes
        self.matcher = matcher
        self.loss_coeff = loss_coeff
        self.aux_loss = aux_loss
        self.use_focal_loss = use_focal_loss
        self.use_vfl = use_vfl
        self.use_uni_match = use_uni_match
        self.uni_match_ind = uni_match_ind

        if not self.use_focal_loss:
            self.loss_coeff['class'] = paddle.full([num_classes + 1],
                                                   loss_coeff['class'])
            self.loss_coeff['class'][-1] = loss_coeff['no_object']
        self.giou_loss = GIoULoss()

    def _get_loss_class(self,
                        logits,
                        gt_class,
                        match_indices,
                        bg_index,
                        num_gts,
                        postfix="",
                        iou_score=None,
                        gt_score=None):
        # logits: [b, query, num_classes], gt_class: list[[n, 1]]
        name_class = "loss_class" + postfix

        target_label = paddle.full(logits.shape[:2], bg_index, dtype='int64')
        bs, num_query_objects = target_label.shape
        num_gt = sum(len(a) for a in gt_class)
        if num_gt > 0:
            index, updates = self._get_index_updates(num_query_objects,
                                                     gt_class, match_indices)
            target_label = paddle.scatter(
                target_label.reshape([-1, 1]), index, updates.astype('int64'))
            target_label = target_label.reshape([bs, num_query_objects])
        if self.use_focal_loss:
            target_label = F.one_hot(target_label,
                                     self.num_classes + 1)[..., :-1]
            if iou_score is not None and self.use_vfl:
                if gt_score is not None:
                    target_score = paddle.zeros([bs, num_query_objects])
                    target_score = paddle.scatter(
                        target_score.reshape([-1, 1]), index, gt_score)
                    target_score = target_score.reshape(
                        [bs, num_query_objects, 1]) * target_label

                    target_score_iou = paddle.zeros([bs, num_query_objects])
                    target_score_iou = paddle.scatter(
                        target_score_iou.reshape([-1, 1]), index, iou_score)
                    target_score_iou = target_score_iou.reshape(
                        [bs, num_query_objects, 1]) * target_label
                    target_score = paddle.multiply(target_score,
                                                   target_score_iou)
                    loss_ = self.loss_coeff[
                        'class'] * varifocal_loss_with_logits(
                            logits, target_score, target_label,
                            num_gts / num_query_objects)
                else:
                    target_score = paddle.zeros([bs, num_query_objects])
                    if num_gt > 0:
                        target_score = paddle.scatter(
                            target_score.reshape([-1, 1]), index, iou_score)
                    target_score = target_score.reshape(
                        [bs, num_query_objects, 1]) * target_label
                    loss_ = self.loss_coeff[
                        'class'] * varifocal_loss_with_logits(
                            logits, target_score, target_label,
                            num_gts / num_query_objects)
            else:
                loss_ = self.loss_coeff['class'] * sigmoid_focal_loss(
                    logits, target_label, num_gts / num_query_objects)
        else:
            loss_ = F.cross_entropy(
                logits, target_label, weight=self.loss_coeff['class'])
        return {name_class: loss_}

    def _get_loss_bbox(self, boxes, gt_bbox, match_indices, num_gts,
                       postfix=""):
        # boxes: [b, query, 4], gt_bbox: list[[n, 4]]
        name_bbox = "loss_bbox" + postfix
        name_giou = "loss_giou" + postfix

        loss = dict()
        if sum(len(a) for a in gt_bbox) == 0:
            loss[name_bbox] = paddle.to_tensor([0.])
            loss[name_giou] = paddle.to_tensor([0.])
            return loss

        src_bbox, target_bbox = self._get_src_target_assign(boxes, gt_bbox,
                                                            match_indices)
        loss[name_bbox] = self.loss_coeff['bbox'] * F.l1_loss(
            src_bbox, target_bbox, reduction='sum') / num_gts
        loss[name_giou] = self.giou_loss(
            bbox_cxcywh_to_xyxy(src_bbox), bbox_cxcywh_to_xyxy(target_bbox))
        loss[name_giou] = loss[name_giou].sum() / num_gts
        loss[name_giou] = self.loss_coeff['giou'] * loss[name_giou]
        return loss

    def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
                       postfix=""):
        # masks: [b, query, h, w], gt_mask: list[[n, H, W]]
        name_mask = "loss_mask" + postfix
        name_dice = "loss_dice" + postfix

        loss = dict()
        if sum(len(a) for a in gt_mask) == 0:
            loss[name_mask] = paddle.to_tensor([0.])
            loss[name_dice] = paddle.to_tensor([0.])
            return loss

        src_masks, target_masks = self._get_src_target_assign(masks, gt_mask,
                                                              match_indices)
        src_masks = F.interpolate(
            src_masks.unsqueeze(0),
            size=target_masks.shape[-2:],
            mode="bilinear")[0]
        loss[name_mask] = self.loss_coeff['mask'] * F.sigmoid_focal_loss(
            src_masks,
            target_masks,
            paddle.to_tensor(
                [num_gts], dtype='float32'))
        loss[name_dice] = self.loss_coeff['dice'] * self._dice_loss(
            src_masks, target_masks, num_gts)
        return loss

    def _dice_loss(self, inputs, targets, num_gts):
        inputs = F.sigmoid(inputs)
        inputs = inputs.flatten(1)
        targets = targets.flatten(1)
        numerator = 2 * (inputs * targets).sum(1)
        denominator = inputs.sum(-1) + targets.sum(-1)
        loss = 1 - (numerator + 1) / (denominator + 1)
        return loss.sum() / num_gts

    def _get_loss_aux(self,
                      boxes,
                      logits,
                      gt_bbox,
                      gt_class,
                      bg_index,
                      num_gts,
                      dn_match_indices=None,
                      postfix="",
                      masks=None,
                      gt_mask=None,
                      gt_score=None):
        loss_class = []
        loss_bbox, loss_giou = [], []
        loss_mask, loss_dice = [], []
        if dn_match_indices is not None:
            match_indices = dn_match_indices
        elif self.use_uni_match:
            match_indices = self.matcher(
                boxes[self.uni_match_ind],
                logits[self.uni_match_ind],
                gt_bbox,
                gt_class,
                masks=masks[self.uni_match_ind] if masks is not None else None,
                gt_mask=gt_mask)
        for i, (aux_boxes, aux_logits) in enumerate(zip(boxes, logits)):
            aux_masks = masks[i] if masks is not None else None
            if not self.use_uni_match and dn_match_indices is None:
                match_indices = self.matcher(
                    aux_boxes,
                    aux_logits,
                    gt_bbox,
                    gt_class,
                    masks=aux_masks,
                    gt_mask=gt_mask)
            if self.use_vfl:
                if sum(len(a) for a in gt_bbox) > 0:
                    src_bbox, target_bbox = self._get_src_target_assign(
                        aux_boxes.detach(), gt_bbox, match_indices)
                    iou_score = bbox_iou(
                        bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
                        bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
                else:
                    iou_score = None
                if gt_score is not None:
                    _, target_score = self._get_src_target_assign(
                        logits[-1].detach(), gt_score, match_indices)
            else:
                iou_score = None
            loss_class.append(
                self._get_loss_class(
                    aux_logits,
                    gt_class,
                    match_indices,
                    bg_index,
                    num_gts,
                    postfix,
                    iou_score,
                    gt_score=target_score
                    if gt_score is not None else None)['loss_class' + postfix])
            loss_ = self._get_loss_bbox(aux_boxes, gt_bbox, match_indices,
                                        num_gts, postfix)
            loss_bbox.append(loss_['loss_bbox' + postfix])
            loss_giou.append(loss_['loss_giou' + postfix])
            if masks is not None and gt_mask is not None:
                loss_ = self._get_loss_mask(aux_masks, gt_mask, match_indices,
                                            num_gts, postfix)
                loss_mask.append(loss_['loss_mask' + postfix])
                loss_dice.append(loss_['loss_dice' + postfix])
        loss = {
            "loss_class_aux" + postfix: paddle.add_n(loss_class),
            "loss_bbox_aux" + postfix: paddle.add_n(loss_bbox),
            "loss_giou_aux" + postfix: paddle.add_n(loss_giou)
        }
        if masks is not None and gt_mask is not None:
            loss["loss_mask_aux" + postfix] = paddle.add_n(loss_mask)
            loss["loss_dice_aux" + postfix] = paddle.add_n(loss_dice)
        return loss

    def _get_index_updates(self, num_query_objects, target, match_indices):
        batch_idx = paddle.concat([
            paddle.full_like(src, i) for i, (src, _) in enumerate(match_indices)
        ])
        src_idx = paddle.concat([src for (src, _) in match_indices])
        src_idx += (batch_idx * num_query_objects)
        target_assign = paddle.concat([
            paddle.gather(
                t, dst, axis=0) for t, (_, dst) in zip(target, match_indices)
        ])
        return src_idx, target_assign

    def _get_src_target_assign(self, src, target, match_indices):
        src_assign = paddle.concat([
            paddle.gather(
                t, I, axis=0) if len(I) > 0 else paddle.zeros([0, t.shape[-1]])
            for t, (I, _) in zip(src, match_indices)
        ])
        target_assign = paddle.concat([
            paddle.gather(
                t, J, axis=0) if len(J) > 0 else paddle.zeros([0, t.shape[-1]])
            for t, (_, J) in zip(target, match_indices)
        ])
        return src_assign, target_assign

    def _get_num_gts(self, targets, dtype="float32"):
        num_gts = sum(len(a) for a in targets)
        num_gts = paddle.to_tensor([num_gts], dtype=dtype)
        if paddle.distributed.get_world_size() > 1:
            paddle.distributed.all_reduce(num_gts)
            num_gts /= paddle.distributed.get_world_size()
        num_gts = paddle.clip(num_gts, min=1.)
        return num_gts

    def _get_prediction_loss(self,
                             boxes,
                             logits,
                             gt_bbox,
                             gt_class,
                             masks=None,
                             gt_mask=None,
                             postfix="",
                             dn_match_indices=None,
                             num_gts=1,
                             gt_score=None):
        if dn_match_indices is None:
            match_indices = self.matcher(
                boxes, logits, gt_bbox, gt_class, masks=masks, gt_mask=gt_mask)
        else:
            match_indices = dn_match_indices

        if self.use_vfl:
            if gt_score is not None:  # ssod
                _, target_score = self._get_src_target_assign(
                    logits[-1].detach(), gt_score, match_indices)
            elif sum(len(a) for a in gt_bbox) > 0:
                src_bbox, target_bbox = self._get_src_target_assign(
                    boxes.detach(), gt_bbox, match_indices)
                iou_score = bbox_iou(
                    bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
                    bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
            else:
                iou_score = None
        else:
            iou_score = None

        loss = dict()
        loss.update(
            self._get_loss_class(
                logits,
                gt_class,
                match_indices,
                self.num_classes,
                num_gts,
                postfix,
                iou_score,
                gt_score=target_score if gt_score is not None else None))
        loss.update(
            self._get_loss_bbox(boxes, gt_bbox, match_indices, num_gts,
                                postfix))
        if masks is not None and gt_mask is not None:
            loss.update(
                self._get_loss_mask(masks, gt_mask, match_indices, num_gts,
                                    postfix))
        return loss

    def forward(self,
                boxes,
                logits,
                gt_bbox,
                gt_class,
                masks=None,
                gt_mask=None,
                postfix="",
                gt_score=None,
                **kwargs):
        r"""
        Args:
            boxes (Tensor): [l, b, query, 4]
            logits (Tensor): [l, b, query, num_classes]
            gt_bbox (List(Tensor)): list[[n, 4]]
            gt_class (List(Tensor)): list[[n, 1]]
            masks (Tensor, optional): [l, b, query, h, w]
            gt_mask (List(Tensor), optional): list[[n, H, W]]
            postfix (str): postfix of loss name
        """

        dn_match_indices = kwargs.get("dn_match_indices", None)
        num_gts = kwargs.get("num_gts", None)
        if num_gts is None:
            num_gts = self._get_num_gts(gt_class)

        total_loss = self._get_prediction_loss(
            boxes[-1],
            logits[-1],
            gt_bbox,
            gt_class,
            masks=masks[-1] if masks is not None else None,
            gt_mask=gt_mask,
            postfix=postfix,
            dn_match_indices=dn_match_indices,
            num_gts=num_gts,
            gt_score=gt_score if gt_score is not None else None)

        if self.aux_loss:
            total_loss.update(
                self._get_loss_aux(
                    boxes[:-1],
                    logits[:-1],
                    gt_bbox,
                    gt_class,
                    self.num_classes,
                    num_gts,
                    dn_match_indices,
                    postfix,
                    masks=masks[:-1] if masks is not None else None,
                    gt_mask=gt_mask,
                    gt_score=gt_score if gt_score is not None else None))

        return total_loss


@register
class DINOLoss(DETRLoss):
    def forward(self,
                boxes,
                logits,
                gt_bbox,
                gt_class,
                masks=None,
                gt_mask=None,
                postfix="",
                dn_out_bboxes=None,
                dn_out_logits=None,
                dn_meta=None,
                gt_score=None,
                **kwargs):
        num_gts = self._get_num_gts(gt_class)
        total_loss = super(DINOLoss, self).forward(
            boxes,
            logits,
            gt_bbox,
            gt_class,
            num_gts=num_gts,
            gt_score=gt_score)

        if dn_meta is not None:
            dn_positive_idx, dn_num_group = \
                dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
            assert len(gt_class) == len(dn_positive_idx)

            # denoising match indices
            dn_match_indices = self.get_dn_match_indices(
                gt_class, dn_positive_idx, dn_num_group)

            # compute denoising training loss
            num_gts *= dn_num_group
            dn_loss = super(DINOLoss, self).forward(
                dn_out_bboxes,
                dn_out_logits,
                gt_bbox,
                gt_class,
                postfix="_dn",
                dn_match_indices=dn_match_indices,
                num_gts=num_gts,
                gt_score=gt_score)
            total_loss.update(dn_loss)
        else:
            total_loss.update(
                {k + '_dn': paddle.to_tensor([0.])
                 for k in total_loss.keys()})

        return total_loss

    @staticmethod
    def get_dn_match_indices(labels, dn_positive_idx, dn_num_group):
        dn_match_indices = []
        for i in range(len(labels)):
            num_gt = len(labels[i])
            if num_gt > 0:
                gt_idx = paddle.arange(end=num_gt, dtype="int64")
                gt_idx = gt_idx.tile([dn_num_group])
                assert len(dn_positive_idx[i]) == len(gt_idx)
                dn_match_indices.append((dn_positive_idx[i], gt_idx))
            else:
                dn_match_indices.append((paddle.zeros(
                    [0], dtype="int64"), paddle.zeros(
                        [0], dtype="int64")))
        return dn_match_indices


@register
class MaskDINOLoss(DETRLoss):
    __shared__ = ['num_classes', 'use_focal_loss', 'num_sample_points']
    __inject__ = ['matcher']

    def __init__(self,
                 num_classes=80,
                 matcher='HungarianMatcher',
                 loss_coeff={
                     'class': 4,
                     'bbox': 5,
                     'giou': 2,
                     'mask': 5,
                     'dice': 5
                 },
                 aux_loss=True,
                 use_focal_loss=False,
                 num_sample_points=12544,
                 oversample_ratio=3.0,
                 important_sample_ratio=0.75):
        super(MaskDINOLoss, self).__init__(num_classes, matcher, loss_coeff,
                                           aux_loss, use_focal_loss)
        assert oversample_ratio >= 1
        assert important_sample_ratio <= 1 and important_sample_ratio >= 0

        self.num_sample_points = num_sample_points
        self.oversample_ratio = oversample_ratio
        self.important_sample_ratio = important_sample_ratio
        self.num_oversample_points = int(num_sample_points * oversample_ratio)
        self.num_important_points = int(num_sample_points *
                                        important_sample_ratio)
        self.num_random_points = num_sample_points - self.num_important_points

    def forward(self,
                boxes,
                logits,
                gt_bbox,
                gt_class,
                masks=None,
                gt_mask=None,
                postfix="",
                dn_out_bboxes=None,
                dn_out_logits=None,
                dn_out_masks=None,
                dn_meta=None,
                **kwargs):
        num_gts = self._get_num_gts(gt_class)
        total_loss = super(MaskDINOLoss, self).forward(
            boxes,
            logits,
            gt_bbox,
            gt_class,
            masks=masks,
            gt_mask=gt_mask,
            num_gts=num_gts)

        if dn_meta is not None:
            dn_positive_idx, dn_num_group = \
                dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
            assert len(gt_class) == len(dn_positive_idx)

            # denoising match indices
            dn_match_indices = DINOLoss.get_dn_match_indices(
                gt_class, dn_positive_idx, dn_num_group)

            # compute denoising training loss
            num_gts *= dn_num_group
            dn_loss = super(MaskDINOLoss, self).forward(
                dn_out_bboxes,
                dn_out_logits,
                gt_bbox,
                gt_class,
                masks=dn_out_masks,
                gt_mask=gt_mask,
                postfix="_dn",
                dn_match_indices=dn_match_indices,
                num_gts=num_gts)
            total_loss.update(dn_loss)
        else:
            total_loss.update(
                {k + '_dn': paddle.to_tensor([0.])
                 for k in total_loss.keys()})

        return total_loss

    def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
                       postfix=""):
        # masks: [b, query, h, w], gt_mask: list[[n, H, W]]
        name_mask = "loss_mask" + postfix
        name_dice = "loss_dice" + postfix

        loss = dict()
        if sum(len(a) for a in gt_mask) == 0:
            loss[name_mask] = paddle.to_tensor([0.])
            loss[name_dice] = paddle.to_tensor([0.])
            return loss

        src_masks, target_masks = self._get_src_target_assign(masks, gt_mask,
                                                              match_indices)
        # sample points
        sample_points = self._get_point_coords_by_uncertainty(src_masks)
        sample_points = 2.0 * sample_points.unsqueeze(1) - 1.0

        src_masks = F.grid_sample(
            src_masks.unsqueeze(1), sample_points,
            align_corners=False).squeeze([1, 2])

        target_masks = F.grid_sample(
            target_masks.unsqueeze(1), sample_points,
            align_corners=False).squeeze([1, 2]).detach()

        loss[name_mask] = self.loss_coeff[
            'mask'] * F.binary_cross_entropy_with_logits(
                src_masks, target_masks,
                reduction='none').mean(1).sum() / num_gts
        loss[name_dice] = self.loss_coeff['dice'] * self._dice_loss(
            src_masks, target_masks, num_gts)
        return loss

    def _get_point_coords_by_uncertainty(self, masks):
        # Sample points based on their uncertainty.
        masks = masks.detach()
        num_masks = masks.shape[0]
        sample_points = paddle.rand(
            [num_masks, 1, self.num_oversample_points, 2])

        out_mask = F.grid_sample(
            masks.unsqueeze(1), 2.0 * sample_points - 1.0,
            align_corners=False).squeeze([1, 2])
        out_mask = -paddle.abs(out_mask)

        _, topk_ind = paddle.topk(out_mask, self.num_important_points, axis=1)
        batch_ind = paddle.arange(end=num_masks, dtype=topk_ind.dtype)
        batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_important_points])
        topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)

        sample_points = paddle.gather_nd(sample_points.squeeze(1), topk_ind)
        if self.num_random_points > 0:
            sample_points = paddle.concat(
                [
                    sample_points,
                    paddle.rand([num_masks, self.num_random_points, 2])
                ],
                axis=1)
        return sample_points
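For orientation, the tensor shapes DETRLoss.forward consumes (sizes are illustrative; the matcher is normally injected from the config, so the loss call itself is shown commented out):

import paddle

l, b, q, c = 6, 2, 100, 80  # decoder layers, batch, queries, classes
boxes = paddle.rand([l, b, q, 4])   # normalized cxcywh
logits = paddle.randn([l, b, q, c])
gt_bbox = [paddle.rand([3, 4]), paddle.rand([5, 4])]  # per-image GT boxes
gt_class = [paddle.randint(0, c, [3, 1]), paddle.randint(0, c, [5, 1])]
# detr_loss = DETRLoss(num_classes=c, matcher=HungarianMatcher(...))
# losses = detr_loss(boxes, logits, gt_bbox, gt_class)
# -> {'loss_class': ..., 'loss_bbox': ..., 'loss_giou': ..., plus *_aux terms}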
41
paddle_detection/ppdet/modeling/losses/fairmot_loss.py
Normal file
@@ -0,0 +1,41 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
from paddle.nn.initializer import Constant
from ppdet.core.workspace import register

__all__ = ['FairMOTLoss']


@register
class FairMOTLoss(nn.Layer):
    def __init__(self):
        super(FairMOTLoss, self).__init__()
        self.det_weight = self.create_parameter(
            shape=[1], default_initializer=Constant(-1.85))
        self.reid_weight = self.create_parameter(
            shape=[1], default_initializer=Constant(-1.05))

    def forward(self, det_loss, reid_loss):
        loss = paddle.exp(-self.det_weight) * det_loss + paddle.exp(
            -self.reid_weight) * reid_loss + (self.det_weight + self.reid_weight)
        loss *= 0.5
        return {'loss': loss}
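The two learnable scalars implement uncertainty-based task weighting, i.e. loss = 0.5 * (exp(-w_det) * L_det + exp(-w_reid) * L_reid + w_det + w_reid). A toy call (the loss values are illustrative):

import paddle
from ppdet.modeling.losses.fairmot_loss import FairMOTLoss

fairmot_loss = FairMOTLoss()
det_loss = paddle.to_tensor(1.2)
reid_loss = paddle.to_tensor(3.4)
out = fairmot_loss(det_loss, reid_loss)  # {'loss': weighted total}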
1020
paddle_detection/ppdet/modeling/losses/fcos_loss.py
Normal file
File diff suppressed because it is too large
138
paddle_detection/ppdet/modeling/losses/focal_loss.py
Normal file
@@ -0,0 +1,138 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn.functional as F
import paddle.nn as nn
from ppdet.core.workspace import register

__all__ = ['FocalLoss', 'Weighted_FocalLoss']


@register
class FocalLoss(nn.Layer):
    """A wrapper around paddle.nn.functional.sigmoid_focal_loss.
    Args:
        use_sigmoid (bool): currently only support use_sigmoid=True
        alpha (float): parameter alpha in Focal Loss
        gamma (float): parameter gamma in Focal Loss
        loss_weight (float): final loss will be multiplied by this
    """

    def __init__(self,
                 use_sigmoid=True,
                 alpha=0.25,
                 gamma=2.0,
                 loss_weight=1.0):
        super(FocalLoss, self).__init__()
        assert use_sigmoid == True, \
            'Focal Loss only supports sigmoid at the moment'
        self.use_sigmoid = use_sigmoid
        self.alpha = alpha
        self.gamma = gamma
        self.loss_weight = loss_weight

    def forward(self, pred, target, reduction='none'):
        """forward function.
        Args:
            pred (Tensor): logits of class prediction, of shape (N, num_classes)
            target (Tensor): target class label, of shape (N, )
            reduction (str): the way to reduce loss, one of (none, sum, mean)
        """
        num_classes = pred.shape[1]
        target = F.one_hot(target, num_classes + 1).cast(pred.dtype)
        target = target[:, :-1].detach()
        loss = F.sigmoid_focal_loss(
            pred, target, alpha=self.alpha, gamma=self.gamma,
            reduction=reduction)
        return loss * self.loss_weight


@register
class Weighted_FocalLoss(FocalLoss):
    """A wrapper around paddle.nn.functional.sigmoid_focal_loss.
    Args:
        use_sigmoid (bool): currently only support use_sigmoid=True
        alpha (float): parameter alpha in Focal Loss
        gamma (float): parameter gamma in Focal Loss
        loss_weight (float): final loss will be multiplied by this
    """

    def __init__(self,
                 use_sigmoid=True,
                 alpha=0.25,
                 gamma=2.0,
                 loss_weight=1.0,
                 reduction="mean"):
        super(FocalLoss, self).__init__()
        assert use_sigmoid == True, \
            'Focal Loss only supports sigmoid at the moment'
        self.use_sigmoid = use_sigmoid
        self.alpha = alpha
        self.gamma = gamma
        self.loss_weight = loss_weight
        self.reduction = reduction

    def forward(self, pred, target, weight=None, avg_factor=None,
                reduction_override=None):
        """forward function.
        Args:
            pred (Tensor): logits of class prediction, of shape (N, num_classes)
            target (Tensor): target class label, of shape (N, )
            reduction (str): the way to reduce loss, one of (none, sum, mean)
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        num_classes = pred.shape[1]
        target = F.one_hot(target, num_classes + 1).astype(pred.dtype)
        target = target[:, :-1].detach()
        loss = F.sigmoid_focal_loss(
            pred, target, alpha=self.alpha, gamma=self.gamma,
            reduction='none')

        if weight is not None:
            if weight.shape != loss.shape:
                if weight.shape[0] == loss.shape[0]:
                    # For most cases, weight is of shape (num_priors, ),
                    # which means it does not have the second axis num_class
                    weight = weight.reshape((-1, 1))
                else:
                    # Sometimes, weight per anchor per class is also needed. e.g.
                    # in FSAF. But it may be flattened of shape
                    # (num_priors x num_class, ), while loss is still of shape
                    # (num_priors, num_class).
                    assert weight.numel() == loss.numel()
                    weight = weight.reshape((loss.shape[0], -1))
            assert weight.ndim == loss.ndim
            loss = loss * weight

        # if avg_factor is not specified, just reduce the loss
        if avg_factor is None:
            if reduction == 'mean':
                loss = loss.mean()
            elif reduction == 'sum':
                loss = loss.sum()
        else:
            # if reduction is mean, then average the loss by avg_factor
            if reduction == 'mean':
                # Avoid causing ZeroDivisionError when avg_factor is 0.0,
                # i.e., all labels of an image belong to ignore index.
                eps = 1e-10
                loss = loss.sum() / (avg_factor + eps)
            # if reduction is 'none', then do nothing, otherwise raise an error
            elif reduction != 'none':
                raise ValueError('avg_factor can not be used with reduction="sum"')

        return loss * self.loss_weight
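A small sketch of calling the registered FocalLoss wrapper (shapes are illustrative; labels equal to num_classes land in the background slot that gets dropped):

import paddle
from ppdet.modeling.losses.focal_loss import FocalLoss

focal = FocalLoss(alpha=0.25, gamma=2.0, loss_weight=1.0)
pred = paddle.randn([8, 80])         # [N, num_classes] logits
target = paddle.randint(0, 81, [8])  # [N]; 80 means background
loss = focal(pred, target, reduction='mean')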
217
paddle_detection/ppdet/modeling/losses/gfocal_loss.py
Normal file
@@ -0,0 +1,217 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# The code is based on:
|
||||
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/gfocal_loss.py
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.modeling import ops
|
||||
|
||||
__all__ = ['QualityFocalLoss', 'DistributionFocalLoss']
|
||||
|
||||
|
||||
def quality_focal_loss(pred, target, beta=2.0, use_sigmoid=True):
|
||||
"""
|
||||
Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
|
||||
Qualified and Distributed Bounding Boxes for Dense Object Detection
|
||||
<https://arxiv.org/abs/2006.04388>`_.
|
||||
Args:
|
||||
pred (Tensor): Predicted joint representation of classification
|
||||
and quality (IoU) estimation with shape (N, C), C is the number of
|
||||
classes.
|
||||
target (tuple([Tensor])): Target category label with shape (N,)
|
||||
and target quality label with shape (N,).
|
||||
beta (float): The beta parameter for calculating the modulating factor.
|
||||
Defaults to 2.0.
|
||||
Returns:
|
||||
Tensor: Loss tensor with shape (N,).
|
||||
"""
|
||||
assert len(target) == 2, """target for QFL must be a tuple of two elements,
|
||||
including category label and quality label, respectively"""
|
||||
# label denotes the category id, score denotes the quality score
|
||||
label, score = target
|
||||
if use_sigmoid:
|
||||
func = F.binary_cross_entropy_with_logits
|
||||
else:
|
||||
func = F.binary_cross_entropy
|
||||
|
||||
# negatives are supervised by 0 quality score
|
||||
pred_sigmoid = F.sigmoid(pred) if use_sigmoid else pred
|
||||
scale_factor = pred_sigmoid
|
||||
zerolabel = paddle.zeros(pred.shape, dtype='float32')
|
||||
loss = func(pred, zerolabel, reduction='none') * scale_factor.pow(beta)
|
||||
|
||||
# FG cat_id: [0, num_classes -1], BG cat_id: num_classes
|
||||
bg_class_ind = pred.shape[1]
|
||||
pos = paddle.logical_and((label >= 0),
|
||||
(label < bg_class_ind)).nonzero().squeeze(1)
|
||||
if pos.shape[0] == 0:
|
||||
return loss.sum(axis=1)
|
||||
pos_label = paddle.gather(label, pos, axis=0)
|
||||
pos_mask = np.zeros(pred.shape, dtype=np.int32)
|
||||
pos_mask[pos.numpy(), pos_label.numpy()] = 1
|
||||
pos_mask = paddle.to_tensor(pos_mask, dtype='bool')
|
||||
score = score.unsqueeze(-1).expand([-1, pred.shape[1]]).cast('float32')
|
||||
# positives are supervised by bbox quality (IoU) score
|
||||
scale_factor_new = score - pred_sigmoid
|
||||
|
||||
loss_pos = func(
|
||||
pred, score, reduction='none') * scale_factor_new.abs().pow(beta)
|
||||
loss = loss * paddle.logical_not(pos_mask) + loss_pos * pos_mask
|
||||
loss = loss.sum(axis=1)
|
||||
return loss
|
||||
|
||||
|
||||
def distribution_focal_loss(pred, label):
|
||||
"""Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning
|
||||
Qualified and Distributed Bounding Boxes for Dense Object Detection
|
||||
<https://arxiv.org/abs/2006.04388>`_.
|
||||
Args:
|
||||
pred (Tensor): Predicted general distribution of bounding boxes
|
||||
(before softmax) with shape (N, n+1), n is the max value of the
|
||||
integral set `{0, ..., n}` in paper.
|
||||
label (Tensor): Target distance label for bounding boxes with
|
||||
shape (N,).
|
||||
Returns:
|
||||
Tensor: Loss tensor with shape (N,).
|
||||
"""
|
||||
dis_left = label.cast('int64')
|
||||
dis_right = dis_left + 1
|
||||
weight_left = dis_right.cast('float32') - label
|
||||
weight_right = label - dis_left.cast('float32')
|
||||
loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \
|
||||
+ F.cross_entropy(pred, dis_right, reduction='none') * weight_right
|
||||
    return loss


@register
@serializable
class QualityFocalLoss(nn.Layer):
    r"""Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:
    Learning Qualified and Distributed Bounding Boxes for Dense Object
    Detection <https://arxiv.org/abs/2006.04388>`_.
    Args:
        use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.
            Defaults to True.
        beta (float): The beta parameter for calculating the modulating factor.
            Defaults to 2.0.
        reduction (str): Options are "none", "mean" and "sum".
        loss_weight (float): Loss weight of current loss.
    """

    def __init__(self,
                 use_sigmoid=True,
                 beta=2.0,
                 reduction='mean',
                 loss_weight=1.0):
        super(QualityFocalLoss, self).__init__()
        self.use_sigmoid = use_sigmoid
        self.beta = beta
        assert reduction in ('none', 'mean', 'sum')
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target, weight=None, avg_factor=None):
        """Forward function.
        Args:
            pred (Tensor): Predicted joint representation of
                classification and quality (IoU) estimation with shape (N, C),
                C is the number of classes.
            target (tuple([Tensor])): Target category label with shape (N,)
                and target quality label with shape (N,).
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
        """
        loss = self.loss_weight * quality_focal_loss(
            pred, target, beta=self.beta, use_sigmoid=self.use_sigmoid)

        if weight is not None:
            loss = loss * weight
        if avg_factor is None:
            if self.reduction == 'none':
                return loss
            elif self.reduction == 'mean':
                return loss.mean()
            elif self.reduction == 'sum':
                return loss.sum()
        else:
            # if reduction is mean, then average the loss by avg_factor
            if self.reduction == 'mean':
                loss = loss.sum() / avg_factor
            # if reduction is 'none', then do nothing, otherwise raise an error
            elif self.reduction != 'none':
                raise ValueError(
                    'avg_factor can not be used with reduction="sum"')
            return loss


@register
@serializable
class DistributionFocalLoss(nn.Layer):
    """Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:
    Learning Qualified and Distributed Bounding Boxes for Dense Object
    Detection <https://arxiv.org/abs/2006.04388>`_.
    Args:
        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
        loss_weight (float): Loss weight of current loss.
    """

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(DistributionFocalLoss, self).__init__()
        assert reduction in ('none', 'mean', 'sum')
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target, weight=None, avg_factor=None):
        """Forward function.
        Args:
            pred (Tensor): Predicted general distribution of bounding
                boxes (before softmax) with shape (N, n+1), n is the max value
                of the integral set `{0, ..., n}` in paper.
            target (Tensor): Target distance label for bounding boxes
                with shape (N,).
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
        """
        loss = self.loss_weight * distribution_focal_loss(pred, target)
        if weight is not None:
            loss = loss * weight
        if avg_factor is None:
            if self.reduction == 'none':
                return loss
            elif self.reduction == 'mean':
                return loss.mean()
            elif self.reduction == 'sum':
                return loss.sum()
        else:
            # if reduction is mean, then average the loss by avg_factor
            if self.reduction == 'mean':
                loss = loss.sum() / avg_factor
            # if reduction is 'none', then do nothing, otherwise raise an error
            elif self.reduction != 'none':
                raise ValueError(
                    'avg_factor can not be used with reduction="sum"')
            return loss
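A quick smoke test for the two heads above can look like the following. This is an illustrative sketch, not part of the commit; the import path and the background-index convention (label == num_classes means background) are assumptions, and the tensors are random stand-ins for real head outputs.

import paddle
from ppdet.modeling.losses.gfocal_loss import QualityFocalLoss

qfl = QualityFocalLoss(use_sigmoid=True, beta=2.0, reduction='mean')
pred = paddle.randn([8, 80])         # joint cls-quality logits, shape (N, C)
labels = paddle.randint(0, 81, [8])  # category labels; 80 assumed background
scores = paddle.rand([8])            # IoU quality targets in [0, 1]
loss = qfl(pred, (labels, scores))   # scalar under reduction='mean'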
47
paddle_detection/ppdet/modeling/losses/iou_aware_loss.py
Normal file
@@ -0,0 +1,47 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from .iou_loss import IouLoss
from ..bbox_utils import bbox_iou


@register
@serializable
class IouAwareLoss(IouLoss):
    """
    iou aware loss, see https://arxiv.org/abs/1912.05992
    Args:
        loss_weight (float): iou aware loss weight, default is 1.0
        giou (bool): whether to use giou in the iou term, default is False
        diou (bool): whether to use diou in the iou term, default is False
        ciou (bool): whether to use ciou in the iou term, default is False
    """

    def __init__(self, loss_weight=1.0, giou=False, diou=False, ciou=False):
        super(IouAwareLoss, self).__init__(
            loss_weight=loss_weight, giou=giou, diou=diou, ciou=ciou)

    def __call__(self, ioup, pbox, gbox):
        iou = bbox_iou(
            pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou)
        iou.stop_gradient = True
        loss_iou_aware = F.binary_cross_entropy_with_logits(
            ioup, iou, reduction='none')
        loss_iou_aware = loss_iou_aware * self.loss_weight
        return loss_iou_aware
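A hedged usage sketch, not part of the commit: the exact box layout is whatever ..bbox_utils.bbox_iou expects (in the YOLOv3 losses it is fed [x, y, w, h] component lists), so the shapes below are assumptions chosen only to make the call run end to end.

import paddle
from ppdet.modeling.losses.iou_aware_loss import IouAwareLoss

# Hypothetical shapes: (batch, anchors, h, w, 1); ioup must broadcast
# against the IoU map produced by bbox_iou for pbox/gbox.
shape = [2, 3, 8, 8, 1]
pbox = [paddle.rand(shape) for _ in range(4)]   # predicted x, y, w, h
gbox = [paddle.rand(shape) for _ in range(4)]   # target x, y, w, h
ioup = paddle.randn(shape)                      # raw IoU-prediction logits

loss = IouAwareLoss(loss_weight=1.0)(ioup, pbox, gbox)  # per-element BCE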
295
paddle_detection/ppdet/modeling/losses/iou_loss.py
Normal file
@@ -0,0 +1,295 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import math
import paddle

from ppdet.core.workspace import register, serializable
from ..bbox_utils import bbox_iou

__all__ = ['IouLoss', 'GIoULoss', 'DIouLoss', 'SIoULoss']


@register
@serializable
class IouLoss(object):
    """
    iou loss, see https://arxiv.org/abs/1908.03851
    loss = 1.0 - iou * iou
    Args:
        loss_weight (float): iou loss weight, default is 2.5
        giou (bool): whether to use giou in the iou term, default is False
        diou (bool): whether to use diou in the iou term, default is False
        ciou (bool): whether to use ciou in the iou term, default is False
        loss_square (bool): whether to square the iou term
    """

    def __init__(self,
                 loss_weight=2.5,
                 giou=False,
                 diou=False,
                 ciou=False,
                 loss_square=True):
        self.loss_weight = loss_weight
        self.giou = giou
        self.diou = diou
        self.ciou = ciou
        self.loss_square = loss_square

    def __call__(self, pbox, gbox):
        iou = bbox_iou(
            pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou)
        if self.loss_square:
            loss_iou = 1 - iou * iou
        else:
            loss_iou = 1 - iou

        loss_iou = loss_iou * self.loss_weight
        return loss_iou


@register
@serializable
class GIoULoss(object):
    """
    Generalized Intersection over Union, see https://arxiv.org/abs/1902.09630
    Args:
        loss_weight (float): giou loss weight, default as 1
        eps (float): epsilon to avoid divide by zero, default as 1e-10
        reduction (str): Options are "none", "mean" and "sum". default as none
    """

    def __init__(self, loss_weight=1., eps=1e-10, reduction='none'):
        self.loss_weight = loss_weight
        self.eps = eps
        assert reduction in ('none', 'mean', 'sum')
        self.reduction = reduction

    def bbox_overlap(self, box1, box2, eps=1e-10):
        """calculate the iou of box1 and box2
        Args:
            box1 (Tensor): box1 with the shape (..., 4)
            box2 (Tensor): box2 with the shape (..., 4)
            eps (float): epsilon to avoid divide by zero
        Return:
            iou (Tensor): iou of box1 and box2
            overlap (Tensor): overlap of box1 and box2
            union (Tensor): union of box1 and box2
        """
        x1, y1, x2, y2 = box1
        x1g, y1g, x2g, y2g = box2

        xkis1 = paddle.maximum(x1, x1g)
        ykis1 = paddle.maximum(y1, y1g)
        xkis2 = paddle.minimum(x2, x2g)
        ykis2 = paddle.minimum(y2, y2g)
        w_inter = (xkis2 - xkis1).clip(0)
        h_inter = (ykis2 - ykis1).clip(0)
        overlap = w_inter * h_inter

        area1 = (x2 - x1) * (y2 - y1)
        area2 = (x2g - x1g) * (y2g - y1g)
        union = area1 + area2 - overlap + eps
        iou = overlap / union

        return iou, overlap, union

    def __call__(self, pbox, gbox, iou_weight=1., loc_reweight=None):
        x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
        x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
        box1 = [x1, y1, x2, y2]
        box2 = [x1g, y1g, x2g, y2g]
        iou, overlap, union = self.bbox_overlap(box1, box2, self.eps)
        xc1 = paddle.minimum(x1, x1g)
        yc1 = paddle.minimum(y1, y1g)
        xc2 = paddle.maximum(x2, x2g)
        yc2 = paddle.maximum(y2, y2g)

        area_c = (xc2 - xc1) * (yc2 - yc1) + self.eps
        miou = iou - ((area_c - union) / area_c)
        if loc_reweight is not None:
            loc_reweight = paddle.reshape(loc_reweight, shape=(-1, 1))
            loc_thresh = 0.9
            giou = 1 - (1 - loc_thresh
                        ) * miou - loc_thresh * miou * loc_reweight
        else:
            giou = 1 - miou
        if self.reduction == 'none':
            loss = giou
        elif self.reduction == 'sum':
            loss = paddle.sum(giou * iou_weight)
        else:
            loss = paddle.mean(giou * iou_weight)
        return loss * self.loss_weight


@register
@serializable
class DIouLoss(GIoULoss):
    """
    Distance-IoU Loss, see https://arxiv.org/abs/1911.08287
    Args:
        loss_weight (float): diou loss weight, default as 1
        eps (float): epsilon to avoid divide by zero, default as 1e-10
        use_complete_iou_loss (bool): whether to use complete iou loss
    """

    def __init__(self, loss_weight=1., eps=1e-10, use_complete_iou_loss=True):
        super(DIouLoss, self).__init__(loss_weight=loss_weight, eps=eps)
        self.use_complete_iou_loss = use_complete_iou_loss

    def __call__(self, pbox, gbox, iou_weight=1.):
        x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
        x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = x2 - x1
        h = y2 - y1

        cxg = (x1g + x2g) / 2
        cyg = (y1g + y2g) / 2
        wg = x2g - x1g
        hg = y2g - y1g

        x2 = paddle.maximum(x1, x2)
        y2 = paddle.maximum(y1, y2)

        # A and B
        xkis1 = paddle.maximum(x1, x1g)
        ykis1 = paddle.maximum(y1, y1g)
        xkis2 = paddle.minimum(x2, x2g)
        ykis2 = paddle.minimum(y2, y2g)

        # A or B
        xc1 = paddle.minimum(x1, x1g)
        yc1 = paddle.minimum(y1, y1g)
        xc2 = paddle.maximum(x2, x2g)
        yc2 = paddle.maximum(y2, y2g)

        intsctk = (xkis2 - xkis1) * (ykis2 - ykis1)
        intsctk = intsctk * paddle.greater_than(
            xkis2, xkis1) * paddle.greater_than(ykis2, ykis1)
        unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g
                                                        ) - intsctk + self.eps
        iouk = intsctk / unionk

        # DIOU term
        dist_intersection = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg)
        dist_union = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
        diou_term = (dist_intersection + self.eps) / (dist_union + self.eps)

        # CIOU term
        ciou_term = 0
        if self.use_complete_iou_loss:
            ar_gt = wg / hg
            ar_pred = w / h
            arctan = paddle.atan(ar_gt) - paddle.atan(ar_pred)
            ar_loss = 4. / np.pi / np.pi * arctan * arctan
            alpha = ar_loss / (1 - iouk + ar_loss + self.eps)
            alpha.stop_gradient = True
            ciou_term = alpha * ar_loss

        diou = paddle.mean((1 - iouk + ciou_term + diou_term) * iou_weight)

        return diou * self.loss_weight


@register
@serializable
class SIoULoss(GIoULoss):
    """
    SIoU (SCYLLA-IoU) Loss, see https://arxiv.org/pdf/2205.12740.pdf
    Args:
        loss_weight (float): siou loss weight, default as 1
        eps (float): epsilon to avoid divide by zero, default as 1e-10
        theta (float): exponent of the shape cost, default as 4
        reduction (str): Options are "none", "mean" and "sum". default as none
    """

    def __init__(self, loss_weight=1., eps=1e-10, theta=4., reduction='none'):
        super(SIoULoss, self).__init__(loss_weight=loss_weight, eps=eps)
        self.loss_weight = loss_weight
        self.eps = eps
        self.theta = theta
        self.reduction = reduction

    def __call__(self, pbox, gbox):
        x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
        x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)

        box1 = [x1, y1, x2, y2]
        box2 = [x1g, y1g, x2g, y2g]
        iou = bbox_iou(box1, box2)

        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = x2 - x1 + self.eps
        h = y2 - y1 + self.eps

        cxg = (x1g + x2g) / 2
        cyg = (y1g + y2g) / 2
        wg = x2g - x1g + self.eps
        hg = y2g - y1g + self.eps

        x2 = paddle.maximum(x1, x2)
        y2 = paddle.maximum(y1, y2)

        # A or B
        xc1 = paddle.minimum(x1, x1g)
        yc1 = paddle.minimum(y1, y1g)
        xc2 = paddle.maximum(x2, x2g)
        yc2 = paddle.maximum(y2, y2g)

        cw_out = xc2 - xc1
        ch_out = yc2 - yc1

        ch = paddle.maximum(cy, cyg) - paddle.minimum(cy, cyg)
        cw = paddle.maximum(cx, cxg) - paddle.minimum(cx, cxg)

        # angle cost
        dist_intersection = paddle.sqrt((cx - cxg)**2 + (cy - cyg)**2)
        sin_angle_alpha = ch / dist_intersection
        sin_angle_beta = cw / dist_intersection
        thred = paddle.pow(paddle.to_tensor(2), 0.5) / 2
        thred.stop_gradient = True
        sin_alpha = paddle.where(sin_angle_alpha > thred, sin_angle_beta,
                                 sin_angle_alpha)
        angle_cost = paddle.cos(paddle.asin(sin_alpha) * 2 - math.pi / 2)

        # distance cost
        gamma = 2 - angle_cost
        # gamma.stop_gradient = True
        beta_x = ((cxg - cx) / cw_out)**2
        beta_y = ((cyg - cy) / ch_out)**2
        dist_cost = 1 - paddle.exp(-gamma * beta_x) + 1 - paddle.exp(-gamma *
                                                                     beta_y)

        # shape cost
        omega_w = paddle.abs(w - wg) / paddle.maximum(w, wg)
        omega_h = paddle.abs(hg - h) / paddle.maximum(h, hg)
        omega = (1 - paddle.exp(-omega_w))**self.theta + (
            1 - paddle.exp(-omega_h))**self.theta
        siou_loss = 1 - iou + (omega + dist_cost) / 2

        if self.reduction == 'mean':
            siou_loss = paddle.mean(siou_loss)
        elif self.reduction == 'sum':
            siou_loss = paddle.sum(siou_loss)

        return siou_loss * self.loss_weight
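A small self-contained check of the GIoU variant above; the box values are arbitrary and serve only to exercise the axis-aligned overlap math (a sketch, not part of the commit):

import paddle
from ppdet.modeling.losses.iou_loss import GIoULoss

giou_loss = GIoULoss(loss_weight=1., reduction='mean')
pbox = paddle.to_tensor([[10., 10., 50., 50.], [20., 20., 60., 60.]])
gbox = paddle.to_tensor([[12., 12., 48., 52.], [25., 18., 62., 58.]])
loss = giou_loss(pbox, gbox)  # scalar: mean GIoU loss over the two pairs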
193
paddle_detection/ppdet/modeling/losses/jde_loss.py
Normal file
@@ -0,0 +1,193 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register

__all__ = ['JDEDetectionLoss', 'JDEEmbeddingLoss', 'JDELoss']


@register
class JDEDetectionLoss(nn.Layer):
    __shared__ = ['num_classes']

    def __init__(self, num_classes=1, for_mot=True):
        super(JDEDetectionLoss, self).__init__()
        self.num_classes = num_classes
        self.for_mot = for_mot

    def det_loss(self, p_det, anchor, t_conf, t_box):
        pshape = paddle.shape(p_det)
        pshape.stop_gradient = True
        nB, nGh, nGw = pshape[0], pshape[-2], pshape[-1]
        nA = len(anchor)
        p_det = paddle.reshape(
            p_det, [nB, nA, self.num_classes + 5, nGh, nGw]).transpose(
                (0, 1, 3, 4, 2))

        # 1. loss_conf: cross_entropy
        p_conf = p_det[:, :, :, :, 4:6]
        p_conf_flatten = paddle.reshape(p_conf, [-1, 2])
        t_conf_flatten = t_conf.flatten()
        t_conf_flatten = paddle.cast(t_conf_flatten, dtype="int64")
        t_conf_flatten.stop_gradient = True
        loss_conf = F.cross_entropy(
            p_conf_flatten, t_conf_flatten, ignore_index=-1, reduction='mean')
        loss_conf.stop_gradient = False

        # 2. loss_box: smooth_l1_loss
        p_box = p_det[:, :, :, :, :4]
        p_box_flatten = paddle.reshape(p_box, [-1, 4])
        t_box_flatten = paddle.reshape(t_box, [-1, 4])
        fg_inds = paddle.nonzero(t_conf_flatten > 0).flatten()
        if fg_inds.numel() > 0:
            reg_delta = paddle.gather(p_box_flatten, fg_inds)
            reg_target = paddle.gather(t_box_flatten, fg_inds)
        else:
            reg_delta = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
            reg_delta.stop_gradient = False
            reg_target = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
            reg_target.stop_gradient = True
        loss_box = F.smooth_l1_loss(
            reg_delta, reg_target, reduction='mean', delta=1.0)
        loss_box.stop_gradient = False

        return loss_conf, loss_box

    def forward(self, det_outs, targets, anchors):
        """
        Args:
            det_outs (list[Tensor]): output from detection head, each one
                is a 4-D Tensor with shape [N, C, H, W].
            targets (dict): contains 'im_id', 'gt_bbox', 'gt_ide', 'image',
                'im_shape', 'scale_factor' and 'tbox', 'tconf', 'tide' of
                each FPN level.
            anchors (list[list]): anchor setting of JDE model, N rows and M
                columns, N is the number of anchor levels (FPN levels), M is
                the number of anchor scales of each level.
        """
        assert len(det_outs) == len(anchors)
        loss_confs = []
        loss_boxes = []
        for i, (p_det, anchor) in enumerate(zip(det_outs, anchors)):
            t_conf = targets['tconf{}'.format(i)]
            t_box = targets['tbox{}'.format(i)]

            loss_conf, loss_box = self.det_loss(p_det, anchor, t_conf, t_box)
            loss_confs.append(loss_conf)
            loss_boxes.append(loss_box)
        if self.for_mot:
            return {'loss_confs': loss_confs, 'loss_boxes': loss_boxes}
        else:
            jde_conf_losses = sum(loss_confs)
            jde_box_losses = sum(loss_boxes)
            jde_det_losses = {
                "loss_conf": jde_conf_losses,
                "loss_box": jde_box_losses,
                "loss": jde_conf_losses + jde_box_losses,
            }
            return jde_det_losses


@register
class JDEEmbeddingLoss(nn.Layer):
    def __init__(self, ):
        super(JDEEmbeddingLoss, self).__init__()
        self.phony = self.create_parameter(shape=[1], dtype="float32")

    def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
        emb_dim = p_ide.shape[1]
        p_ide = p_ide.transpose((0, 2, 3, 1))
        p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])
        mask = t_conf > 0
        mask = paddle.cast(mask, dtype="int64")
        mask.stop_gradient = True
        emb_mask = mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        emb_mask_inds.stop_gradient = True
        # use max(1) to decide the id, TODO: a more reasonable strategy
        t_ide_flatten = t_ide.max(1).flatten()
        t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
        valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()

        if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
            # loss_ide = paddle.to_tensor([0]) # will be error in gradient backward
            loss_ide = self.phony * 0  # todo
        else:
            embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
            embedding = emb_scale * F.normalize(embedding)
            logits = classifier(embedding)

            ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)

            loss_ide = F.cross_entropy(
                logits, ide_target, ignore_index=-1, reduction='mean')
            loss_ide.stop_gradient = False

        return loss_ide

    def forward(self, ide_outs, targets, emb_scale, classifier):
        loss_ides = []
        for i, p_ide in enumerate(ide_outs):
            t_conf = targets['tconf{}'.format(i)]
            t_ide = targets['tide{}'.format(i)]

            loss_ide = self.emb_loss(p_ide, t_conf, t_ide, emb_scale,
                                     classifier)
            loss_ides.append(loss_ide)
        return loss_ides


@register
class JDELoss(nn.Layer):
    def __init__(self):
        super(JDELoss, self).__init__()

    def forward(self, loss_confs, loss_boxes, loss_ides, loss_params_cls,
                loss_params_reg, loss_params_ide, targets):
        assert len(loss_confs) == len(loss_boxes) == len(loss_ides)
        assert len(loss_params_cls) == len(loss_params_reg) == len(
            loss_params_ide)
        assert len(loss_confs) == len(loss_params_cls)

        batchsize = targets['gt_bbox'].shape[0]
        nTargets = paddle.nonzero(paddle.sum(targets['gt_bbox'], axis=2)).shape[
            0] / batchsize
        nTargets = paddle.to_tensor(nTargets, dtype='float32')
        nTargets.stop_gradient = True

        jde_losses = []
        for i, (loss_conf, loss_box, loss_ide, l_conf_p, l_box_p,
                l_ide_p) in enumerate(
                    zip(loss_confs, loss_boxes, loss_ides, loss_params_cls,
                        loss_params_reg, loss_params_ide)):

            jde_loss = l_conf_p(loss_conf) + l_box_p(loss_box) + l_ide_p(
                loss_ide)
            jde_losses.append(jde_loss)

        loss_all = {
            "loss_conf": sum(loss_confs),
            "loss_box": sum(loss_boxes),
            "loss_ide": sum(loss_ides),
            "loss": sum(jde_losses),
            "nTargets": nTargets,
        }
        return loss_all
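JDELoss expects loss_params_cls/reg/ide to be callables that wrap each partial loss with a learnable task weight (the uncertainty weighting of the JDE paper). A minimal sketch of such a module, assuming the exp(-s) * loss + s form; the class name LossParam here is illustrative, and in the repo the real module lives with the JDE head rather than in this file:

import paddle
import paddle.nn as nn

class LossParam(nn.Layer):
    """Learnable log-variance task weight: loss -> exp(-s) * loss + s."""

    def __init__(self, init_value=0.):
        super(LossParam, self).__init__()
        self.scale = self.create_parameter(
            shape=[1],
            default_initializer=nn.initializer.Constant(value=init_value))

    def forward(self, loss):
        # exp(-s) scales the task loss; the additive s keeps s from
        # growing without bound.
        return paddle.exp(-self.scale) * loss + self.scale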
632
paddle_detection/ppdet/modeling/losses/keypoint_loss.py
Normal file
@@ -0,0 +1,632 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from itertools import cycle, islice
from collections import abc
import numpy as np
import paddle
import paddle.nn as nn

from ppdet.core.workspace import register, serializable

__all__ = [
    'HrHRNetLoss', 'KeyPointMSELoss', 'OKSLoss', 'CenterFocalLoss', 'L1Loss'
]


@register
@serializable
class KeyPointMSELoss(nn.Layer):
    def __init__(self, use_target_weight=True, loss_scale=0.5):
        """
        KeyPointMSELoss layer

        Args:
            use_target_weight (bool): whether to use target weight
            loss_scale (float): scale applied to each per-joint MSE term
        """
        super(KeyPointMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight
        self.loss_scale = loss_scale

    def forward(self, output, records):
        target = records['target']
        target_weight = records['target_weight']
        batch_size = output.shape[0]
        num_joints = output.shape[1]
        heatmaps_pred = output.reshape(
            (batch_size, num_joints, -1)).split(num_joints, 1)
        heatmaps_gt = target.reshape(
            (batch_size, num_joints, -1)).split(num_joints, 1)
        loss = 0
        for idx in range(num_joints):
            heatmap_pred = heatmaps_pred[idx].squeeze()
            heatmap_gt = heatmaps_gt[idx].squeeze()
            if self.use_target_weight:
                loss += self.loss_scale * self.criterion(
                    heatmap_pred.multiply(target_weight[:, idx]),
                    heatmap_gt.multiply(target_weight[:, idx]))
            else:
                loss += self.loss_scale * self.criterion(heatmap_pred,
                                                         heatmap_gt)
        keypoint_losses = dict()
        keypoint_losses['loss'] = loss / num_joints
        return keypoint_losses


@register
@serializable
class HrHRNetLoss(nn.Layer):
    def __init__(self, num_joints, swahr):
        """
        HrHRNetLoss layer

        Args:
            num_joints (int): number of keypoints
            swahr (bool): whether to use the SWAHR heatmap loss
        """
        super(HrHRNetLoss, self).__init__()
        if swahr:
            self.heatmaploss = HeatMapSWAHRLoss(num_joints)
        else:
            self.heatmaploss = HeatMapLoss()
        self.aeloss = AELoss()
        self.ziploss = ZipLoss(
            [self.heatmaploss, self.heatmaploss, self.aeloss])

    def forward(self, inputs, records):
        targets = []
        targets.append([records['heatmap_gt1x'], records['mask_1x']])
        targets.append([records['heatmap_gt2x'], records['mask_2x']])
        targets.append(records['tagmap'])
        keypoint_losses = dict()
        loss = self.ziploss(inputs, targets)
        keypoint_losses['heatmap_loss'] = loss[0] + loss[1]
        keypoint_losses['pull_loss'] = loss[2][0]
        keypoint_losses['push_loss'] = loss[2][1]
        keypoint_losses['loss'] = recursive_sum(loss)
        return keypoint_losses


class HeatMapLoss(object):
    def __init__(self, loss_factor=1.0):
        super(HeatMapLoss, self).__init__()
        self.loss_factor = loss_factor

    def __call__(self, preds, targets):
        heatmap, mask = targets
        loss = ((preds - heatmap)**2 * mask.cast('float').unsqueeze(1))
        loss = paddle.clip(loss, min=0, max=2).mean()
        loss *= self.loss_factor
        return loss


class HeatMapSWAHRLoss(object):
    def __init__(self, num_joints, loss_factor=1.0):
        super(HeatMapSWAHRLoss, self).__init__()
        self.loss_factor = loss_factor
        self.num_joints = num_joints

    def __call__(self, preds, targets):
        heatmaps_gt, mask = targets
        heatmaps_pred = preds[0]
        scalemaps_pred = preds[1]

        heatmaps_scaled_gt = paddle.where(
            heatmaps_gt > 0, 0.5 * heatmaps_gt *
            (1 + (1 +
                  (scalemaps_pred - 1.) * paddle.log(heatmaps_gt + 1e-10))**2),
            heatmaps_gt)

        regularizer_loss = paddle.mean(
            paddle.pow((scalemaps_pred - 1.) * (heatmaps_gt > 0).astype(float),
                       2))
        omiga = 0.01
        # thres = 2**(-1/omiga), threshold for positive weight
        hm_weight = heatmaps_scaled_gt**(
            omiga
        ) * paddle.abs(1 - heatmaps_pred) + paddle.abs(heatmaps_pred) * (
            1 - heatmaps_scaled_gt**(omiga))

        loss = (((heatmaps_pred - heatmaps_scaled_gt)**2) *
                mask.cast('float').unsqueeze(1)) * hm_weight
        loss = loss.mean()
        loss = self.loss_factor * (loss + 1.0 * regularizer_loss)
        return loss


class AELoss(object):
    def __init__(self, pull_factor=0.001, push_factor=0.001):
        super(AELoss, self).__init__()
        self.pull_factor = pull_factor
        self.push_factor = push_factor

    def apply_single(self, pred, tagmap):
        if tagmap.numpy()[:, :, 3].sum() == 0:
            return (paddle.zeros([1]), paddle.zeros([1]))
        nonzero = paddle.nonzero(tagmap[:, :, 3] > 0)
        if nonzero.shape[0] == 0:
            return (paddle.zeros([1]), paddle.zeros([1]))
        p_inds = paddle.unique(nonzero[:, 0])
        num_person = p_inds.shape[0]
        if num_person == 0:
            return (paddle.zeros([1]), paddle.zeros([1]))

        pull = 0
        tagpull_num = 0
        embs_all = []
        person_unvalid = 0
        for person_idx in p_inds.numpy():
            valid_single = tagmap[person_idx.item()]
            validkpts = paddle.nonzero(valid_single[:, 3] > 0)
            valid_single = paddle.index_select(valid_single, validkpts)
            emb = paddle.gather_nd(pred, valid_single[:, :3])
            if emb.shape[0] == 1:
                person_unvalid += 1
            mean = paddle.mean(emb, axis=0)
            embs_all.append(mean)
            pull += paddle.mean(paddle.pow(emb - mean, 2), axis=0)
            tagpull_num += emb.shape[0]
        pull /= max(num_person - person_unvalid, 1)
        if num_person < 2:
            return pull, paddle.zeros([1])

        embs_all = paddle.stack(embs_all)
        A = embs_all.expand([num_person, num_person])
        B = A.transpose([1, 0])
        diff = A - B

        diff = paddle.pow(diff, 2)
        push = paddle.exp(-diff)
        push = paddle.sum(push) - num_person

        push /= 2 * num_person * (num_person - 1)
        return pull, push

    def __call__(self, preds, tagmaps):
        bs = preds.shape[0]
        losses = [
            self.apply_single(preds[i:i + 1].squeeze(),
                              tagmaps[i:i + 1].squeeze()) for i in range(bs)
        ]
        pull = self.pull_factor * sum(loss[0] for loss in losses) / len(losses)
        push = self.push_factor * sum(loss[1] for loss in losses) / len(losses)
        return pull, push


class ZipLoss(object):
    def __init__(self, loss_funcs):
        super(ZipLoss, self).__init__()
        self.loss_funcs = loss_funcs

    def __call__(self, inputs, targets):
        assert len(self.loss_funcs) == len(targets) >= len(inputs)

        def zip_repeat(*args):
            longest = max(map(len, args))
            filled = [islice(cycle(x), longest) for x in args]
            return zip(*filled)

        return tuple(
            fn(x, y)
            for x, y, fn in zip_repeat(inputs, targets, self.loss_funcs))


def recursive_sum(inputs):
    if isinstance(inputs, abc.Sequence):
        return sum([recursive_sum(x) for x in inputs])
    return inputs


def oks_overlaps(kpt_preds, kpt_gts, kpt_valids, kpt_areas, sigmas):
    if not kpt_gts.astype('bool').any():
        return kpt_preds.sum() * 0

    sigmas = paddle.to_tensor(sigmas, dtype=kpt_preds.dtype)
    variances = (sigmas * 2)**2

    assert kpt_preds.shape[0] == kpt_gts.shape[0]
    kpt_preds = kpt_preds.reshape((-1, kpt_preds.shape[-1] // 2, 2))
    kpt_gts = kpt_gts.reshape((-1, kpt_gts.shape[-1] // 2, 2))

    squared_distance = (kpt_preds[:, :, 0] - kpt_gts[:, :, 0]) ** 2 + \
                       (kpt_preds[:, :, 1] - kpt_gts[:, :, 1]) ** 2
    assert (kpt_valids.sum(-1) > 0).all()
    squared_distance0 = squared_distance / (
        kpt_areas[:, None] * variances[None, :] * 2)
    squared_distance1 = paddle.exp(-squared_distance0)
    squared_distance1 = squared_distance1 * kpt_valids
    oks = squared_distance1.sum(axis=1) / kpt_valids.sum(axis=1)

    return oks


def oks_loss(pred,
             target,
             weight,
             valid=None,
             area=None,
             linear=False,
             sigmas=None,
             eps=1e-6,
             avg_factor=None,
             reduction=None):
    """Oks loss.

    Computing the oks loss between a set of predicted poses and target poses.
    The loss is calculated as negative log of oks.

    Args:
        pred (Tensor): Predicted poses of format (x1, y1, x2, y2, ...),
            shape (n, K*2).
        target (Tensor): Corresponding gt poses, shape (n, K*2).
        linear (bool, optional): If True, use linear scale of loss instead of
            log scale. Default: False.
        eps (float): Eps to avoid log(0).

    Returns:
        Tensor: Loss tensor.
    """
    oks = oks_overlaps(pred, target, valid, area, sigmas).clip(min=eps)
    if linear:
        loss = 1 - oks
    else:
        loss = -oks.log()

    if weight is not None:
        if weight.shape != loss.shape:
            if weight.shape[0] == loss.shape[0]:
                # For most cases, weight is of shape (num_priors, ),
                # which means it does not have the second axis num_class
                weight = weight.reshape((-1, 1))
            else:
                # Sometimes, weight per anchor per class is also needed. e.g.
                # in FSAF. But it may be flattened of shape
                # (num_priors x num_class, ), while loss is still of shape
                # (num_priors, num_class).
                assert weight.numel() == loss.numel()
                weight = weight.reshape((loss.shape[0], -1))
        assert weight.ndim == loss.ndim
        loss = loss * weight

    # if avg_factor is not specified, just reduce the loss
    if avg_factor is None:
        if reduction == 'mean':
            loss = loss.mean()
        elif reduction == 'sum':
            loss = loss.sum()
    else:
        # if reduction is mean, then average the loss by avg_factor
        if reduction == 'mean':
            # Avoid causing ZeroDivisionError when avg_factor is 0.0,
            # i.e., all labels of an image belong to ignore index.
            eps = 1e-10
            loss = loss.sum() / (avg_factor + eps)
        # if reduction is 'none', then do nothing, otherwise raise an error
        elif reduction != 'none':
            raise ValueError('avg_factor can not be used with reduction="sum"')

    return loss


@register
@serializable
class OKSLoss(nn.Layer):
    """OKSLoss.

    Computing the oks loss between a set of predicted poses and target poses.

    Args:
        linear (bool): If True, use linear scale of loss instead of log scale.
            Default: False.
        num_keypoints (int): number of keypoints, 17 (COCO) or 14 supported.
        eps (float): Eps to avoid log(0).
        reduction (str): Options are "none", "mean" and "sum".
        loss_weight (float): Weight of loss.
    """

    def __init__(self,
                 linear=False,
                 num_keypoints=17,
                 eps=1e-6,
                 reduction='mean',
                 loss_weight=1.0):
        super(OKSLoss, self).__init__()
        self.linear = linear
        self.eps = eps
        self.reduction = reduction
        self.loss_weight = loss_weight
        if num_keypoints == 17:
            self.sigmas = np.array(
                [
                    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62,
                    1.07, 1.07, .87, .87, .89, .89
                ],
                dtype=np.float32) / 10.0
        elif num_keypoints == 14:
            self.sigmas = np.array([
                .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89,
                .79, .79
            ]) / 10.0
        else:
            raise ValueError(f'Unsupported keypoints number {num_keypoints}')

    def forward(self,
                pred,
                target,
                valid,
                area,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Forward function.

        Args:
            pred (Tensor): The prediction.
            target (Tensor): The learning target of the prediction.
            valid (Tensor): The visible flag of the target pose.
            area (Tensor): The area of the target pose.
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None. Options are "none", "mean" and "sum".
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        if (weight is not None) and (not paddle.any(weight > 0)) and (
                reduction != 'none'):
            if pred.dim() == weight.dim() + 1:
                weight = weight.unsqueeze(1)
            return (pred * weight).sum()  # 0
        if weight is not None and weight.dim() > 1:
            # TODO: remove this in the future
            # reduce the weight of shape (n, 4) to (n,) to match the
            # iou_loss of shape (n,)
            assert weight.shape == pred.shape
            weight = weight.mean(-1)
        loss = self.loss_weight * oks_loss(
            pred,
            target,
            weight,
            valid=valid,
            area=area,
            linear=self.linear,
            sigmas=self.sigmas,
            eps=self.eps,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return loss


def center_focal_loss(pred, gt, weight=None, mask=None, avg_factor=None,
                      reduction=None):
    """Modified focal loss. Exactly the same as CornerNet.
    Runs faster and costs a little bit more memory.

    Args:
        pred (Tensor): The prediction with shape [bs, c, h, w].
        gt (Tensor): The learning target of the prediction in gaussian
            distribution, with shape [bs, c, h, w].
        mask (Tensor): The valid mask. Defaults to None.
    """
    if not gt.astype('bool').any():
        return pred.sum() * 0
    pos_inds = gt.equal(1).astype('float32')
    if mask is None:
        neg_inds = gt.less_than(paddle.to_tensor(
            [1], dtype='float32')).astype('float32')
    else:
        neg_inds = gt.less_than(paddle.to_tensor(
            [1], dtype='float32')).astype('float32') * mask.equal(0).astype(
                'float32')

    neg_weights = paddle.pow(1 - gt, 4)

    loss = 0

    pos_loss = paddle.log(pred) * paddle.pow(1 - pred, 2) * pos_inds
    neg_loss = paddle.log(1 - pred) * paddle.pow(pred, 2) * neg_weights * \
               neg_inds

    num_pos = pos_inds.astype('float32').sum()
    pos_loss = pos_loss.sum()
    neg_loss = neg_loss.sum()

    if num_pos == 0:
        loss = loss - neg_loss
    else:
        loss = loss - (pos_loss + neg_loss) / num_pos

    if weight is not None:
        if weight.shape != loss.shape:
            if weight.shape[0] == loss.shape[0]:
                # For most cases, weight is of shape (num_priors, ),
                # which means it does not have the second axis num_class
                weight = weight.reshape((-1, 1))
            else:
                # Sometimes, weight per anchor per class is also needed. e.g.
                # in FSAF. But it may be flattened of shape
                # (num_priors x num_class, ), while loss is still of shape
                # (num_priors, num_class).
                assert weight.numel() == loss.numel()
                weight = weight.reshape((loss.shape[0], -1))
        assert weight.ndim == loss.ndim
        loss = loss * weight

    # if avg_factor is not specified, just reduce the loss
    if avg_factor is None:
        if reduction == 'mean':
            loss = loss.mean()
        elif reduction == 'sum':
            loss = loss.sum()
    else:
        # if reduction is mean, then average the loss by avg_factor
        if reduction == 'mean':
            # Avoid causing ZeroDivisionError when avg_factor is 0.0,
            # i.e., all labels of an image belong to ignore index.
            eps = 1e-10
            loss = loss.sum() / (avg_factor + eps)
        # if reduction is 'none', then do nothing, otherwise raise an error
        elif reduction != 'none':
            raise ValueError('avg_factor can not be used with reduction="sum"')

    return loss


@register
@serializable
class CenterFocalLoss(nn.Layer):
    """CenterFocalLoss is a variant of focal loss.

    More details can be found in the `paper
    <https://arxiv.org/abs/1808.01244>`_

    Args:
        reduction (str): Options are "none", "mean" and "sum".
        loss_weight (float): Loss weight of current loss.
    """

    def __init__(self,
                 reduction='none',
                 loss_weight=1.0):
        super(CenterFocalLoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                mask=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            pred (Tensor): The prediction.
            target (Tensor): The learning target of the prediction in gaussian
                distribution.
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            mask (Tensor): The valid mask. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss_reg = self.loss_weight * center_focal_loss(
            pred,
            target,
            weight,
            mask=mask,
            reduction=reduction,
            avg_factor=avg_factor)
        return loss_reg


def l1_loss(pred, target, weight=None, reduction='mean', avg_factor=None):
    """L1 loss.

    Args:
        pred (Tensor): The prediction.
        target (Tensor): The learning target of the prediction.

    Returns:
        Tensor: Calculated loss
    """
    if not target.astype('bool').any():
        return pred.sum() * 0

    assert pred.shape == target.shape
    loss = paddle.abs(pred - target)

    if weight is not None:
        if weight.shape != loss.shape:
            if weight.shape[0] == loss.shape[0]:
                # For most cases, weight is of shape (num_priors, ),
                # which means it does not have the second axis num_class
                weight = weight.reshape((-1, 1))
            else:
                # Sometimes, weight per anchor per class is also needed. e.g.
                # in FSAF. But it may be flattened of shape
                # (num_priors x num_class, ), while loss is still of shape
                # (num_priors, num_class).
                assert weight.numel() == loss.numel()
                weight = weight.reshape((loss.shape[0], -1))
        assert weight.ndim == loss.ndim
        loss = loss * weight

    # if avg_factor is not specified, just reduce the loss
    if avg_factor is None:
        if reduction == 'mean':
            loss = loss.mean()
        elif reduction == 'sum':
            loss = loss.sum()
    else:
        # if reduction is mean, then average the loss by avg_factor
        if reduction == 'mean':
            # Avoid causing ZeroDivisionError when avg_factor is 0.0,
            # i.e., all labels of an image belong to ignore index.
            eps = 1e-10
            loss = loss.sum() / (avg_factor + eps)
        # if reduction is 'none', then do nothing, otherwise raise an error
        elif reduction != 'none':
            raise ValueError('avg_factor can not be used with reduction="sum"')

    return loss


@register
@serializable
class L1Loss(nn.Layer):
    """L1 loss.

    Args:
        reduction (str, optional): The method to reduce the loss.
            Options are "none", "mean" and "sum".
        loss_weight (float, optional): The weight of loss.
    """

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(L1Loss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            pred (Tensor): The prediction.
            target (Tensor): The learning target of the prediction.
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = (
            reduction_override if reduction_override else self.reduction)
        loss_bbox = self.loss_weight * l1_loss(
            pred, target, weight, reduction=reduction, avg_factor=avg_factor)
        return loss_bbox
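For reference, oks_overlaps above evaluates the standard COCO object keypoint similarity. With per-keypoint visibilities v_k (kpt_valids), instance area a (kpt_areas), and per-keypoint constants sigma_k (the code precomputes variances = (2*sigma)^2), it computes, per instance,

\mathrm{OKS} = \frac{\sum_{k} v_k \, \exp\!\left(-\frac{d_k^2}{2\, a\, (2\sigma_k)^2}\right)}{\sum_{k} v_k},
\qquad d_k^2 = (\hat{x}_k - x_k)^2 + (\hat{y}_k - y_k)^2

and oks_loss then returns 1 - OKS (linear=True) or -log OKS (the default).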
250
paddle_detection/ppdet/modeling/losses/pose3d_loss.py
Normal file
@@ -0,0 +1,250 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from itertools import cycle, islice
from collections import abc
import cv2
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')

__all__ = ['Pose3DLoss']


@register
@serializable
class Pose3DLoss(nn.Layer):
    def __init__(self, weight_3d=1.0, weight_2d=0.0, reduction='none'):
        """
        Pose3DLoss layer

        Args:
            weight_3d (float): weight of 3d loss
            weight_2d (float): weight of 2d loss
            reduction (str): reduction used by the inner criterions
        """
        super(Pose3DLoss, self).__init__()
        self.weight_3d = weight_3d
        self.weight_2d = weight_2d
        self.criterion_2dpose = nn.MSELoss(reduction=reduction)
        self.criterion_3dpose = nn.L1Loss(reduction=reduction)
        self.criterion_smoothl1 = nn.SmoothL1Loss(
            reduction=reduction, delta=1.0)
        self.criterion_vertices = nn.L1Loss()

    def forward(self, pred3d, pred2d, inputs):
        """
        mpjpe: mpjpe loss between 3d joints
        keypoint_2d_loss: 2d joints loss computed by criterion_2dpose
        """
        gt_3d_joints = inputs['joints_3d']
        gt_2d_joints = inputs['joints_2d']
        has_3d_joints = inputs['has_3d_joints']
        has_2d_joints = inputs['has_2d_joints']

        loss_3d = mpjpe_focal(pred3d, gt_3d_joints, has_3d_joints)
        loss = self.weight_3d * loss_3d
        epoch = inputs['epoch_id']
        if self.weight_2d > 0:
            weight = self.weight_2d * pow(0.1, (epoch // 8))
            if epoch > 8:
                weight = 0
            loss_2d = keypoint_2d_loss(self.criterion_2dpose, pred2d,
                                       gt_2d_joints, has_2d_joints)
            loss += weight * loss_2d
        return loss


def filter_3d_joints(pred, gt, has_3d_joints):
    """
    filter 3d joints
    """
    gt = gt[has_3d_joints == 1]
    gt = gt[:, :, :3]
    pred = pred[has_3d_joints == 1]

    gt_pelvis = (gt[:, 2, :] + gt[:, 3, :]) / 2
    gt = gt - gt_pelvis[:, None, :]
    pred_pelvis = (pred[:, 2, :] + pred[:, 3, :]) / 2
    pred = pred - pred_pelvis[:, None, :]
    return pred, gt


def mpjpe(pred, gt, has_3d_joints):
    """
    mPJPE loss
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    error = paddle.sqrt((paddle.minimum((pred - gt), paddle.to_tensor(1.2))**2
                         ).sum(axis=-1)).mean()
    return error


def mpjpe_focal(pred, gt, has_3d_joints):
    """
    mPJPE loss with focal-style reweighting of per-joint errors
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    mse_error = ((pred - gt)**2).sum(axis=-1)
    mpjpe_error = paddle.sqrt(mse_error)
    mean = mpjpe_error.mean()
    std = mpjpe_error.std()
    atte = 2 * F.sigmoid(6 * (mpjpe_error - mean) / std)
    mse_error *= atte
    return mse_error.mean()


def mpjpe_mse(pred, gt, has_3d_joints, weight=1.):
    """
    mPJPE loss in MSE form
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    error = (((pred - gt)**2).sum(axis=-1)).mean()
    return error


def mpjpe_criterion(pred, gt, has_3d_joints, criterion_pose3d):
    """
    mPJPE loss with a self-defined criterion
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    error = paddle.sqrt(criterion_pose3d(pred, gt)).mean()
    return error


@register
@serializable
def weighted_mpjpe(pred, gt, has_3d_joints):
    """
    Weighted_mPJPE
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    weight = paddle.linalg.norm(pred, p=2, axis=-1)
    weight = paddle.to_tensor(
        [1.5, 1.3, 1.2, 1.2, 1.3, 1.5, 1.5, 1.3, 1.2, 1.2, 1.3, 1.5, 1., 1.])
    error = (weight * paddle.linalg.norm(pred - gt, p=2, axis=-1)).mean()
    return error


@register
@serializable
def normed_mpjpe(pred, gt, has_3d_joints):
    """
    Normalized MPJPE (scale only), adapted from:
    https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py
    """
    assert pred.shape == gt.shape
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)

    norm_predicted = paddle.mean(
        paddle.sum(pred**2, axis=3, keepdim=True), axis=2, keepdim=True)
    norm_target = paddle.mean(
        paddle.sum(gt * pred, axis=3, keepdim=True), axis=2, keepdim=True)
    scale = norm_target / norm_predicted
    return mpjpe(scale * pred, gt)


@register
@serializable
def mpjpe_np(pred, gt, has_3d_joints):
    """
    mPJPE_NP
    """
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    error = np.sqrt(((pred - gt)**2).sum(axis=-1)).mean()
    return error


@register
@serializable
def mean_per_vertex_error(pred, gt, has_smpl):
    """
    Compute mPVE
    """
    pred = pred[has_smpl == 1]
    gt = gt[has_smpl == 1]
    with paddle.no_grad():
        error = paddle.sqrt(((pred - gt)**2).sum(axis=-1)).mean()
    return error


@register
@serializable
def keypoint_2d_loss(criterion_keypoints, pred_keypoints_2d, gt_keypoints_2d,
                     has_pose_2d):
    """
    Compute 2D reprojection loss if 2D keypoint annotations are available.
    The confidence (conf) is binary and indicates whether the keypoints exist or not.
    """
    conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
    loss = (conf * criterion_keypoints(
        pred_keypoints_2d, gt_keypoints_2d[:, :, :-1] * 0.001)).mean()
    return loss


@register
@serializable
def keypoint_3d_loss(criterion_keypoints, pred_keypoints_3d, gt_keypoints_3d,
                     has_pose_3d):
    """
    Compute 3D keypoint loss if 3D keypoint annotations are available.
    """
    conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
    gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone()
    gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1]
    conf = conf[has_pose_3d == 1]
    pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1]
    if len(gt_keypoints_3d) > 0:
        gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2
        gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :]
        pred_pelvis = (
            pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2
        pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :]
        return (conf * criterion_keypoints(pred_keypoints_3d,
                                           gt_keypoints_3d)).mean()
    else:
        return paddle.to_tensor([1.]).fill_(0.)


@register
@serializable
def vertices_loss(criterion_vertices, pred_vertices, gt_vertices, has_smpl):
    """
    Compute per-vertex loss if vertex annotations are available.
    """
    pred_vertices_with_shape = pred_vertices[has_smpl == 1]
    gt_vertices_with_shape = gt_vertices[has_smpl == 1]
    if len(gt_vertices_with_shape) > 0:
        return criterion_vertices(pred_vertices_with_shape,
                                  gt_vertices_with_shape)
    else:
        return paddle.to_tensor([1.]).fill_(0.)


@register
@serializable
def rectify_pose(pose):
    pose = pose.copy()
    R_mod = cv2.Rodrigues(np.array([np.pi, 0, 0]))[0]
    R_root = cv2.Rodrigues(pose[:3])[0]
    new_root = R_root.dot(R_mod)
    pose[:3] = cv2.Rodrigues(new_root)[0].reshape(3)
    return pose
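The mpjpe* helpers above are variants of the standard mean per-joint position error. After the pelvis-centering done by filter_3d_joints, the base metric over J joints with aligned predictions \hat{p} and targets p is

\mathrm{MPJPE} = \frac{1}{J}\sum_{j=1}^{J}\left\lVert \hat{p}_j - p_j \right\rVert_2

mpjpe_focal additionally reweights each joint's squared error by 2 * sigmoid(6 * (e_j - mean) / std) before averaging, so joints with above-average error dominate the loss.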
104
paddle_detection/ppdet/modeling/losses/probiou_loss.py
Normal file
@@ -0,0 +1,104 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import paddle
import paddle.nn.functional as F

from ppdet.core.workspace import register, serializable

__all__ = ['ProbIoULoss']


def gbb_form(boxes):
    xy, wh, angle = paddle.split(boxes, [2, 2, 1], axis=-1)
    return paddle.concat([xy, wh.pow(2) / 12., angle], axis=-1)


def rotated_form(a_, b_, angles):
    cos_a = paddle.cos(angles)
    sin_a = paddle.sin(angles)
    a = a_ * paddle.pow(cos_a, 2) + b_ * paddle.pow(sin_a, 2)
    b = a_ * paddle.pow(sin_a, 2) + b_ * paddle.pow(cos_a, 2)
    c = (a_ - b_) * cos_a * sin_a
    return a, b, c


def probiou_loss(pred, target, eps=1e-3, mode='l1'):
    """
    pred   -> a matrix [N, 5] (x, y, w, h, angle in radians) containing the
              predicted boxes; for HBB, angle == 0
    target -> a matrix [N, 5] (x, y, w, h, angle in radians) containing the
              target boxes; for HBB, angle == 0
    eps    -> threshold to avoid infinite values
    mode   -> 'l1' (values in [0, 1]) or 'l2' (values in [0, inf]), the
              metrics described in the paper
    """
    gbboxes1 = gbb_form(pred)
    gbboxes2 = gbb_form(target)

    x1, y1, a1_, b1_, c1_ = (gbboxes1[:, 0], gbboxes1[:, 1], gbboxes1[:, 2],
                             gbboxes1[:, 3], gbboxes1[:, 4])
    x2, y2, a2_, b2_, c2_ = (gbboxes2[:, 0], gbboxes2[:, 1], gbboxes2[:, 2],
                             gbboxes2[:, 3], gbboxes2[:, 4])

    a1, b1, c1 = rotated_form(a1_, b1_, c1_)
    a2, b2, c2 = rotated_form(a2_, b2_, c2_)

    t1 = 0.25 * ((a1 + a2) * (paddle.pow(y1 - y2, 2)) + (b1 + b2) * (paddle.pow(x1 - x2, 2))) + \
         0.5 * ((c1 + c2) * (x2 - x1) * (y1 - y2))
    t2 = (a1 + a2) * (b1 + b2) - paddle.pow(c1 + c2, 2)
    t3_ = (a1 * b1 - c1 * c1) * (a2 * b2 - c2 * c2)
    t3 = 0.5 * paddle.log(t2 / (4 * paddle.sqrt(F.relu(t3_)) + eps))

    B_d = (t1 / t2) + t3
    # B_d = t1 + t2 + t3

    B_d = paddle.clip(B_d, min=eps, max=100.0)
    l1 = paddle.sqrt(1.0 - paddle.exp(-B_d) + eps)
    l_i = paddle.pow(l1, 2.0)
    l2 = -paddle.log(1.0 - l_i + eps)

    if mode == 'l1':
        probiou = l1
    if mode == 'l2':
        probiou = l2

    return probiou


@serializable
@register
class ProbIoULoss(object):
    """ ProbIoU Loss, refer to https://arxiv.org/abs/2106.06072 for details """

    def __init__(self, mode='l1', eps=1e-3):
        super(ProbIoULoss, self).__init__()
        self.mode = mode
        self.eps = eps

    def __call__(self, pred_rboxes, assigned_rboxes):
        return probiou_loss(pred_rboxes, assigned_rboxes, self.eps, self.mode)
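probiou_loss above models each rotated box as a 2-D Gaussian (gbb_form maps w, h to the variances w^2/12, h^2/12, and rotated_form rotates that covariance by the box angle) and scores a pair by a Bhattacharyya distance. In the code's notation, t1/t2 + t3 assembles

B_D = \frac{\tfrac{1}{4}\left[(a_1+a_2)(y_1-y_2)^2 + (b_1+b_2)(x_1-x_2)^2\right] + \tfrac{1}{2}(c_1+c_2)(x_2-x_1)(y_1-y_2)}{(a_1+a_2)(b_1+b_2) - (c_1+c_2)^2}
    + \frac{1}{2}\ln\frac{(a_1+a_2)(b_1+b_2) - (c_1+c_2)^2}{4\sqrt{(a_1 b_1 - c_1^2)(a_2 b_2 - c_2^2)}}

from which l1 = sqrt(1 - exp(-B_D)) is the Hellinger-style distance used when mode='l1', and l2 = -log(1 - l1^2) when mode='l2'.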
175
paddle_detection/ppdet/modeling/losses/queryinst_loss.py
Normal file
@@ -0,0 +1,175 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn.functional as F

from ppdet.core.workspace import register
from ppdet.modeling.losses.iou_loss import GIoULoss
from .sparsercnn_loss import HungarianMatcher

__all__ = ['QueryInstLoss']


@register
class QueryInstLoss(object):
    __shared__ = ['num_classes']

    def __init__(self,
                 num_classes=80,
                 focal_loss_alpha=0.25,
                 focal_loss_gamma=2.0,
                 class_weight=2.0,
                 l1_weight=5.0,
                 giou_weight=2.0,
                 mask_weight=8.0):
        super(QueryInstLoss, self).__init__()

        self.num_classes = num_classes
        self.focal_loss_alpha = focal_loss_alpha
        self.focal_loss_gamma = focal_loss_gamma
        self.loss_weights = {
            "loss_cls": class_weight,
            "loss_bbox": l1_weight,
            "loss_giou": giou_weight,
            "loss_mask": mask_weight
        }
        self.giou_loss = GIoULoss(eps=1e-6, reduction='sum')

        self.matcher = HungarianMatcher(focal_loss_alpha, focal_loss_gamma,
                                        class_weight, l1_weight, giou_weight)

    def loss_classes(self, class_logits, targets, indices, avg_factor):
        tgt_labels = paddle.full(
            class_logits.shape[:2], self.num_classes, dtype='int32')

        if sum(len(v['labels']) for v in targets) > 0:
            tgt_classes = paddle.concat([
                paddle.gather(
                    tgt['labels'], tgt_idx, axis=0)
                for tgt, (_, tgt_idx) in zip(targets, indices)
            ])
            batch_idx, src_idx = self._get_src_permutation_idx(indices)
            for i, (batch_i, src_i) in enumerate(zip(batch_idx, src_idx)):
                tgt_labels[int(batch_i), int(src_i)] = tgt_classes[i]

        tgt_labels = tgt_labels.flatten(0, 1).unsqueeze(-1)

        tgt_labels_onehot = paddle.cast(
            tgt_labels == paddle.arange(0, self.num_classes), dtype='float32')
        tgt_labels_onehot.stop_gradient = True

        src_logits = class_logits.flatten(0, 1)

        loss_cls = F.sigmoid_focal_loss(
            src_logits,
            tgt_labels_onehot,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction='sum') / avg_factor
        losses = {'loss_cls': loss_cls * self.loss_weights['loss_cls']}
        return losses

    def loss_bboxes(self, bbox_pred, targets, indices, avg_factor):
        bboxes = paddle.concat([
            paddle.gather(
                src, src_idx, axis=0)
            for src, (src_idx, _) in zip(bbox_pred, indices)
        ])

        tgt_bboxes = paddle.concat([
            paddle.gather(
                tgt['boxes'], tgt_idx, axis=0)
            for tgt, (_, tgt_idx) in zip(targets, indices)
        ])
        tgt_bboxes.stop_gradient = True

        im_shapes = paddle.concat([tgt['img_whwh_tgt'] for tgt in targets])
        bboxes_norm = bboxes / im_shapes
        tgt_bboxes_norm = tgt_bboxes / im_shapes

        loss_giou = self.giou_loss(bboxes, tgt_bboxes) / avg_factor
        loss_bbox = F.l1_loss(
            bboxes_norm, tgt_bboxes_norm, reduction='sum') / avg_factor
        losses = {
            'loss_bbox': loss_bbox * self.loss_weights['loss_bbox'],
            'loss_giou': loss_giou * self.loss_weights['loss_giou']
        }
        return losses

    def loss_masks(self, pos_bbox_pred, mask_logits, targets, indices,
                   avg_factor):
        tgt_segm = [
            paddle.gather(
                tgt['gt_segm'], tgt_idx, axis=0)
|
||||
for tgt, (_, tgt_idx) in zip(targets, indices)
|
||||
]
|
||||
|
||||
tgt_masks = []
|
||||
for i in range(len(indices)):
|
||||
gt_segm = tgt_segm[i].unsqueeze(1)
|
||||
if len(gt_segm) == 0:
|
||||
continue
|
||||
boxes = pos_bbox_pred[i]
|
||||
boxes[:, 0::2] = paddle.clip(
|
||||
boxes[:, 0::2], min=0, max=gt_segm.shape[3])
|
||||
boxes[:, 1::2] = paddle.clip(
|
||||
boxes[:, 1::2], min=0, max=gt_segm.shape[2])
|
||||
boxes_num = paddle.to_tensor([1] * len(boxes), dtype='int32')
|
||||
gt_mask = paddle.vision.ops.roi_align(
|
||||
gt_segm,
|
||||
boxes,
|
||||
boxes_num,
|
||||
output_size=mask_logits.shape[-2:],
|
||||
aligned=True)
|
||||
tgt_masks.append(gt_mask)
|
||||
tgt_masks = paddle.concat(tgt_masks).squeeze(1)
|
||||
tgt_masks = paddle.cast(tgt_masks >= 0.5, dtype='float32')
|
||||
tgt_masks.stop_gradient = True
|
||||
|
||||
tgt_labels = paddle.concat([
|
||||
paddle.gather(
|
||||
tgt['labels'], tgt_idx, axis=0)
|
||||
for tgt, (_, tgt_idx) in zip(targets, indices)
|
||||
])
|
||||
|
||||
mask_label = F.one_hot(tgt_labels, self.num_classes).unsqueeze([2, 3])
|
||||
mask_label = paddle.expand_as(mask_label, mask_logits)
|
||||
mask_label.stop_gradient = True
|
||||
|
||||
src_masks = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label))
|
||||
shape = mask_logits.shape
|
||||
src_masks = paddle.reshape(src_masks, [shape[0], shape[2], shape[3]])
|
||||
src_masks = F.sigmoid(src_masks)
|
||||
|
||||
X = src_masks.flatten(1)
|
||||
Y = tgt_masks.flatten(1)
|
||||
inter = paddle.sum(X * Y, 1)
|
||||
union = paddle.sum(X * X, 1) + paddle.sum(Y * Y, 1)
|
||||
dice = (2 * inter) / (union + 2e-5)
|
||||
|
||||
loss_mask = (1 - dice).sum() / avg_factor
|
||||
losses = {'loss_mask': loss_mask * self.loss_weights['loss_mask']}
|
||||
return losses
|
||||
|
||||
@staticmethod
|
||||
def _get_src_permutation_idx(indices):
|
||||
batch_idx = paddle.concat(
|
||||
[paddle.full_like(src, i) for i, (src, _) in enumerate(indices)])
|
||||
src_idx = paddle.concat([src for (src, _) in indices])
|
||||
return batch_idx, src_idx
|
||||
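A hedged sketch (not from the commit; proposal/target ids and shapes are invented) of the classification term alone: one image, three proposals, two of them matched to ground-truth labels. The `indices` format mirrors what HungarianMatcher returns, and dtypes follow the int32 convention used in the file:

    import paddle

    loss_fn = QueryInstLoss(num_classes=80)
    class_logits = paddle.randn([1, 3, 80])                 # [batch, proposals, classes]
    targets = [{'labels': paddle.to_tensor([5, 12], dtype='int32')}]
    indices = [(paddle.to_tensor([0, 2], dtype='int32'),    # matched proposal ids
                paddle.to_tensor([0, 1], dtype='int32'))]   # matched target ids
    print(loss_fn.loss_classes(class_logits, targets, indices, avg_factor=2.0))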
paddle_detection/ppdet/modeling/losses/smooth_l1_loss.py (new file)
@@ -0,0 +1,60 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register

__all__ = ['SmoothL1Loss']


@register
class SmoothL1Loss(nn.Layer):
    """Smooth L1 Loss.
    Args:
        beta (float): controls the smooth region; the loss becomes L1 loss when beta=0.0
        loss_weight (float): the final loss will be multiplied by this factor
    """

    def __init__(self, beta=1.0, loss_weight=1.0):
        super(SmoothL1Loss, self).__init__()
        assert beta >= 0
        self.beta = beta
        self.loss_weight = loss_weight

    def forward(self, pred, target, reduction='none'):
        """forward function, based on fvcore.
        Args:
            pred (Tensor): prediction tensor
            target (Tensor): target tensor; pred.shape must be the same as target.shape
            reduction (str): the way to reduce the loss, one of ('none', 'sum', 'mean')
        """
        assert reduction in ('none', 'sum', 'mean')
        target = target.detach()
        if self.beta < 1e-5:
            loss = paddle.abs(pred - target)
        else:
            n = paddle.abs(pred - target)
            cond = n < self.beta
            loss = paddle.where(cond, 0.5 * n**2 / self.beta,
                                n - 0.5 * self.beta)
        if reduction == 'mean':
            loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
        elif reduction == 'sum':
            loss = loss.sum()
        return loss * self.loss_weight
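A minimal usage sketch (an assumption, not part of the commit) of the forward() contract above:

    import paddle

    loss_fn = SmoothL1Loss(beta=1.0, loss_weight=1.0)
    pred = paddle.rand([4, 4])
    target = paddle.rand([4, 4])
    print(loss_fn(pred, target, reduction='mean'))  # scalar tensor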
paddle_detection/ppdet/modeling/losses/solov2_loss.py (new file)
@@ -0,0 +1,101 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable

__all__ = ['SOLOv2Loss']


@register
@serializable
class SOLOv2Loss(object):
    """
    SOLOv2Loss
    Args:
        ins_loss_weight (float): Weight of instance loss.
        focal_loss_gamma (float): Gamma parameter for focal loss.
        focal_loss_alpha (float): Alpha parameter for focal loss.
    """

    def __init__(self,
                 ins_loss_weight=3.0,
                 focal_loss_gamma=2.0,
                 focal_loss_alpha=0.25):
        self.ins_loss_weight = ins_loss_weight
        self.focal_loss_gamma = focal_loss_gamma
        self.focal_loss_alpha = focal_loss_alpha

    def _dice_loss(self, input, target):
        input = paddle.reshape(input, shape=(paddle.shape(input)[0], -1))
        target = paddle.reshape(target, shape=(paddle.shape(target)[0], -1))
        a = paddle.sum(input * target, axis=1)
        b = paddle.sum(input * input, axis=1) + 0.001
        c = paddle.sum(target * target, axis=1) + 0.001
        d = (2 * a) / (b + c)
        return 1 - d

    def __call__(self, ins_pred_list, ins_label_list, cate_preds, cate_labels,
                 num_ins):
        """
        Get loss of network of SOLOv2.
        Args:
            ins_pred_list (list): Variable list of instance branch output.
            ins_label_list (list): List of instance labels per batch.
            cate_preds (list): Concatenated Variable list of category branch output.
            cate_labels (list): Concatenated list of category labels per batch.
            num_ins (int): Number of positive samples in a mini-batch.
        Returns:
            loss_ins (Variable): The instance loss Variable of SOLOv2 network.
            loss_cate (Variable): The category loss Variable of SOLOv2 network.
        """

        # 1. Use dice_loss to calculate the instance loss.
        loss_ins = []
        total_weights = paddle.zeros(shape=[1], dtype='float32')
        for input, target in zip(ins_pred_list, ins_label_list):
            if input is None:
                continue
            target = paddle.cast(target, 'float32')
            target = paddle.reshape(
                target,
                shape=[-1, paddle.shape(input)[-2], paddle.shape(input)[-1]])
            weights = paddle.cast(
                paddle.sum(target, axis=[1, 2]) > 0, 'float32')
            input = F.sigmoid(input)
            dice_out = paddle.multiply(self._dice_loss(input, target), weights)
            total_weights += paddle.sum(weights)
            loss_ins.append(dice_out)
        loss_ins = paddle.sum(paddle.concat(loss_ins)) / total_weights
        loss_ins = loss_ins * self.ins_loss_weight

        # 2. Use sigmoid_focal_loss to calculate the category loss.
        # expand onehot labels
        num_classes = cate_preds.shape[-1]
        cate_labels_bin = F.one_hot(cate_labels, num_classes=num_classes + 1)
        cate_labels_bin = cate_labels_bin[:, 1:]

        loss_cate = F.sigmoid_focal_loss(
            cate_preds,
            label=cate_labels_bin,
            normalizer=num_ins + 1.,
            gamma=self.focal_loss_gamma,
            alpha=self.focal_loss_alpha)

        return loss_ins, loss_cate
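A hedged toy example (not from the commit) calling the private _dice_loss helper directly, just to illustrate the dice term on a pair of tiny masks; inputs are assumed already sigmoid-activated, as in __call__ above:

    import paddle

    loss_fn = SOLOv2Loss()
    pred = paddle.to_tensor([[0.9, 0.1], [0.8, 0.7]])  # sigmoid-activated masks
    gt = paddle.to_tensor([[1.0, 0.0], [1.0, 1.0]])
    print(loss_fn._dice_loss(pred, gt))                # per-instance loss, shape [2]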
paddle_detection/ppdet/modeling/losses/sparsercnn_loss.py (new file)
@@ -0,0 +1,430 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/PeizeSun/SparseR-CNN/blob/main/projects/SparseRCNN/sparsercnn/loss.py
The copyright of PeizeSun/SparseR-CNN is as follows:
MIT License [see LICENSE for details]
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.optimize import linear_sum_assignment
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.metric import accuracy
from ppdet.core.workspace import register
from ppdet.modeling.losses.iou_loss import GIoULoss

__all__ = ["SparseRCNNLoss"]


@register
class SparseRCNNLoss(nn.Layer):
    """ This class computes the loss for SparseRCNN.
    The process happens in two steps:
    1) we compute hungarian assignment between ground truth boxes and the outputs of the model
    2) we supervise each pair of matched ground-truth / prediction (supervise class and box)
    """
    __shared__ = ['num_classes']

    def __init__(self,
                 losses,
                 focal_loss_alpha,
                 focal_loss_gamma,
                 num_classes=80,
                 class_weight=2.,
                 l1_weight=5.,
                 giou_weight=2.):
        """ Create the criterion.
        Parameters:
            num_classes: number of object categories, omitting the special no-object category
            weight_dict: dict containing as key the names of the losses and as values their relative weight.
            losses: list of all the losses to be applied. See get_loss for the list of available losses.
            matcher: module able to compute a matching between targets and proposals
        """
        super().__init__()
        self.num_classes = num_classes
        weight_dict = {
            "loss_ce": class_weight,
            "loss_bbox": l1_weight,
            "loss_giou": giou_weight
        }
        self.weight_dict = weight_dict
        self.losses = losses
        self.giou_loss = GIoULoss(reduction="sum")

        self.focal_loss_alpha = focal_loss_alpha
        self.focal_loss_gamma = focal_loss_gamma

        self.matcher = HungarianMatcher(focal_loss_alpha, focal_loss_gamma,
                                        class_weight, l1_weight, giou_weight)

    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
        """Classification loss (NLL)
        targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert 'pred_logits' in outputs
        src_logits = outputs['pred_logits']

        idx = self._get_src_permutation_idx(indices)
        target_classes_o = paddle.concat([
            paddle.gather(t["labels"], J, axis=0)
            for t, (_, J) in zip(targets, indices)
        ])
        target_classes = paddle.full(
            src_logits.shape[:2], self.num_classes, dtype="int32")
        for i, ind in enumerate(zip(idx[0], idx[1])):
            target_classes[int(ind[0]), int(ind[1])] = target_classes_o[i]
        target_classes.stop_gradient = True

        src_logits = src_logits.flatten(start_axis=0, stop_axis=1)

        # prepare one_hot target.
        target_classes = target_classes.flatten(start_axis=0, stop_axis=1)
        class_ids = paddle.arange(0, self.num_classes)
        labels = (target_classes.unsqueeze(-1) == class_ids).astype("float32")
        labels.stop_gradient = True

        # comp focal loss.
        class_loss = sigmoid_focal_loss(
            src_logits,
            labels,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
            reduction="sum") / num_boxes
        losses = {'loss_ce': class_loss}

        if log:
            label_acc = target_classes_o.unsqueeze(-1)
            src_idx = [src for (src, _) in indices]

            pred_list = []
            for i in range(outputs["pred_logits"].shape[0]):
                pred_list.append(
                    paddle.gather(
                        outputs["pred_logits"][i], src_idx[i], axis=0))

            pred = F.sigmoid(paddle.concat(pred_list, axis=0))
            acc = accuracy(pred, label_acc.astype("int64"))
            losses["acc"] = acc

        return losses

    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the bounding boxes: the L1 regression loss and the GIoU loss.
        targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert 'pred_boxes' in outputs  # [batch_size, num_proposals, 4]
        src_idx = [src for (src, _) in indices]
        src_boxes_list = []

        for i in range(outputs["pred_boxes"].shape[0]):
            src_boxes_list.append(
                paddle.gather(
                    outputs["pred_boxes"][i], src_idx[i], axis=0))

        src_boxes = paddle.concat(src_boxes_list, axis=0)

        target_boxes = paddle.concat(
            [
                paddle.gather(t['boxes'], tgt_idx, axis=0)
                for t, (_, tgt_idx) in zip(targets, indices)
            ],
            axis=0)
        target_boxes.stop_gradient = True
        losses = {}

        losses['loss_giou'] = self.giou_loss(src_boxes,
                                             target_boxes) / num_boxes

        image_size = paddle.concat([v["img_whwh_tgt"] for v in targets])
        src_boxes_ = src_boxes / image_size
        target_boxes_ = target_boxes / image_size

        loss_bbox = F.l1_loss(src_boxes_, target_boxes_, reduction='sum')
        losses['loss_bbox'] = loss_bbox / num_boxes

        return losses

    def _get_src_permutation_idx(self, indices):
        # permute predictions following indices
        batch_idx = paddle.concat(
            [paddle.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = paddle.concat([src for (src, _) in indices])
        return batch_idx, src_idx

    def _get_tgt_permutation_idx(self, indices):
        # permute targets following indices
        batch_idx = paddle.concat(
            [paddle.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
        tgt_idx = paddle.concat([tgt for (_, tgt) in indices])
        return batch_idx, tgt_idx

    def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
        loss_map = {
            'labels': self.loss_labels,
            'boxes': self.loss_boxes,
        }
        assert loss in loss_map, f'do you really want to compute {loss} loss?'
        return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)

    def forward(self, outputs, targets):
        """ This performs the loss computation.
        Parameters:
            outputs: dict of tensors, see the output specification of the model for the format
            targets: list of dicts, such that len(targets) == batch_size.
                     The expected keys in each dict depend on the losses applied, see each loss' doc
        """
        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }

        # Retrieve the matching between the outputs of the last layer and the targets
        indices = self.matcher(outputs_without_aux, targets)

        # Compute the average number of target boxes across all nodes, for normalization purposes
        num_boxes = sum(len(t["labels"]) for t in targets)
        num_boxes = paddle.to_tensor(
            [num_boxes],
            dtype="float32",
            place=next(iter(outputs.values())).place)

        # Compute all the requested losses
        losses = {}
        for loss in self.losses:
            losses.update(
                self.get_loss(loss, outputs, targets, indices, num_boxes))

        # In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
        if 'aux_outputs' in outputs:
            for i, aux_outputs in enumerate(outputs['aux_outputs']):
                indices = self.matcher(aux_outputs, targets)
                for loss in self.losses:
                    kwargs = {}
                    if loss == 'labels':
                        # Logging is enabled only for the last layer
                        kwargs = {'log': False}
                    l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                           num_boxes, **kwargs)

                    w_dict = {}
                    for k in l_dict.keys():
                        if k in self.weight_dict:
                            w_dict[k + f'_{i}'] = l_dict[k] * self.weight_dict[
                                k]
                        else:
                            w_dict[k + f'_{i}'] = l_dict[k]
                    losses.update(w_dict)

        return losses


class HungarianMatcher(nn.Layer):
    """This class computes an assignment between the targets and the predictions of the network
    For efficiency reasons, the targets don't include the no_object. Because of this, in general,
    there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
    while the others are un-matched (and thus treated as non-objects).
    """

    def __init__(self,
                 focal_loss_alpha,
                 focal_loss_gamma,
                 cost_class: float=1,
                 cost_bbox: float=1,
                 cost_giou: float=1):
        """Creates the matcher
        Params:
            cost_class: This is the relative weight of the classification error in the matching cost
            cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
            cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
        """
        super().__init__()
        self.cost_class = cost_class
        self.cost_bbox = cost_bbox
        self.cost_giou = cost_giou
        self.focal_loss_alpha = focal_loss_alpha
        self.focal_loss_gamma = focal_loss_gamma
        assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs can't be 0"

    @paddle.no_grad()
    def forward(self, outputs, targets):
        """ Performs the matching
        Args:
            outputs: This is a dict that contains at least these entries:
                "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
                eg. outputs = {"pred_logits": pred_logits, "pred_boxes": pred_boxes}
            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                          objects in the target) containing the class labels
                "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
                eg. targets = [{"labels": labels, "boxes": boxes}, ..., {"labels": labels, "boxes": boxes}]
        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        if sum(len(v["labels"]) for v in targets) == 0:
            return [(paddle.to_tensor(
                [], dtype=paddle.int64), paddle.to_tensor(
                    [], dtype=paddle.int64)) for _ in range(bs)]

        # We flatten to compute the cost matrices in a batch
        out_prob = F.sigmoid(outputs["pred_logits"].flatten(
            start_axis=0, stop_axis=1))
        out_bbox = outputs["pred_boxes"].flatten(start_axis=0, stop_axis=1)

        # Also concat the target labels and boxes
        tgt_ids = paddle.concat([v["labels"] for v in targets])
        assert (tgt_ids > -1).all()
        tgt_bbox = paddle.concat([v["boxes"] for v in targets])

        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it in 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, it can be omitted.
        alpha = self.focal_loss_alpha
        gamma = self.focal_loss_gamma

        neg_cost_class = (1 - alpha) * (out_prob**gamma) * (-(
            1 - out_prob + 1e-8).log())
        pos_cost_class = alpha * ((1 - out_prob)
                                  **gamma) * (-(out_prob + 1e-8).log())

        cost_class = paddle.gather(
            pos_cost_class, tgt_ids, axis=1) - paddle.gather(
                neg_cost_class, tgt_ids, axis=1)

        # Compute the L1 cost between boxes
        image_size_out = paddle.concat(
            [v["img_whwh"].unsqueeze(0) for v in targets])
        image_size_out = image_size_out.unsqueeze(1).tile(
            [1, num_queries, 1]).flatten(
                start_axis=0, stop_axis=1)
        image_size_tgt = paddle.concat([v["img_whwh_tgt"] for v in targets])

        out_bbox_ = out_bbox / image_size_out
        tgt_bbox_ = tgt_bbox / image_size_tgt
        cost_bbox = F.l1_loss(
            out_bbox_.unsqueeze(-2), tgt_bbox_,
            reduction='none').sum(-1)  # [batch_size * num_queries, num_tgts]

        # Compute the giou cost between boxes
        cost_giou = -get_bboxes_giou(out_bbox, tgt_bbox)

        # Final cost matrix
        C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
        C = C.reshape([bs, num_queries, -1])

        sizes = [len(v["boxes"]) for v in targets]

        indices = [
            linear_sum_assignment(c[i].numpy())
            for i, c in enumerate(C.split(sizes, -1))
        ]
        return [(paddle.to_tensor(
            i, dtype="int32"), paddle.to_tensor(
                j, dtype="int32")) for i, j in indices]


def box_area(boxes):
    assert (boxes[:, 2:] >= boxes[:, :2]).all()
    wh = boxes[:, 2:] - boxes[:, :2]
    return wh[:, 0] * wh[:, 1]


def boxes_iou(boxes1, boxes2):
    '''
    Compute IoU.

    Args:
        boxes1 (paddle.Tensor): shape (N, 4)
        boxes2 (paddle.Tensor): shape (M, 4)

    Return:
        (paddle.Tensor): shape (N, M)
    '''
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    lt = paddle.maximum(boxes1.unsqueeze(-2)[:, :, :2], boxes2[:, :2])
    rb = paddle.minimum(boxes1.unsqueeze(-2)[:, :, 2:], boxes2[:, 2:])

    wh = (rb - lt).astype("float32").clip(min=1e-9)
    inter = wh[:, :, 0] * wh[:, :, 1]

    union = area1.unsqueeze(-1) + area2 - inter + 1e-9

    iou = inter / union
    return iou, union


def get_bboxes_giou(boxes1, boxes2, eps=1e-9):
    """calculate the ious of boxes1 and boxes2

    Args:
        boxes1 (Tensor): shape [N, 4]
        boxes2 (Tensor): shape [M, 4]
        eps (float): epsilon to avoid divide by zero

    Return:
        ious (Tensor): ious of boxes1 and boxes2, with the shape [N, M]
    """
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()

    iou, union = boxes_iou(boxes1, boxes2)

    lt = paddle.minimum(boxes1.unsqueeze(-2)[:, :, :2], boxes2[:, :2])
    rb = paddle.maximum(boxes1.unsqueeze(-2)[:, :, 2:], boxes2[:, 2:])

    wh = (rb - lt).astype("float32").clip(min=eps)
    enclose_area = wh[:, :, 0] * wh[:, :, 1]

    giou = iou - (enclose_area - union) / enclose_area

    return giou


def sigmoid_focal_loss(inputs, targets, alpha, gamma, reduction="sum"):

    assert reduction in ["sum", "mean"], f'unsupported reduction: {reduction}'

    p = F.sigmoid(inputs)
    ce_loss = F.binary_cross_entropy_with_logits(
        inputs, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce_loss * ((1 - p_t)**gamma)

    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss

    if reduction == "mean":
        loss = loss.mean()
    elif reduction == "sum":
        loss = loss.sum()

    return loss
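A hedged sketch (not from the commit; all shapes, labels, and image sizes are invented) of the matcher contract documented above: five proposals against two targets for a single 100x100 image. Predicted boxes are built so that x2 > x1 and y2 > y1, which the box_area assertions require:

    import paddle

    matcher = HungarianMatcher(
        focal_loss_alpha=0.25, focal_loss_gamma=2.0,
        cost_class=2., cost_bbox=5., cost_giou=2.)

    xy1 = paddle.rand([1, 5, 2]) * 50.
    wh = paddle.rand([1, 5, 2]) * 40. + 5.
    outputs = {
        "pred_logits": paddle.randn([1, 5, 80]),
        "pred_boxes": paddle.concat([xy1, xy1 + wh], axis=-1),  # valid xyxy
    }
    targets = [{
        "labels": paddle.to_tensor([3, 7], dtype="int64"),
        "boxes": paddle.to_tensor([[10., 10., 50., 50.], [20., 30., 60., 80.]]),
        "img_whwh": paddle.to_tensor([100., 100., 100., 100.]),
        "img_whwh_tgt": paddle.to_tensor([[100., 100., 100., 100.]] * 2),
    }]
    indices = matcher(outputs, targets)  # one (pred_idx, tgt_idx) pair per image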
paddle_detection/ppdet/modeling/losses/ssd_loss.py (new file)
@@ -0,0 +1,168 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import iou_similarity, bbox2delta

__all__ = ['SSDLoss']


@register
class SSDLoss(nn.Layer):
    """
    SSDLoss

    Args:
        overlap_threshold (float, optional): IoU threshold used to split
            positive and negative boxes, 0.5 by default.
        neg_pos_ratio (float): The ratio of negative samples / positive samples.
        loc_loss_weight (float): The weight of loc_loss.
        conf_loss_weight (float): The weight of conf_loss.
        prior_box_var (list): Variances corresponding to prior box coords,
            [0.1, 0.1, 0.2, 0.2] by default.
    """

    def __init__(self,
                 overlap_threshold=0.5,
                 neg_pos_ratio=3.0,
                 loc_loss_weight=1.0,
                 conf_loss_weight=1.0,
                 prior_box_var=[0.1, 0.1, 0.2, 0.2]):
        super(SSDLoss, self).__init__()
        self.overlap_threshold = overlap_threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.loc_loss_weight = loc_loss_weight
        self.conf_loss_weight = conf_loss_weight
        self.prior_box_var = [1. / a for a in prior_box_var]

    def _bipartite_match_for_batch(self, gt_bbox, gt_label, prior_boxes,
                                   bg_index):
        """
        Args:
            gt_bbox (Tensor): [B, N, 4]
            gt_label (Tensor): [B, N, 1]
            prior_boxes (Tensor): [A, 4]
            bg_index (int): Background class index
        """
        batch_size, num_priors = gt_bbox.shape[0], prior_boxes.shape[0]
        ious = iou_similarity(gt_bbox.reshape((-1, 4)), prior_boxes).reshape(
            (batch_size, -1, num_priors))

        # For each prior box, get the max IoU of all GTs.
        prior_max_iou, prior_argmax_iou = ious.max(axis=1), ious.argmax(axis=1)
        # For each GT, get the max IoU of all prior boxes.
        gt_max_iou, gt_argmax_iou = ious.max(axis=2), ious.argmax(axis=2)

        # Gather target bbox and label according to 'prior_argmax_iou' index.
        batch_ind = paddle.arange(end=batch_size, dtype='int64').unsqueeze(-1)
        prior_argmax_iou = paddle.stack(
            [batch_ind.tile([1, num_priors]), prior_argmax_iou], axis=-1)
        targets_bbox = paddle.gather_nd(gt_bbox, prior_argmax_iou)
        targets_label = paddle.gather_nd(gt_label, prior_argmax_iou)
        # Assign negative
        bg_index_tensor = paddle.full([batch_size, num_priors, 1], bg_index,
                                      'int64')
        targets_label = paddle.where(
            prior_max_iou.unsqueeze(-1) < self.overlap_threshold,
            bg_index_tensor, targets_label)

        # Ensure each GT can match the max IoU prior box.
        batch_ind = (batch_ind * num_priors + gt_argmax_iou).flatten()
        targets_bbox = paddle.scatter(
            targets_bbox.reshape([-1, 4]), batch_ind,
            gt_bbox.reshape([-1, 4])).reshape([batch_size, -1, 4])
        targets_label = paddle.scatter(
            targets_label.reshape([-1, 1]), batch_ind,
            gt_label.reshape([-1, 1])).reshape([batch_size, -1, 1])
        targets_label[:, :1] = bg_index

        # Encode box
        prior_boxes = prior_boxes.unsqueeze(0).tile([batch_size, 1, 1])
        targets_bbox = bbox2delta(
            prior_boxes.reshape([-1, 4]),
            targets_bbox.reshape([-1, 4]), self.prior_box_var)
        targets_bbox = targets_bbox.reshape([batch_size, -1, 4])

        return targets_bbox, targets_label

    def _mine_hard_example(self,
                           conf_loss,
                           targets_label,
                           bg_index,
                           mine_neg_ratio=0.01):
        pos = (targets_label != bg_index).astype(conf_loss.dtype)
        num_pos = pos.sum(axis=1, keepdim=True)
        neg = (targets_label == bg_index).astype(conf_loss.dtype)

        conf_loss = conf_loss.detach() * neg
        loss_idx = conf_loss.argsort(axis=1, descending=True)
        idx_rank = loss_idx.argsort(axis=1)
        num_negs = []
        for i in range(conf_loss.shape[0]):
            cur_num_pos = num_pos[i]
            num_neg = paddle.clip(
                cur_num_pos * self.neg_pos_ratio, max=pos.shape[1])
            num_neg = num_neg if num_neg > 0 else paddle.to_tensor(
                [pos.shape[1] * mine_neg_ratio])
            num_negs.append(num_neg)
        num_negs = paddle.stack(num_negs).expand_as(idx_rank)
        neg_mask = (idx_rank < num_negs).astype(conf_loss.dtype)

        return (neg_mask + pos).astype('bool')

    def forward(self, boxes, scores, gt_bbox, gt_label, prior_boxes):
        boxes = paddle.concat(boxes, axis=1)
        scores = paddle.concat(scores, axis=1)
        gt_label = gt_label.unsqueeze(-1).astype('int64')
        prior_boxes = paddle.concat(prior_boxes, axis=0)
        bg_index = scores.shape[-1] - 1

        # Match bbox and get targets.
        targets_bbox, targets_label = \
            self._bipartite_match_for_batch(gt_bbox, gt_label, prior_boxes, bg_index)
        targets_bbox.stop_gradient = True
        targets_label.stop_gradient = True

        # Compute regression loss.
        # Select positive samples.
        bbox_mask = paddle.tile(targets_label != bg_index, [1, 1, 4])
        if bbox_mask.astype(boxes.dtype).sum() > 0:
            location = paddle.masked_select(boxes, bbox_mask)
            targets_bbox = paddle.masked_select(targets_bbox, bbox_mask)
            loc_loss = F.smooth_l1_loss(location, targets_bbox, reduction='sum')
            loc_loss = loc_loss * self.loc_loss_weight
        else:
            loc_loss = paddle.zeros([1])

        # Compute confidence loss.
        conf_loss = F.cross_entropy(scores, targets_label, reduction="none")
        # Mine hard examples.
        label_mask = self._mine_hard_example(
            conf_loss.squeeze(-1), targets_label.squeeze(-1), bg_index)
        conf_loss = paddle.masked_select(conf_loss, label_mask.unsqueeze(-1))
        conf_loss = conf_loss.sum() * self.conf_loss_weight

        # Compute overall weighted loss.
        normalizer = (targets_label != bg_index).astype('float32').sum().clip(
            min=1)
        loss = (conf_loss + loc_loss) / normalizer

        return loss
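A hedged end-to-end sketch (not from the commit; shapes, class count, and the rand_xyxy helper are invented) of the forward() contract, with boxes constructed so that x2 > x1 and y2 > y1 as iou_similarity and bbox2delta expect:

    import paddle

    def rand_xyxy(shape):
        # top-left corner plus a strictly positive width/height (hypothetical helper)
        xy1 = paddle.rand(shape + [2]) * 0.5
        wh = paddle.rand(shape + [2]) * 0.4 + 0.05
        return paddle.concat([xy1, xy1 + wh], axis=-1)

    loss_fn = SSDLoss()
    B, A, C = 2, 16, 4                  # batch, priors, classes (last index = background)
    boxes = [paddle.randn([B, A, 4])]   # raw localization predictions (deltas)
    scores = [paddle.randn([B, A, C])]
    prior_boxes = [rand_xyxy([A])]
    gt_bbox = rand_xyxy([B, 2])
    gt_label = paddle.randint(0, C - 1, [B, 2])
    print(loss_fn(boxes, scores, gt_bbox, gt_label, prior_boxes))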
paddle_detection/ppdet/modeling/losses/supcontrast.py (new file)
@@ -0,0 +1,83 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ppdet.core.workspace import register

__all__ = ['SupContrast']


@register
class SupContrast(nn.Layer):
    __shared__ = ['num_classes']

    def __init__(self, num_classes=80, temperature=2.5, sample_num=4096,
                 thresh=0.75):
        super(SupContrast, self).__init__()
        self.num_classes = num_classes
        self.temperature = temperature
        self.sample_num = sample_num
        self.thresh = thresh

    def forward(self, features, labels, scores):
        assert features.shape[0] == labels.shape[0] == scores.shape[0]

        positive_mask = (labels < self.num_classes)
        positive_features = features[positive_mask]
        positive_labels = labels[positive_mask]
        positive_scores = scores[positive_mask]

        negative_mask = (labels == self.num_classes)
        negative_features = features[negative_mask]
        negative_labels = labels[negative_mask]
        negative_scores = scores[negative_mask]

        N = negative_features.shape[0]
        S = self.sample_num - positive_mask.sum()
        index = paddle.to_tensor(random.sample(range(N), int(S)), dtype='int32')

        negative_features = paddle.index_select(
            x=negative_features, index=index, axis=0)
        negative_labels = paddle.index_select(
            x=negative_labels, index=index, axis=0)
        negative_scores = paddle.index_select(
            x=negative_scores, index=index, axis=0)

        features = paddle.concat([positive_features, negative_features], 0)
        labels = paddle.concat([positive_labels, negative_labels], 0)
        scores = paddle.concat([positive_scores, negative_scores], 0)

        if len(labels.shape) == 1:
            labels = labels.reshape([-1, 1])
        label_mask = paddle.equal(labels, labels.T).detach()
        similarity = paddle.matmul(features, features.T) / self.temperature

        sim_row_max = paddle.max(similarity, axis=1, keepdim=True)
        similarity = similarity - sim_row_max

        logits_mask = paddle.ones_like(similarity).detach()
        logits_mask.fill_diagonal_(0)

        exp_sim = paddle.exp(similarity) * logits_mask
        log_prob = similarity - paddle.log(exp_sim.sum(axis=1, keepdim=True))

        per_label_log_prob = (log_prob * logits_mask * label_mask).sum(
            1) / label_mask.sum(1)
        keep = scores > self.thresh
        per_label_log_prob = per_label_log_prob[keep]
        loss = -per_label_log_prob

        return loss.mean()
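A hedged toy sketch (not from the commit; feature dimension, sample split, and thresholds are invented) of the forward() contract. Note that sample_num must not exceed the total RoI count, so it is shrunk here:

    import paddle

    loss_fn = SupContrast(num_classes=80, temperature=2.5, sample_num=8, thresh=0.0)
    feats = paddle.nn.functional.normalize(paddle.randn([16, 128]), axis=1)
    labels = paddle.concat([
        paddle.randint(0, 80, [4]),            # 4 foreground RoIs
        paddle.full([12], 80, dtype='int64'),  # 12 background RoIs
    ])
    scores = paddle.rand([16])
    print(loss_fn(feats, labels, scores))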
paddle_detection/ppdet/modeling/losses/varifocal_loss.py (new file)
@@ -0,0 +1,152 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/varifocal_loss.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling import ops

__all__ = ['VarifocalLoss']


def varifocal_loss(pred,
                   target,
                   alpha=0.75,
                   gamma=2.0,
                   iou_weighted=True,
                   use_sigmoid=True):
    """`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_

    Args:
        pred (Tensor): The prediction with shape (N, C), C is the
            number of classes.
        target (Tensor): The learning target of the iou-aware
            classification score with shape (N, C), C is the number of classes.
        alpha (float, optional): A balance factor for the negative part of
            Varifocal Loss, which is different from the alpha of Focal Loss.
            Defaults to 0.75.
        gamma (float, optional): The gamma for calculating the modulating
            factor. Defaults to 2.0.
        iou_weighted (bool, optional): Whether to weight the loss of the
            positive example with the iou target. Defaults to True.
    """
    # pred and target should be of the same size
    assert pred.shape == target.shape
    if use_sigmoid:
        pred_new = F.sigmoid(pred)
    else:
        pred_new = pred
    target = target.cast(pred.dtype)
    if iou_weighted:
        focal_weight = target * (target > 0.0).cast('float32') + \
            alpha * (pred_new - target).abs().pow(gamma) * \
            (target <= 0.0).cast('float32')
    else:
        focal_weight = (target > 0.0).cast('float32') + \
            alpha * (pred_new - target).abs().pow(gamma) * \
            (target <= 0.0).cast('float32')

    if use_sigmoid:
        loss = F.binary_cross_entropy_with_logits(
            pred, target, reduction='none') * focal_weight
    else:
        loss = F.binary_cross_entropy(
            pred, target, reduction='none') * focal_weight
    loss = loss.sum(axis=1)
    return loss


@register
@serializable
class VarifocalLoss(nn.Layer):
    def __init__(self,
                 use_sigmoid=True,
                 alpha=0.75,
                 gamma=2.0,
                 iou_weighted=True,
                 reduction='mean',
                 loss_weight=1.0):
        """`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_

        Args:
            use_sigmoid (bool, optional): Whether the prediction is
                used for sigmoid or softmax. Defaults to True.
            alpha (float, optional): A balance factor for the negative part of
                Varifocal Loss, which is different from the alpha of Focal
                Loss. Defaults to 0.75.
            gamma (float, optional): The gamma for calculating the modulating
                factor. Defaults to 2.0.
            iou_weighted (bool, optional): Whether to weight the loss of the
                positive examples with the iou target. Defaults to True.
            reduction (str, optional): The method used to reduce the loss into
                a scalar. Defaults to 'mean'. Options are "none", "mean" and
                "sum".
            loss_weight (float, optional): Weight of loss. Defaults to 1.0.
        """
        super(VarifocalLoss, self).__init__()
        assert alpha >= 0.0
        self.use_sigmoid = use_sigmoid
        self.alpha = alpha
        self.gamma = gamma
        self.iou_weighted = iou_weighted
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target, weight=None, avg_factor=None):
        """Forward function.

        Args:
            pred (Tensor): The prediction.
            target (Tensor): The learning target of the prediction.
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
        Returns:
            Tensor: The calculated loss
        """
        loss = self.loss_weight * varifocal_loss(
            pred,
            target,
            alpha=self.alpha,
            gamma=self.gamma,
            iou_weighted=self.iou_weighted,
            use_sigmoid=self.use_sigmoid)

        if weight is not None:
            loss = loss * weight
        if avg_factor is None:
            if self.reduction == 'none':
                return loss
            elif self.reduction == 'mean':
                return loss.mean()
            elif self.reduction == 'sum':
                return loss.sum()
        else:
            # if reduction is 'mean', average the loss by avg_factor
            if self.reduction == 'mean':
                loss = loss.sum() / avg_factor
            # if reduction is 'none', do nothing; otherwise raise an error
            elif self.reduction != 'none':
                raise ValueError(
                    'avg_factor can not be used with reduction="sum"')
        return loss
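A minimal usage sketch (not from the commit; the positive class index and IoU value are invented) in which each sample's positive class carries its IoU as the soft target, as the docstring above describes:

    import paddle

    loss_fn = VarifocalLoss(alpha=0.75, gamma=2.0, reduction='mean')
    pred = paddle.randn([4, 80])   # raw logits
    target = paddle.zeros([4, 80])
    target[:, 3] = 0.7             # positive class carries its IoU as the target
    print(loss_fn(pred, target))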
paddle_detection/ppdet/modeling/losses/yolo_loss.py (new file)
@@ -0,0 +1,207 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register

from ..bbox_utils import decode_yolo, xywh2xyxy, batch_iou_similarity

__all__ = ['YOLOv3Loss']


def bbox_transform(pbox, anchor, downsample):
    pbox = decode_yolo(pbox, anchor, downsample)
    pbox = xywh2xyxy(pbox)
    return pbox


@register
class YOLOv3Loss(nn.Layer):

    __inject__ = ['iou_loss', 'iou_aware_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=False,
                 downsample=[32, 16, 8],
                 scale_x_y=1.,
                 iou_loss=None,
                 iou_aware_loss=None):
        """
        YOLOv3Loss layer

        Args:
            num_classes (int): number of foreground classes
            ignore_thresh (float): threshold to ignore confidence loss
            label_smooth (bool): whether to use label smoothing
            downsample (list): downsample ratio for each detection block
            scale_x_y (float): scale_x_y factor
            iou_loss (object): IoULoss instance
            iou_aware_loss (object): IouAwareLoss instance
        """
        super(YOLOv3Loss, self).__init__()
        self.num_classes = num_classes
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        self.downsample = downsample
        self.scale_x_y = scale_x_y
        self.iou_loss = iou_loss
        self.iou_aware_loss = iou_aware_loss
        self.distill_pairs = []

    def obj_loss(self, pbox, gbox, pobj, tobj, anchor, downsample):
        # pbox
        pbox = decode_yolo(pbox, anchor, downsample)
        pbox = xywh2xyxy(pbox)
        pbox = paddle.concat(pbox, axis=-1)
        b = pbox.shape[0]
        pbox = pbox.reshape((b, -1, 4))
        # gbox: convert center-size form to corner form
        gxy = gbox[:, :, 0:2] - gbox[:, :, 2:4] * 0.5
        gwh = gbox[:, :, 0:2] + gbox[:, :, 2:4] * 0.5
        gbox = paddle.concat([gxy, gwh], axis=-1)

        iou = batch_iou_similarity(pbox, gbox)
        iou.stop_gradient = True
        iou_max = iou.max(2)  # [N, M1]
        iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype)
        iou_mask.stop_gradient = True

        pobj = pobj.reshape((b, -1))
        tobj = tobj.reshape((b, -1))
        obj_mask = paddle.cast(tobj > 0, dtype=pbox.dtype)
        obj_mask.stop_gradient = True

        loss_obj = F.binary_cross_entropy_with_logits(
            pobj, obj_mask, reduction='none')
        loss_obj_pos = (loss_obj * tobj)
        loss_obj_neg = (loss_obj * (1 - obj_mask) * iou_mask)
        return loss_obj_pos + loss_obj_neg

    def cls_loss(self, pcls, tcls):
        if self.label_smooth:
            delta = min(1. / self.num_classes, 1. / 40)
            pos, neg = 1 - delta, delta
            # 1 for positive, 0 for negative
            tcls = pos * paddle.cast(
                tcls > 0., dtype=tcls.dtype) + neg * paddle.cast(
                    tcls <= 0., dtype=tcls.dtype)

        loss_cls = F.binary_cross_entropy_with_logits(
            pcls, tcls, reduction='none')
        return loss_cls

    def yolov3_loss(self, p, t, gt_box, anchor, downsample, scale=1.,
                    eps=1e-10):
        na = len(anchor)
        b, c, h, w = p.shape
        if self.iou_aware_loss:
            ioup, p = p[:, 0:na, :, :], p[:, na:, :, :]
            ioup = ioup.unsqueeze(-1)
        p = p.reshape((b, na, -1, h, w)).transpose((0, 1, 3, 4, 2))
        x, y = p[:, :, :, :, 0:1], p[:, :, :, :, 1:2]
        # w, h below are the predicted size maps, shadowing the spatial dims above
        w, h = p[:, :, :, :, 2:3], p[:, :, :, :, 3:4]
        obj, pcls = p[:, :, :, :, 4:5], p[:, :, :, :, 5:]
        self.distill_pairs.append([x, y, w, h, obj, pcls])

        t = t.transpose((0, 1, 3, 4, 2))
        tx, ty = t[:, :, :, :, 0:1], t[:, :, :, :, 1:2]
        tw, th = t[:, :, :, :, 2:3], t[:, :, :, :, 3:4]
        tscale = t[:, :, :, :, 4:5]
        tobj, tcls = t[:, :, :, :, 5:6], t[:, :, :, :, 6:]

        tscale_obj = tscale * tobj
        loss = dict()

        x = scale * F.sigmoid(x) - 0.5 * (scale - 1.)
        y = scale * F.sigmoid(y) - 0.5 * (scale - 1.)

        if abs(scale - 1.) < eps:
            loss_x = F.binary_cross_entropy(x, tx, reduction='none')
            loss_y = F.binary_cross_entropy(y, ty, reduction='none')
            loss_xy = tscale_obj * (loss_x + loss_y)
        else:
            loss_x = paddle.abs(x - tx)
            loss_y = paddle.abs(y - ty)
            loss_xy = tscale_obj * (loss_x + loss_y)

        loss_xy = loss_xy.sum([1, 2, 3, 4]).mean()

        loss_w = paddle.abs(w - tw)
        loss_h = paddle.abs(h - th)
        loss_wh = tscale_obj * (loss_w + loss_h)
        loss_wh = loss_wh.sum([1, 2, 3, 4]).mean()

        loss['loss_xy'] = loss_xy
        loss['loss_wh'] = loss_wh

        if self.iou_loss is not None:
            # warn: do not modify x, y, w, h in place
            box, tbox = [x, y, w, h], [tx, ty, tw, th]
            pbox = bbox_transform(box, anchor, downsample)
            gbox = bbox_transform(tbox, anchor, downsample)
            loss_iou = self.iou_loss(pbox, gbox)
            loss_iou = loss_iou * tscale_obj
            loss_iou = loss_iou.sum([1, 2, 3, 4]).mean()
            loss['loss_iou'] = loss_iou

        if self.iou_aware_loss is not None:
            box, tbox = [x, y, w, h], [tx, ty, tw, th]
            pbox = bbox_transform(box, anchor, downsample)
            gbox = bbox_transform(tbox, anchor, downsample)
            loss_iou_aware = self.iou_aware_loss(ioup, pbox, gbox)
            loss_iou_aware = loss_iou_aware * tobj
            loss_iou_aware = loss_iou_aware.sum([1, 2, 3, 4]).mean()
            loss['loss_iou_aware'] = loss_iou_aware

        box = [x, y, w, h]
        loss_obj = self.obj_loss(box, gt_box, obj, tobj, anchor, downsample)
        loss_obj = loss_obj.sum(-1).mean()
        loss['loss_obj'] = loss_obj
        loss_cls = self.cls_loss(pcls, tcls) * tobj
        loss_cls = loss_cls.sum([1, 2, 3, 4]).mean()
        loss['loss_cls'] = loss_cls
        return loss

    def forward(self, inputs, targets, anchors):
        num_levels = len(inputs)
        gt_targets = [
            targets['target{}'.format(i)] for i in range(num_levels)
        ]
        gt_box = targets['gt_bbox']
        yolo_losses = dict()
        self.distill_pairs.clear()
        for x, t, anchor, downsample in zip(inputs, gt_targets, anchors,
                                            self.downsample):
            yolo_loss = self.yolov3_loss(
                x.astype('float32'), t, gt_box, anchor, downsample,
                self.scale_x_y)
            for k, v in yolo_loss.items():
                if k in yolo_losses:
                    yolo_losses[k] += v
                else:
                    yolo_losses[k] = v

        loss = 0
        for k, v in yolo_losses.items():
            loss += v

        yolo_losses['loss'] = loss
        return yolo_losses
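A hedged sketch (not from the commit; grid size, anchor values, and the all-zero target layout are invented for illustration) of the forward() contract with a single 13x13 detection level, three anchors, and 80 classes. The per-level target layout follows yolov3_loss above: tx, ty, tw, th, tscale, tobj, then the class vector:

    import paddle

    loss_fn = YOLOv3Loss(num_classes=80, downsample=[32])
    b, na, nc, h, w = 2, 3, 80, 13, 13
    inputs = [paddle.randn([b, na * (5 + nc), h, w])]
    targets = {
        'target0': paddle.zeros([b, na, 6 + nc, h, w]),
        'gt_bbox': paddle.rand([b, 4, 4]),  # normalized cx, cy, w, h
    }
    anchors = [[[116, 90], [156, 198], [373, 326]]]
    print(loss_fn(inputs, targets, anchors))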