Replace document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,58 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import yolo_loss
from . import iou_aware_loss
from . import iou_loss
from . import ssd_loss
from . import fcos_loss
from . import solov2_loss
from . import ctfocal_loss
from . import keypoint_loss
from . import jde_loss
from . import fairmot_loss
from . import gfocal_loss
from . import detr_loss
from . import sparsercnn_loss
from . import focal_loss
from . import smooth_l1_loss
from . import pose3d_loss
from . import probiou_loss
from . import cot_loss
from . import supcontrast
from . import queryinst_loss
from . import clrnet_loss
from . import clrnet_line_iou_loss
from .yolo_loss import *
from .iou_aware_loss import *
from .iou_loss import *
from .ssd_loss import *
from .fcos_loss import *
from .solov2_loss import *
from .ctfocal_loss import *
from .keypoint_loss import *
from .jde_loss import *
from .fairmot_loss import *
from .gfocal_loss import *
from .detr_loss import *
from .sparsercnn_loss import *
from .focal_loss import *
from .smooth_l1_loss import *
from .pose3d_loss import *
from .probiou_loss import *
from .cot_loss import *
from .supcontrast import *
from .queryinst_loss import *
from .clrnet_loss import *
from .clrnet_line_iou_loss import *

View File

@@ -0,0 +1,41 @@
import paddle
def line_iou(pred, target, img_w, length=15, aligned=True):
'''
Calculate the line iou value between predictions and targets
Args:
pred: lane predictions, shape: (num_pred, 72)
target: ground truth, shape: (num_target, 72)
img_w: image width
length: extended radius
aligned: True for iou loss calculation, False for pair-wise ious in assign
'''
    # extend each lane x-coordinate into a 1-D segment of radius `length`
    px1 = pred - length
    px2 = pred + length
    tx1 = target - length
    tx2 = target + length
    if aligned:
        # one-to-one: compare matched prediction/target rows directly
        invalid_mask = target
        ovr = paddle.minimum(px2, tx2) - paddle.maximum(px1, tx1)
        union = paddle.maximum(px2, tx2) - paddle.minimum(px1, tx1)
else:
num_pred = pred.shape[0]
invalid_mask = target.tile([num_pred, 1, 1])
ovr = (paddle.minimum(px2[:, None, :], tx2[None, ...]) - paddle.maximum(
px1[:, None, :], tx1[None, ...]))
union = (paddle.maximum(px2[:, None, :], tx2[None, ...]) -
paddle.minimum(px1[:, None, :], tx1[None, ...]))
    # x-coordinates that fall outside the image carry no signal; zero them out
    invalid_masks = (invalid_mask < 0) | (invalid_mask >= img_w)
    ovr[invalid_masks] = 0.
    union[invalid_masks] = 0.
    iou = ovr.sum(axis=-1) / (union.sum(axis=-1) + 1e-9)
return iou
def liou_loss(pred, target, img_w, length=15):
return (1 - line_iou(pred, target, img_w, length)).mean()
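A minimal usage sketch of the two helpers above (shapes follow the docstring; the random values and the 800-pixel image width are assumptions):

import paddle
pred = paddle.rand([4, 72]) * 800    # 4 predicted lanes, 72 x-coordinates each
target = paddle.rand([4, 72]) * 800  # 4 matched ground-truth lanes
loss = liou_loss(pred, target, img_w=800, length=15)     # scalar: 1 - mean line IoU
ious = line_iou(pred, target, img_w=800, aligned=False)  # [4, 4] pairwise IoUs for assignment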

View File

@@ -0,0 +1,283 @@
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.clrnet_utils import accuracy
from ppdet.modeling.assigners.clrnet_assigner import assign
from ppdet.modeling.losses.clrnet_line_iou_loss import liou_loss
__all__ = ['CLRNetLoss']
class SoftmaxFocalLoss(nn.Layer):
def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.nll = nn.NLLLoss(ignore_index=ignore_lb)
    def forward(self, logits, labels):
        # paddle uses `axis`, not torch's `dim`
        scores = F.softmax(logits, axis=1)
        factor = paddle.pow(1. - scores, self.gamma)
        log_score = F.log_softmax(logits, axis=1)
        log_score = factor * log_score
        loss = self.nll(log_score, labels)
        return loss
def focal_loss(input: paddle.Tensor,
target: paddle.Tensor,
alpha: float,
gamma: float=2.0,
reduction: str='none',
eps: float=1e-8) -> paddle.Tensor:
r"""Function that computes Focal loss.
See :class:`~kornia.losses.FocalLoss` for details.
"""
    if not paddle.is_tensor(input):
        raise TypeError("Input type is not a paddle.Tensor. Got {}".format(
            type(input)))
if not len(input.shape) >= 2:
raise ValueError("Invalid input shape, we expect BxCx*. Got: {}".format(
input.shape))
if input.shape[0] != target.shape[0]:
raise ValueError(
'Expected input batch_size ({}) to match target batch_size ({}).'.
format(input.shape[0], target.shape[0]))
n = input.shape[0]
out_size = (n, ) + tuple(input.shape[2:])
if target.shape[1:] != input.shape[2:]:
raise ValueError('Expected target size {}, got {}'.format(out_size,
target.shape))
    if (isinstance(input.place, paddle.CUDAPlace) and
            isinstance(target.place, paddle.CPUPlace)) or (
                isinstance(input.place, paddle.CPUPlace) and
                isinstance(target.place, paddle.CUDAPlace)):
raise ValueError(
"input and target must be in the same device. Got: {} and {}".
format(input.place, target.place))
# compute softmax over the classes axis
input_soft: paddle.Tensor = F.softmax(input, axis=1) + eps
# create the labels one hot tensor
target_one_hot: paddle.Tensor = paddle.to_tensor(
F.one_hot(
target, num_classes=input.shape[1]).cast(input.dtype),
place=input.place)
# compute the actual focal loss
weight = paddle.pow(-input_soft + 1., gamma)
focal = -alpha * weight * paddle.log(input_soft)
loss_tmp = paddle.sum(target_one_hot * focal, axis=1)
if reduction == 'none':
loss = loss_tmp
elif reduction == 'mean':
loss = paddle.mean(loss_tmp)
elif reduction == 'sum':
loss = paddle.sum(loss_tmp)
else:
raise NotImplementedError("Invalid reduction mode: {}".format(
reduction))
return loss
class FocalLoss(nn.Layer):
r"""Criterion that computes Focal loss.
According to [1], the Focal loss is computed as follows:
.. math::
\text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t)
where:
- :math:`p_t` is the model's estimated probability for each class.
Arguments:
alpha (float): Weighting factor :math:`\alpha \in [0, 1]`.
        gamma (float): Focusing parameter :math:`\gamma \geq 0`.
reduction (str, optional): Specifies the reduction to apply to the
output: none | mean | sum. none: no reduction will be applied,
mean: the sum of the output will be divided by the number of elements
in the output, sum: the output will be summed. Default: none.
Shape:
- Input: :math:`(N, C, *)` where C = number of classes.
        - Target: :math:`(N, *)` where each value is
          :math:`0 \leq targets[i] \leq C-1`.
    Examples:
        >>> N = 5  # num_classes
        >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'}
        >>> loss = FocalLoss(**kwargs)
        >>> input = paddle.randn([1, N, 3, 5])
        >>> input.stop_gradient = False
        >>> target = paddle.randint(0, N, [1, 3, 5], dtype='int64')
        >>> output = loss(input, target)
        >>> output.backward()
References:
[1] https://arxiv.org/abs/1708.02002
"""
def __init__(self, alpha: float, gamma: float=2.0,
reduction: str='none') -> None:
super(FocalLoss, self).__init__()
self.alpha: float = alpha
self.gamma: float = gamma
self.reduction: str = reduction
self.eps: float = 1e-6
def forward( # type: ignore
self, input: paddle.Tensor, target: paddle.Tensor) -> paddle.Tensor:
return focal_loss(input, target, self.alpha, self.gamma, self.reduction,
self.eps)
@register
class CLRNetLoss(nn.Layer):
__shared__ = ['img_w', 'img_h', 'num_classes', 'num_points']
def __init__(self,
cls_loss_weight=2.0,
xyt_loss_weight=0.2,
iou_loss_weight=2.0,
seg_loss_weight=1.0,
refine_layers=3,
num_points=72,
img_w=800,
img_h=320,
num_classes=5,
ignore_label=255,
bg_weight=0.4):
super(CLRNetLoss, self).__init__()
self.cls_loss_weight = cls_loss_weight
self.xyt_loss_weight = xyt_loss_weight
self.iou_loss_weight = iou_loss_weight
self.seg_loss_weight = seg_loss_weight
self.refine_layers = refine_layers
self.img_w = img_w
self.img_h = img_h
self.n_strips = num_points - 1
self.num_classes = num_classes
self.ignore_label = ignore_label
weights = paddle.ones(shape=[self.num_classes])
weights[0] = bg_weight
self.criterion = nn.NLLLoss(
ignore_index=self.ignore_label, weight=weights)
def forward(self, output, batch):
predictions_lists = output['predictions_lists']
targets = batch['lane_line'].clone()
cls_criterion = FocalLoss(alpha=0.25, gamma=2.0)
cls_loss = paddle.to_tensor(0.0)
reg_xytl_loss = paddle.to_tensor(0.0)
iou_loss = paddle.to_tensor(0.0)
cls_acc = []
cls_acc_stage = []
for stage in range(self.refine_layers):
predictions_list = predictions_lists[stage]
for predictions, target in zip(predictions_list, targets):
target = target[target[:, 1] == 1]
if len(target) == 0:
# If there are no targets, all predictions have to be negatives (i.e., 0 confidence)
cls_target = paddle.zeros(
[predictions.shape[0]], dtype='int64')
cls_pred = predictions[:, :2]
cls_loss = cls_loss + cls_criterion(cls_pred,
cls_target).sum()
continue
with paddle.no_grad():
matched_row_inds, matched_col_inds = assign(
predictions, target, self.img_w, self.img_h)
# classification targets
cls_target = paddle.zeros([predictions.shape[0]], dtype='int64')
cls_target[matched_row_inds] = 1
cls_pred = predictions[:, :2]
                # regression targets -> [start_y, start_x, theta, length] (all transformed to absolute values), only on matched pairs
reg_yxtl = predictions.index_select(matched_row_inds)[..., 2:6]
reg_yxtl[:, 0] *= self.n_strips
reg_yxtl[:, 1] *= (self.img_w - 1)
reg_yxtl[:, 2] *= 180
reg_yxtl[:, 3] *= self.n_strips
                target_yxtl = target.index_select(matched_col_inds)[..., 2:6].clone()
                # regression targets -> S coordinates (all transformed to absolute values)
                reg_pred = predictions.index_select(matched_row_inds)[..., 6:]
                reg_pred *= (self.img_w - 1)
                reg_targets = target.index_select(matched_col_inds)[..., 6:].clone()
                with paddle.no_grad():
                    # ensure the predicted start indices are valid row indices
                    predictions_starts = paddle.clip(
                        (predictions.index_select(matched_row_inds)[..., 2] *
                         self.n_strips).round().cast("int64"),
                        min=0,
                        max=self.n_strips)
                    target_starts = (
                        target.index_select(matched_col_inds)[..., 2] *
                        self.n_strips).round().cast("int64")
                    # adjust the regressed length by the start offset
                    target_yxtl[:, -1] -= (predictions_starts - target_starts)
# Loss calculation
cls_loss = cls_loss + cls_criterion(
cls_pred, cls_target).sum() / target.shape[0]
target_yxtl[:, 0] *= self.n_strips
target_yxtl[:, 2] *= 180
reg_xytl_loss = reg_xytl_loss + F.smooth_l1_loss(
input=reg_yxtl, label=target_yxtl, reduction='none').mean()
iou_loss = iou_loss + liou_loss(
reg_pred, reg_targets, self.img_w, length=15)
cls_accuracy = accuracy(cls_pred, cls_target)
cls_acc_stage.append(cls_accuracy)
cls_acc.append(sum(cls_acc_stage) / (len(cls_acc_stage) + 1e-5))
# extra segmentation loss
seg_loss = self.criterion(
F.log_softmax(
output['seg'], axis=1), batch['seg'].cast('int64'))
cls_loss /= (len(targets) * self.refine_layers)
reg_xytl_loss /= (len(targets) * self.refine_layers)
iou_loss /= (len(targets) * self.refine_layers)
loss = cls_loss * self.cls_loss_weight \
+ reg_xytl_loss * self.xyt_loss_weight \
+ seg_loss * self.seg_loss_weight \
+ iou_loss * self.iou_loss_weight
return_value = {
'loss': loss,
'cls_loss': cls_loss * self.cls_loss_weight,
'reg_xytl_loss': reg_xytl_loss * self.xyt_loss_weight,
'seg_loss': seg_loss * self.seg_loss_weight,
'iou_loss': iou_loss * self.iou_loss_weight
}
for i in range(self.refine_layers):
if not isinstance(cls_acc[i], paddle.Tensor):
cls_acc[i] = paddle.to_tensor(cls_acc[i])
return_value['stage_{}_acc'.format(i)] = cls_acc[i]
return return_value
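For reference, a small smoke test of the paddle FocalLoss layer defined above, mirroring how CLRNetLoss instantiates it with alpha=0.25 and gamma=2.0 (shapes are illustrative):

import paddle
loss_fn = FocalLoss(alpha=0.25, gamma=2.0, reduction='mean')
logits = paddle.randn([8, 2])       # 8 proposals, 2 classes (background / lane)
labels = paddle.randint(0, 2, [8])  # int64 class ids
loss = loss_fn(logits, labels)      # scalar focal loss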

View File

@@ -0,0 +1,61 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
from ppdet.core.workspace import register
__all__ = ['COTLoss']
@register
class COTLoss(nn.Layer):
__shared__ = ['num_classes']
def __init__(self,
num_classes=80,
cot_scale=1,
cot_lambda=1):
super(COTLoss, self).__init__()
self.cot_scale = cot_scale
self.cot_lambda = cot_lambda
self.num_classes = num_classes
def forward(self, scores, targets, cot_relation):
cls_name = 'loss_bbox_cls_cot'
loss_bbox = {}
tgt_labels, tgt_bboxes, tgt_gt_inds = targets
tgt_labels = paddle.concat(tgt_labels) if len(
tgt_labels) > 1 else tgt_labels[0]
mask = (tgt_labels < self.num_classes)
valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
if valid_inds.shape[0] == 0:
loss_bbox[cls_name] = paddle.zeros([1], dtype='float32')
else:
tgt_labels = tgt_labels.cast('int64')
valid_cot_targets = []
for i in range(tgt_labels.shape[0]):
train_label = tgt_labels[i]
if train_label < self.num_classes:
valid_cot_targets.append(cot_relation[train_label])
coco_targets = paddle.to_tensor(valid_cot_targets)
coco_targets.stop_gradient = True
coco_loss = - coco_targets * F.log_softmax(scores[mask][:, :-1] * self.cot_scale)
loss_bbox[cls_name] = self.cot_lambda * paddle.mean(paddle.sum(coco_loss, axis=-1))
return loss_bbox

View File

@@ -0,0 +1,68 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, serializable
__all__ = ['CTFocalLoss']
@register
@serializable
class CTFocalLoss(object):
"""
CTFocalLoss: CornerNet & CenterNet Focal Loss
Args:
loss_weight (float): loss weight
gamma (float): gamma parameter for Focal Loss
"""
def __init__(self, loss_weight=1., gamma=2.0):
self.loss_weight = loss_weight
self.gamma = gamma
def __call__(self, pred, target):
"""
Calculate the loss
Args:
pred (Tensor): heatmap prediction
target (Tensor): target for positive samples
Return:
ct_focal_loss (Tensor): Focal Loss used in CornerNet & CenterNet.
Note that the values in target are in [0, 1] since gaussian is
used to reduce the punishment and we treat [0, 1) as neg example.
"""
fg_map = paddle.cast(target == 1, 'float32')
fg_map.stop_gradient = True
bg_map = paddle.cast(target < 1, 'float32')
bg_map.stop_gradient = True
neg_weights = paddle.pow(1 - target, 4)
pos_loss = 0 - paddle.log(pred) * paddle.pow(1 - pred,
self.gamma) * fg_map
neg_loss = 0 - paddle.log(1 - pred) * paddle.pow(
pred, self.gamma) * neg_weights * bg_map
pos_loss = paddle.sum(pos_loss)
neg_loss = paddle.sum(neg_loss)
fg_num = paddle.sum(fg_map)
ct_focal_loss = (pos_loss + neg_loss) / (
fg_num + paddle.cast(fg_num == 0, 'float32'))
return ct_focal_loss * self.loss_weight
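A small smoke test for CTFocalLoss (shapes and values are illustrative; `pred` stands in for a post-sigmoid heatmap):

import paddle
loss_fn = CTFocalLoss(loss_weight=1., gamma=2.0)
pred = paddle.uniform([2, 1, 8, 8], min=0.01, max=0.99)  # heatmap after sigmoid
target = paddle.zeros([2, 1, 8, 8])                      # gaussian-splatted target
target[0, 0, 4, 4] = 1.0                                 # one positive center
loss = loss_fn(pred, target)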

View File

@@ -0,0 +1,631 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from .iou_loss import GIoULoss
from ..transformers import bbox_cxcywh_to_xyxy, sigmoid_focal_loss, varifocal_loss_with_logits
from ..bbox_utils import bbox_iou
__all__ = ['DETRLoss', 'DINOLoss']
@register
class DETRLoss(nn.Layer):
__shared__ = ['num_classes', 'use_focal_loss']
__inject__ = ['matcher']
def __init__(self,
num_classes=80,
matcher='HungarianMatcher',
loss_coeff={
'class': 1,
'bbox': 5,
'giou': 2,
'no_object': 0.1,
'mask': 1,
'dice': 1
},
aux_loss=True,
use_focal_loss=False,
use_vfl=False,
use_uni_match=False,
uni_match_ind=0):
r"""
Args:
num_classes (int): The number of classes.
matcher (HungarianMatcher): It computes an assignment between the targets
and the predictions of the network.
loss_coeff (dict): The coefficient of loss.
aux_loss (bool): If 'aux_loss = True', loss at each decoder layer are to be used.
use_focal_loss (bool): Use focal loss or not.
"""
super(DETRLoss, self).__init__()
self.num_classes = num_classes
self.matcher = matcher
self.loss_coeff = loss_coeff
self.aux_loss = aux_loss
self.use_focal_loss = use_focal_loss
self.use_vfl = use_vfl
self.use_uni_match = use_uni_match
self.uni_match_ind = uni_match_ind
if not self.use_focal_loss:
self.loss_coeff['class'] = paddle.full([num_classes + 1],
loss_coeff['class'])
self.loss_coeff['class'][-1] = loss_coeff['no_object']
self.giou_loss = GIoULoss()
def _get_loss_class(self,
logits,
gt_class,
match_indices,
bg_index,
num_gts,
postfix="",
iou_score=None,
gt_score=None):
# logits: [b, query, num_classes], gt_class: list[[n, 1]]
name_class = "loss_class" + postfix
target_label = paddle.full(logits.shape[:2], bg_index, dtype='int64')
bs, num_query_objects = target_label.shape
num_gt = sum(len(a) for a in gt_class)
if num_gt > 0:
index, updates = self._get_index_updates(num_query_objects,
gt_class, match_indices)
target_label = paddle.scatter(
target_label.reshape([-1, 1]), index, updates.astype('int64'))
target_label = target_label.reshape([bs, num_query_objects])
if self.use_focal_loss:
target_label = F.one_hot(target_label,
self.num_classes + 1)[..., :-1]
if iou_score is not None and self.use_vfl:
if gt_score is not None:
target_score = paddle.zeros([bs, num_query_objects])
target_score = paddle.scatter(
target_score.reshape([-1, 1]), index, gt_score)
target_score = target_score.reshape(
[bs, num_query_objects, 1]) * target_label
target_score_iou = paddle.zeros([bs, num_query_objects])
target_score_iou = paddle.scatter(
target_score_iou.reshape([-1, 1]), index, iou_score)
target_score_iou = target_score_iou.reshape(
[bs, num_query_objects, 1]) * target_label
target_score = paddle.multiply(target_score,
target_score_iou)
loss_ = self.loss_coeff[
'class'] * varifocal_loss_with_logits(
logits, target_score, target_label,
num_gts / num_query_objects)
else:
target_score = paddle.zeros([bs, num_query_objects])
if num_gt > 0:
target_score = paddle.scatter(
target_score.reshape([-1, 1]), index, iou_score)
target_score = target_score.reshape(
[bs, num_query_objects, 1]) * target_label
loss_ = self.loss_coeff[
'class'] * varifocal_loss_with_logits(
logits, target_score, target_label,
num_gts / num_query_objects)
else:
loss_ = self.loss_coeff['class'] * sigmoid_focal_loss(
logits, target_label, num_gts / num_query_objects)
else:
loss_ = F.cross_entropy(
logits, target_label, weight=self.loss_coeff['class'])
return {name_class: loss_}
def _get_loss_bbox(self, boxes, gt_bbox, match_indices, num_gts,
postfix=""):
# boxes: [b, query, 4], gt_bbox: list[[n, 4]]
name_bbox = "loss_bbox" + postfix
name_giou = "loss_giou" + postfix
loss = dict()
if sum(len(a) for a in gt_bbox) == 0:
loss[name_bbox] = paddle.to_tensor([0.])
loss[name_giou] = paddle.to_tensor([0.])
return loss
src_bbox, target_bbox = self._get_src_target_assign(boxes, gt_bbox,
match_indices)
loss[name_bbox] = self.loss_coeff['bbox'] * F.l1_loss(
src_bbox, target_bbox, reduction='sum') / num_gts
loss[name_giou] = self.giou_loss(
bbox_cxcywh_to_xyxy(src_bbox), bbox_cxcywh_to_xyxy(target_bbox))
loss[name_giou] = loss[name_giou].sum() / num_gts
loss[name_giou] = self.loss_coeff['giou'] * loss[name_giou]
return loss
def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
postfix=""):
# masks: [b, query, h, w], gt_mask: list[[n, H, W]]
name_mask = "loss_mask" + postfix
name_dice = "loss_dice" + postfix
loss = dict()
if sum(len(a) for a in gt_mask) == 0:
loss[name_mask] = paddle.to_tensor([0.])
loss[name_dice] = paddle.to_tensor([0.])
return loss
src_masks, target_masks = self._get_src_target_assign(masks, gt_mask,
match_indices)
src_masks = F.interpolate(
src_masks.unsqueeze(0),
size=target_masks.shape[-2:],
mode="bilinear")[0]
loss[name_mask] = self.loss_coeff['mask'] * F.sigmoid_focal_loss(
src_masks,
target_masks,
paddle.to_tensor(
[num_gts], dtype='float32'))
loss[name_dice] = self.loss_coeff['dice'] * self._dice_loss(
src_masks, target_masks, num_gts)
return loss
def _dice_loss(self, inputs, targets, num_gts):
inputs = F.sigmoid(inputs)
inputs = inputs.flatten(1)
targets = targets.flatten(1)
numerator = 2 * (inputs * targets).sum(1)
denominator = inputs.sum(-1) + targets.sum(-1)
loss = 1 - (numerator + 1) / (denominator + 1)
return loss.sum() / num_gts
def _get_loss_aux(self,
boxes,
logits,
gt_bbox,
gt_class,
bg_index,
num_gts,
dn_match_indices=None,
postfix="",
masks=None,
gt_mask=None,
gt_score=None):
loss_class = []
loss_bbox, loss_giou = [], []
loss_mask, loss_dice = [], []
if dn_match_indices is not None:
match_indices = dn_match_indices
elif self.use_uni_match:
match_indices = self.matcher(
boxes[self.uni_match_ind],
logits[self.uni_match_ind],
gt_bbox,
gt_class,
masks=masks[self.uni_match_ind] if masks is not None else None,
gt_mask=gt_mask)
for i, (aux_boxes, aux_logits) in enumerate(zip(boxes, logits)):
aux_masks = masks[i] if masks is not None else None
if not self.use_uni_match and dn_match_indices is None:
match_indices = self.matcher(
aux_boxes,
aux_logits,
gt_bbox,
gt_class,
masks=aux_masks,
gt_mask=gt_mask)
if self.use_vfl:
if sum(len(a) for a in gt_bbox) > 0:
src_bbox, target_bbox = self._get_src_target_assign(
aux_boxes.detach(), gt_bbox, match_indices)
iou_score = bbox_iou(
bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
else:
iou_score = None
if gt_score is not None:
_, target_score = self._get_src_target_assign(
logits[-1].detach(), gt_score, match_indices)
else:
iou_score = None
loss_class.append(
self._get_loss_class(
aux_logits,
gt_class,
match_indices,
bg_index,
num_gts,
postfix,
iou_score,
gt_score=target_score
if gt_score is not None else None)['loss_class' + postfix])
loss_ = self._get_loss_bbox(aux_boxes, gt_bbox, match_indices,
num_gts, postfix)
loss_bbox.append(loss_['loss_bbox' + postfix])
loss_giou.append(loss_['loss_giou' + postfix])
if masks is not None and gt_mask is not None:
loss_ = self._get_loss_mask(aux_masks, gt_mask, match_indices,
num_gts, postfix)
loss_mask.append(loss_['loss_mask' + postfix])
loss_dice.append(loss_['loss_dice' + postfix])
loss = {
"loss_class_aux" + postfix: paddle.add_n(loss_class),
"loss_bbox_aux" + postfix: paddle.add_n(loss_bbox),
"loss_giou_aux" + postfix: paddle.add_n(loss_giou)
}
if masks is not None and gt_mask is not None:
loss["loss_mask_aux" + postfix] = paddle.add_n(loss_mask)
loss["loss_dice_aux" + postfix] = paddle.add_n(loss_dice)
return loss
def _get_index_updates(self, num_query_objects, target, match_indices):
batch_idx = paddle.concat([
paddle.full_like(src, i) for i, (src, _) in enumerate(match_indices)
])
src_idx = paddle.concat([src for (src, _) in match_indices])
src_idx += (batch_idx * num_query_objects)
target_assign = paddle.concat([
paddle.gather(
t, dst, axis=0) for t, (_, dst) in zip(target, match_indices)
])
return src_idx, target_assign
def _get_src_target_assign(self, src, target, match_indices):
src_assign = paddle.concat([
paddle.gather(
t, I, axis=0) if len(I) > 0 else paddle.zeros([0, t.shape[-1]])
for t, (I, _) in zip(src, match_indices)
])
target_assign = paddle.concat([
paddle.gather(
t, J, axis=0) if len(J) > 0 else paddle.zeros([0, t.shape[-1]])
for t, (_, J) in zip(target, match_indices)
])
return src_assign, target_assign
def _get_num_gts(self, targets, dtype="float32"):
num_gts = sum(len(a) for a in targets)
num_gts = paddle.to_tensor([num_gts], dtype=dtype)
if paddle.distributed.get_world_size() > 1:
paddle.distributed.all_reduce(num_gts)
num_gts /= paddle.distributed.get_world_size()
num_gts = paddle.clip(num_gts, min=1.)
return num_gts
def _get_prediction_loss(self,
boxes,
logits,
gt_bbox,
gt_class,
masks=None,
gt_mask=None,
postfix="",
dn_match_indices=None,
num_gts=1,
gt_score=None):
if dn_match_indices is None:
match_indices = self.matcher(
boxes, logits, gt_bbox, gt_class, masks=masks, gt_mask=gt_mask)
else:
match_indices = dn_match_indices
if self.use_vfl:
if gt_score is not None: #ssod
_, target_score = self._get_src_target_assign(
logits[-1].detach(), gt_score, match_indices)
elif sum(len(a) for a in gt_bbox) > 0:
src_bbox, target_bbox = self._get_src_target_assign(
boxes.detach(), gt_bbox, match_indices)
iou_score = bbox_iou(
bbox_cxcywh_to_xyxy(src_bbox).split(4, -1),
bbox_cxcywh_to_xyxy(target_bbox).split(4, -1))
else:
iou_score = None
else:
iou_score = None
loss = dict()
loss.update(
self._get_loss_class(
logits,
gt_class,
match_indices,
self.num_classes,
num_gts,
postfix,
iou_score,
gt_score=target_score if gt_score is not None else None))
loss.update(
self._get_loss_bbox(boxes, gt_bbox, match_indices, num_gts,
postfix))
if masks is not None and gt_mask is not None:
loss.update(
self._get_loss_mask(masks, gt_mask, match_indices, num_gts,
postfix))
return loss
def forward(self,
boxes,
logits,
gt_bbox,
gt_class,
masks=None,
gt_mask=None,
postfix="",
gt_score=None,
**kwargs):
r"""
Args:
boxes (Tensor): [l, b, query, 4]
logits (Tensor): [l, b, query, num_classes]
gt_bbox (List(Tensor)): list[[n, 4]]
gt_class (List(Tensor)): list[[n, 1]]
masks (Tensor, optional): [l, b, query, h, w]
gt_mask (List(Tensor), optional): list[[n, H, W]]
postfix (str): postfix of loss name
"""
dn_match_indices = kwargs.get("dn_match_indices", None)
num_gts = kwargs.get("num_gts", None)
if num_gts is None:
num_gts = self._get_num_gts(gt_class)
total_loss = self._get_prediction_loss(
boxes[-1],
logits[-1],
gt_bbox,
gt_class,
masks=masks[-1] if masks is not None else None,
gt_mask=gt_mask,
postfix=postfix,
dn_match_indices=dn_match_indices,
num_gts=num_gts,
gt_score=gt_score if gt_score is not None else None)
if self.aux_loss:
total_loss.update(
self._get_loss_aux(
boxes[:-1],
logits[:-1],
gt_bbox,
gt_class,
self.num_classes,
num_gts,
dn_match_indices,
postfix,
masks=masks[:-1] if masks is not None else None,
gt_mask=gt_mask,
gt_score=gt_score if gt_score is not None else None))
return total_loss
@register
class DINOLoss(DETRLoss):
def forward(self,
boxes,
logits,
gt_bbox,
gt_class,
masks=None,
gt_mask=None,
postfix="",
dn_out_bboxes=None,
dn_out_logits=None,
dn_meta=None,
gt_score=None,
**kwargs):
num_gts = self._get_num_gts(gt_class)
total_loss = super(DINOLoss, self).forward(
boxes,
logits,
gt_bbox,
gt_class,
num_gts=num_gts,
gt_score=gt_score)
if dn_meta is not None:
dn_positive_idx, dn_num_group = \
dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
assert len(gt_class) == len(dn_positive_idx)
# denoising match indices
dn_match_indices = self.get_dn_match_indices(
gt_class, dn_positive_idx, dn_num_group)
# compute denoising training loss
num_gts *= dn_num_group
dn_loss = super(DINOLoss, self).forward(
dn_out_bboxes,
dn_out_logits,
gt_bbox,
gt_class,
postfix="_dn",
dn_match_indices=dn_match_indices,
num_gts=num_gts,
gt_score=gt_score)
total_loss.update(dn_loss)
else:
total_loss.update(
{k + '_dn': paddle.to_tensor([0.])
for k in total_loss.keys()})
return total_loss
@staticmethod
def get_dn_match_indices(labels, dn_positive_idx, dn_num_group):
dn_match_indices = []
for i in range(len(labels)):
num_gt = len(labels[i])
if num_gt > 0:
gt_idx = paddle.arange(end=num_gt, dtype="int64")
gt_idx = gt_idx.tile([dn_num_group])
assert len(dn_positive_idx[i]) == len(gt_idx)
dn_match_indices.append((dn_positive_idx[i], gt_idx))
else:
dn_match_indices.append((paddle.zeros(
[0], dtype="int64"), paddle.zeros(
[0], dtype="int64")))
return dn_match_indices
@register
class MaskDINOLoss(DETRLoss):
__shared__ = ['num_classes', 'use_focal_loss', 'num_sample_points']
__inject__ = ['matcher']
def __init__(self,
num_classes=80,
matcher='HungarianMatcher',
loss_coeff={
'class': 4,
'bbox': 5,
'giou': 2,
'mask': 5,
'dice': 5
},
aux_loss=True,
use_focal_loss=False,
num_sample_points=12544,
oversample_ratio=3.0,
important_sample_ratio=0.75):
super(MaskDINOLoss, self).__init__(num_classes, matcher, loss_coeff,
aux_loss, use_focal_loss)
assert oversample_ratio >= 1
assert important_sample_ratio <= 1 and important_sample_ratio >= 0
self.num_sample_points = num_sample_points
self.oversample_ratio = oversample_ratio
self.important_sample_ratio = important_sample_ratio
self.num_oversample_points = int(num_sample_points * oversample_ratio)
self.num_important_points = int(num_sample_points *
important_sample_ratio)
self.num_random_points = num_sample_points - self.num_important_points
def forward(self,
boxes,
logits,
gt_bbox,
gt_class,
masks=None,
gt_mask=None,
postfix="",
dn_out_bboxes=None,
dn_out_logits=None,
dn_out_masks=None,
dn_meta=None,
**kwargs):
num_gts = self._get_num_gts(gt_class)
total_loss = super(MaskDINOLoss, self).forward(
boxes,
logits,
gt_bbox,
gt_class,
masks=masks,
gt_mask=gt_mask,
num_gts=num_gts)
if dn_meta is not None:
dn_positive_idx, dn_num_group = \
dn_meta["dn_positive_idx"], dn_meta["dn_num_group"]
assert len(gt_class) == len(dn_positive_idx)
# denoising match indices
dn_match_indices = DINOLoss.get_dn_match_indices(
gt_class, dn_positive_idx, dn_num_group)
# compute denoising training loss
num_gts *= dn_num_group
dn_loss = super(MaskDINOLoss, self).forward(
dn_out_bboxes,
dn_out_logits,
gt_bbox,
gt_class,
masks=dn_out_masks,
gt_mask=gt_mask,
postfix="_dn",
dn_match_indices=dn_match_indices,
num_gts=num_gts)
total_loss.update(dn_loss)
else:
total_loss.update(
{k + '_dn': paddle.to_tensor([0.])
for k in total_loss.keys()})
return total_loss
def _get_loss_mask(self, masks, gt_mask, match_indices, num_gts,
postfix=""):
# masks: [b, query, h, w], gt_mask: list[[n, H, W]]
name_mask = "loss_mask" + postfix
name_dice = "loss_dice" + postfix
loss = dict()
if sum(len(a) for a in gt_mask) == 0:
loss[name_mask] = paddle.to_tensor([0.])
loss[name_dice] = paddle.to_tensor([0.])
return loss
src_masks, target_masks = self._get_src_target_assign(masks, gt_mask,
match_indices)
# sample points
sample_points = self._get_point_coords_by_uncertainty(src_masks)
sample_points = 2.0 * sample_points.unsqueeze(1) - 1.0
src_masks = F.grid_sample(
src_masks.unsqueeze(1), sample_points,
align_corners=False).squeeze([1, 2])
target_masks = F.grid_sample(
target_masks.unsqueeze(1), sample_points,
align_corners=False).squeeze([1, 2]).detach()
loss[name_mask] = self.loss_coeff[
'mask'] * F.binary_cross_entropy_with_logits(
src_masks, target_masks,
reduction='none').mean(1).sum() / num_gts
loss[name_dice] = self.loss_coeff['dice'] * self._dice_loss(
src_masks, target_masks, num_gts)
return loss
def _get_point_coords_by_uncertainty(self, masks):
# Sample points based on their uncertainty.
masks = masks.detach()
num_masks = masks.shape[0]
sample_points = paddle.rand(
[num_masks, 1, self.num_oversample_points, 2])
out_mask = F.grid_sample(
masks.unsqueeze(1), 2.0 * sample_points - 1.0,
align_corners=False).squeeze([1, 2])
out_mask = -paddle.abs(out_mask)
_, topk_ind = paddle.topk(out_mask, self.num_important_points, axis=1)
batch_ind = paddle.arange(end=num_masks, dtype=topk_ind.dtype)
batch_ind = batch_ind.unsqueeze(-1).tile([1, self.num_important_points])
topk_ind = paddle.stack([batch_ind, topk_ind], axis=-1)
sample_points = paddle.gather_nd(sample_points.squeeze(1), topk_ind)
if self.num_random_points > 0:
sample_points = paddle.concat(
[
sample_points,
paddle.rand([num_masks, self.num_random_points, 2])
],
axis=1)
return sample_points
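To see how the denoising branch is matched, here is a toy sketch of DINOLoss.get_dn_match_indices (values assumed): with two ground-truth boxes in image 0 and two denoising groups, each GT index is tiled once per group.

import paddle
labels = [paddle.zeros([2, 1], dtype='int64'),  # image 0: 2 GT boxes
          paddle.zeros([0, 1], dtype='int64')]  # image 1: none
dn_positive_idx = [paddle.to_tensor([0, 1, 2, 3]),
                   paddle.zeros([0], dtype='int64')]
inds = DINOLoss.get_dn_match_indices(labels, dn_positive_idx, dn_num_group=2)
# image 0 matches gt_idx [0, 1, 0, 1]; image 1 gets an empty index pair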

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from paddle.nn.initializer import Constant
from ppdet.core.workspace import register
__all__ = ['FairMOTLoss']
@register
class FairMOTLoss(nn.Layer):
def __init__(self):
super(FairMOTLoss, self).__init__()
self.det_weight = self.create_parameter(
shape=[1], default_initializer=Constant(-1.85))
self.reid_weight = self.create_parameter(
shape=[1], default_initializer=Constant(-1.05))
def forward(self, det_loss, reid_loss):
loss = paddle.exp(-self.det_weight) * det_loss + paddle.exp(
-self.reid_weight) * reid_loss + (self.det_weight + self.reid_weight
)
loss *= 0.5
return {'loss': loss}
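FairMOTLoss combines the two task losses with learned uncertainty weights in the style of Kendall et al., loss = 0.5 * (exp(-w_det) * L_det + exp(-w_reid) * L_reid + w_det + w_reid). A minimal sketch (toy values):

import paddle
loss_fn = FairMOTLoss()
det_loss = paddle.to_tensor(1.2)    # detection branch loss
reid_loss = paddle.to_tensor(3.4)   # re-id branch loss
out = loss_fn(det_loss, reid_loss)  # {'loss': ...}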

File diff suppressed because it is too large

View File

@@ -0,0 +1,138 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
import paddle.nn as nn
from ppdet.core.workspace import register
__all__ = ['FocalLoss', 'Weighted_FocalLoss']
@register
class FocalLoss(nn.Layer):
"""A wrapper around paddle.nn.functional.sigmoid_focal_loss.
Args:
use_sigmoid (bool): currently only support use_sigmoid=True
alpha (float): parameter alpha in Focal Loss
gamma (float): parameter gamma in Focal Loss
loss_weight (float): final loss will be multiplied by this
"""
def __init__(self,
use_sigmoid=True,
alpha=0.25,
gamma=2.0,
loss_weight=1.0):
super(FocalLoss, self).__init__()
assert use_sigmoid == True, \
'Focal Loss only supports sigmoid at the moment'
self.use_sigmoid = use_sigmoid
self.alpha = alpha
self.gamma = gamma
self.loss_weight = loss_weight
def forward(self, pred, target, reduction='none'):
"""forward function.
Args:
pred (Tensor): logits of class prediction, of shape (N, num_classes)
target (Tensor): target class label, of shape (N, )
reduction (str): the way to reduce loss, one of (none, sum, mean)
"""
num_classes = pred.shape[1]
target = F.one_hot(target, num_classes+1).cast(pred.dtype)
target = target[:, :-1].detach()
loss = F.sigmoid_focal_loss(
pred, target, alpha=self.alpha, gamma=self.gamma,
reduction=reduction)
return loss * self.loss_weight
@register
class Weighted_FocalLoss(FocalLoss):
"""A wrapper around paddle.nn.functional.sigmoid_focal_loss.
Args:
use_sigmoid (bool): currently only support use_sigmoid=True
alpha (float): parameter alpha in Focal Loss
gamma (float): parameter gamma in Focal Loss
loss_weight (float): final loss will be multiplied by this
"""
def __init__(self,
use_sigmoid=True,
alpha=0.25,
gamma=2.0,
loss_weight=1.0,
reduction="mean"):
super(FocalLoss, self).__init__()
assert use_sigmoid == True, \
'Focal Loss only supports sigmoid at the moment'
self.use_sigmoid = use_sigmoid
self.alpha = alpha
self.gamma = gamma
self.loss_weight = loss_weight
self.reduction = reduction
def forward(self, pred, target, weight=None, avg_factor=None, reduction_override=None):
"""forward function.
Args:
pred (Tensor): logits of class prediction, of shape (N, num_classes)
target (Tensor): target class label, of shape (N, )
reduction (str): the way to reduce loss, one of (none, sum, mean)
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
num_classes = pred.shape[1]
target = F.one_hot(target, num_classes + 1).astype(pred.dtype)
target = target[:, :-1].detach()
loss = F.sigmoid_focal_loss(
pred, target, alpha=self.alpha, gamma=self.gamma,
reduction='none')
if weight is not None:
if weight.shape != loss.shape:
if weight.shape[0] == loss.shape[0]:
# For most cases, weight is of shape (num_priors, ),
# which means it does not have the second axis num_class
weight = weight.reshape((-1, 1))
else:
# Sometimes, weight per anchor per class is also needed. e.g.
# in FSAF. But it may be flattened of shape
# (num_priors x num_class, ), while loss is still of shape
# (num_priors, num_class).
assert weight.numel() == loss.numel()
weight = weight.reshape((loss.shape[0], -1))
assert weight.ndim == loss.ndim
loss = loss * weight
# if avg_factor is not specified, just reduce the loss
if avg_factor is None:
if reduction == 'mean':
loss = loss.mean()
elif reduction == 'sum':
loss = loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
# Avoid causing ZeroDivisionError when avg_factor is 0.0,
# i.e., all labels of an image belong to ignore index.
eps = 1e-10
loss = loss.sum() / (avg_factor + eps)
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError('avg_factor can not be used with reduction="sum"')
return loss * self.loss_weight
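A minimal usage sketch for the sigmoid FocalLoss wrapper registered above (shapes assumed; the label value `num_classes` denotes background and is dropped by the one-hot slice):

import paddle
loss_fn = FocalLoss(alpha=0.25, gamma=2.0, loss_weight=1.0)
pred = paddle.randn([16, 80])         # class logits
target = paddle.randint(0, 81, [16])  # 80 == background
loss = loss_fn(pred, target, reduction='mean')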

View File

@@ -0,0 +1,217 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/gfocal_loss.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling import ops
__all__ = ['QualityFocalLoss', 'DistributionFocalLoss']
def quality_focal_loss(pred, target, beta=2.0, use_sigmoid=True):
"""
Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
Qualified and Distributed Bounding Boxes for Dense Object Detection
<https://arxiv.org/abs/2006.04388>`_.
Args:
pred (Tensor): Predicted joint representation of classification
and quality (IoU) estimation with shape (N, C), C is the number of
classes.
target (tuple([Tensor])): Target category label with shape (N,)
and target quality label with shape (N,).
beta (float): The beta parameter for calculating the modulating factor.
Defaults to 2.0.
Returns:
Tensor: Loss tensor with shape (N,).
"""
assert len(target) == 2, """target for QFL must be a tuple of two elements,
including category label and quality label, respectively"""
# label denotes the category id, score denotes the quality score
label, score = target
if use_sigmoid:
func = F.binary_cross_entropy_with_logits
else:
func = F.binary_cross_entropy
# negatives are supervised by 0 quality score
pred_sigmoid = F.sigmoid(pred) if use_sigmoid else pred
scale_factor = pred_sigmoid
zerolabel = paddle.zeros(pred.shape, dtype='float32')
loss = func(pred, zerolabel, reduction='none') * scale_factor.pow(beta)
# FG cat_id: [0, num_classes -1], BG cat_id: num_classes
bg_class_ind = pred.shape[1]
pos = paddle.logical_and((label >= 0),
(label < bg_class_ind)).nonzero().squeeze(1)
if pos.shape[0] == 0:
return loss.sum(axis=1)
pos_label = paddle.gather(label, pos, axis=0)
pos_mask = np.zeros(pred.shape, dtype=np.int32)
pos_mask[pos.numpy(), pos_label.numpy()] = 1
pos_mask = paddle.to_tensor(pos_mask, dtype='bool')
score = score.unsqueeze(-1).expand([-1, pred.shape[1]]).cast('float32')
# positives are supervised by bbox quality (IoU) score
scale_factor_new = score - pred_sigmoid
loss_pos = func(
pred, score, reduction='none') * scale_factor_new.abs().pow(beta)
loss = loss * paddle.logical_not(pos_mask) + loss_pos * pos_mask
loss = loss.sum(axis=1)
return loss
def distribution_focal_loss(pred, label):
"""Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning
Qualified and Distributed Bounding Boxes for Dense Object Detection
<https://arxiv.org/abs/2006.04388>`_.
Args:
pred (Tensor): Predicted general distribution of bounding boxes
(before softmax) with shape (N, n+1), n is the max value of the
integral set `{0, ..., n}` in paper.
label (Tensor): Target distance label for bounding boxes with
shape (N,).
Returns:
Tensor: Loss tensor with shape (N,).
"""
dis_left = label.cast('int64')
dis_right = dis_left + 1
weight_left = dis_right.cast('float32') - label
weight_right = label - dis_left.cast('float32')
loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \
+ F.cross_entropy(pred, dis_right, reduction='none') * weight_right
return loss
@register
@serializable
class QualityFocalLoss(nn.Layer):
r"""Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:
Learning Qualified and Distributed Bounding Boxes for Dense Object
Detection <https://arxiv.org/abs/2006.04388>`_.
Args:
use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.
Defaults to True.
beta (float): The beta parameter for calculating the modulating factor.
Defaults to 2.0.
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Loss weight of current loss.
"""
def __init__(self,
use_sigmoid=True,
beta=2.0,
reduction='mean',
loss_weight=1.0):
super(QualityFocalLoss, self).__init__()
self.use_sigmoid = use_sigmoid
self.beta = beta
assert reduction in ('none', 'mean', 'sum')
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, weight=None, avg_factor=None):
"""Forward function.
Args:
pred (Tensor): Predicted joint representation of
classification and quality (IoU) estimation with shape (N, C),
C is the number of classes.
target (tuple([Tensor])): Target category label with shape
(N,) and target quality label with shape (N,).
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
"""
loss = self.loss_weight * quality_focal_loss(
pred, target, beta=self.beta, use_sigmoid=self.use_sigmoid)
if weight is not None:
loss = loss * weight
if avg_factor is None:
if self.reduction == 'none':
return loss
elif self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if self.reduction == 'mean':
loss = loss.sum() / avg_factor
# if reduction is 'none', then do nothing, otherwise raise an error
elif self.reduction != 'none':
raise ValueError(
'avg_factor can not be used with reduction="sum"')
return loss
@register
@serializable
class DistributionFocalLoss(nn.Layer):
"""Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:
Learning Qualified and Distributed Bounding Boxes for Dense Object
Detection <https://arxiv.org/abs/2006.04388>`_.
Args:
reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
loss_weight (float): Loss weight of current loss.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
super(DistributionFocalLoss, self).__init__()
assert reduction in ('none', 'mean', 'sum')
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, weight=None, avg_factor=None):
"""Forward function.
Args:
pred (Tensor): Predicted general distribution of bounding
boxes (before softmax) with shape (N, n+1), n is the max value
of the integral set `{0, ..., n}` in paper.
target (Tensor): Target distance label for bounding boxes
with shape (N,).
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
"""
loss = self.loss_weight * distribution_focal_loss(pred, target)
if weight is not None:
loss = loss * weight
if avg_factor is None:
if self.reduction == 'none':
return loss
elif self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if self.reduction == 'mean':
loss = loss.sum() / avg_factor
# if reduction is 'none', then do nothing, otherwise raise an error
elif self.reduction != 'none':
raise ValueError(
'avg_factor can not be used with reduction="sum"')
return loss
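A worked example of distribution_focal_loss (toy values): a continuous distance label is split between its two neighbouring integer bins in proportion to proximity, so a label of 2.4 yields 0.6 * CE(pred, 2) + 0.4 * CE(pred, 3).

import paddle
pred = paddle.randn([1, 8])      # distribution logits over the integral set {0, ..., 7}
label = paddle.to_tensor([2.4])  # sub-pixel distance target
loss = distribution_focal_loss(pred, label)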

View File

@@ -0,0 +1,47 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from .iou_loss import IouLoss
from ..bbox_utils import bbox_iou
@register
@serializable
class IouAwareLoss(IouLoss):
"""
iou aware loss, see https://arxiv.org/abs/1912.05992
Args:
loss_weight (float): iou aware loss weight, default is 1.0
max_height (int): max height of input to support random shape input
max_width (int): max width of input to support random shape input
"""
def __init__(self, loss_weight=1.0, giou=False, diou=False, ciou=False):
super(IouAwareLoss, self).__init__(
loss_weight=loss_weight, giou=giou, diou=diou, ciou=ciou)
def __call__(self, ioup, pbox, gbox):
iou = bbox_iou(
pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou)
iou.stop_gradient = True
loss_iou_aware = F.binary_cross_entropy_with_logits(
ioup, iou, reduction='none')
loss_iou_aware = loss_iou_aware * self.loss_weight
return loss_iou_aware

View File

@@ -0,0 +1,295 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
import paddle
from ppdet.core.workspace import register, serializable
from ..bbox_utils import bbox_iou
__all__ = ['IouLoss', 'GIoULoss', 'DIouLoss', 'SIoULoss']
@register
@serializable
class IouLoss(object):
"""
iou loss, see https://arxiv.org/abs/1908.03851
loss = 1.0 - iou * iou
Args:
loss_weight (float): iou loss weight, default is 2.5
max_height (int): max height of input to support random shape input
max_width (int): max width of input to support random shape input
ciou_term (bool): whether to add ciou_term
loss_square (bool): whether to square the iou term
"""
def __init__(self,
loss_weight=2.5,
giou=False,
diou=False,
ciou=False,
loss_square=True):
self.loss_weight = loss_weight
self.giou = giou
self.diou = diou
self.ciou = ciou
self.loss_square = loss_square
def __call__(self, pbox, gbox):
iou = bbox_iou(
pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou)
if self.loss_square:
loss_iou = 1 - iou * iou
else:
loss_iou = 1 - iou
loss_iou = loss_iou * self.loss_weight
return loss_iou
@register
@serializable
class GIoULoss(object):
"""
Generalized Intersection over Union, see https://arxiv.org/abs/1902.09630
Args:
loss_weight (float): giou loss weight, default as 1
eps (float): epsilon to avoid divide by zero, default as 1e-10
reduction (string): Options are "none", "mean" and "sum". default as none
"""
def __init__(self, loss_weight=1., eps=1e-10, reduction='none'):
self.loss_weight = loss_weight
self.eps = eps
assert reduction in ('none', 'mean', 'sum')
self.reduction = reduction
def bbox_overlap(self, box1, box2, eps=1e-10):
"""calculate the iou of box1 and box2
Args:
box1 (Tensor): box1 with the shape (..., 4)
box2 (Tensor): box1 with the shape (..., 4)
eps (float): epsilon to avoid divide by zero
Return:
iou (Tensor): iou of box1 and box2
overlap (Tensor): overlap of box1 and box2
union (Tensor): union of box1 and box2
"""
x1, y1, x2, y2 = box1
x1g, y1g, x2g, y2g = box2
xkis1 = paddle.maximum(x1, x1g)
ykis1 = paddle.maximum(y1, y1g)
xkis2 = paddle.minimum(x2, x2g)
ykis2 = paddle.minimum(y2, y2g)
w_inter = (xkis2 - xkis1).clip(0)
h_inter = (ykis2 - ykis1).clip(0)
overlap = w_inter * h_inter
area1 = (x2 - x1) * (y2 - y1)
area2 = (x2g - x1g) * (y2g - y1g)
union = area1 + area2 - overlap + eps
iou = overlap / union
return iou, overlap, union
def __call__(self, pbox, gbox, iou_weight=1., loc_reweight=None):
x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
box1 = [x1, y1, x2, y2]
box2 = [x1g, y1g, x2g, y2g]
iou, overlap, union = self.bbox_overlap(box1, box2, self.eps)
xc1 = paddle.minimum(x1, x1g)
yc1 = paddle.minimum(y1, y1g)
xc2 = paddle.maximum(x2, x2g)
yc2 = paddle.maximum(y2, y2g)
area_c = (xc2 - xc1) * (yc2 - yc1) + self.eps
miou = iou - ((area_c - union) / area_c)
if loc_reweight is not None:
loc_reweight = paddle.reshape(loc_reweight, shape=(-1, 1))
loc_thresh = 0.9
giou = 1 - (1 - loc_thresh
) * miou - loc_thresh * miou * loc_reweight
else:
giou = 1 - miou
if self.reduction == 'none':
loss = giou
elif self.reduction == 'sum':
loss = paddle.sum(giou * iou_weight)
else:
loss = paddle.mean(giou * iou_weight)
return loss * self.loss_weight
@register
@serializable
class DIouLoss(GIoULoss):
"""
Distance-IoU Loss, see https://arxiv.org/abs/1911.08287
Args:
loss_weight (float): giou loss weight, default as 1
eps (float): epsilon to avoid divide by zero, default as 1e-10
use_complete_iou_loss (bool): whether to use complete iou loss
"""
def __init__(self, loss_weight=1., eps=1e-10, use_complete_iou_loss=True):
super(DIouLoss, self).__init__(loss_weight=loss_weight, eps=eps)
self.use_complete_iou_loss = use_complete_iou_loss
def __call__(self, pbox, gbox, iou_weight=1.):
x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1
h = y2 - y1
cxg = (x1g + x2g) / 2
cyg = (y1g + y2g) / 2
wg = x2g - x1g
hg = y2g - y1g
x2 = paddle.maximum(x1, x2)
y2 = paddle.maximum(y1, y2)
# A and B
xkis1 = paddle.maximum(x1, x1g)
ykis1 = paddle.maximum(y1, y1g)
xkis2 = paddle.minimum(x2, x2g)
ykis2 = paddle.minimum(y2, y2g)
# A or B
xc1 = paddle.minimum(x1, x1g)
yc1 = paddle.minimum(y1, y1g)
xc2 = paddle.maximum(x2, x2g)
yc2 = paddle.maximum(y2, y2g)
        intsctk = (xkis2 - xkis1) * (ykis2 - ykis1)
        # cast the validity masks to float before multiplying to avoid
        # float-by-bool dtype errors
        intsctk = intsctk * paddle.cast(
            xkis2 > xkis1, 'float32') * paddle.cast(ykis2 > ykis1, 'float32')
unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g
) - intsctk + self.eps
iouk = intsctk / unionk
# DIOU term
dist_intersection = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg)
dist_union = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
diou_term = (dist_intersection + self.eps) / (dist_union + self.eps)
# CIOU term
ciou_term = 0
if self.use_complete_iou_loss:
ar_gt = wg / hg
ar_pred = w / h
arctan = paddle.atan(ar_gt) - paddle.atan(ar_pred)
ar_loss = 4. / np.pi / np.pi * arctan * arctan
alpha = ar_loss / (1 - iouk + ar_loss + self.eps)
alpha.stop_gradient = True
ciou_term = alpha * ar_loss
diou = paddle.mean((1 - iouk + ciou_term + diou_term) * iou_weight)
return diou * self.loss_weight
@register
@serializable
class SIoULoss(GIoULoss):
"""
see https://arxiv.org/pdf/2205.12740.pdf
Args:
loss_weight (float): siou loss weight, default as 1
eps (float): epsilon to avoid divide by zero, default as 1e-10
theta (float): default as 4
reduction (str): Options are "none", "mean" and "sum". default as none
"""
def __init__(self, loss_weight=1., eps=1e-10, theta=4., reduction='none'):
super(SIoULoss, self).__init__(loss_weight=loss_weight, eps=eps)
self.loss_weight = loss_weight
self.eps = eps
self.theta = theta
self.reduction = reduction
def __call__(self, pbox, gbox):
x1, y1, x2, y2 = paddle.split(pbox, num_or_sections=4, axis=-1)
x1g, y1g, x2g, y2g = paddle.split(gbox, num_or_sections=4, axis=-1)
box1 = [x1, y1, x2, y2]
box2 = [x1g, y1g, x2g, y2g]
iou = bbox_iou(box1, box2)
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1 + self.eps
h = y2 - y1 + self.eps
cxg = (x1g + x2g) / 2
cyg = (y1g + y2g) / 2
wg = x2g - x1g + self.eps
hg = y2g - y1g + self.eps
x2 = paddle.maximum(x1, x2)
y2 = paddle.maximum(y1, y2)
# A or B
xc1 = paddle.minimum(x1, x1g)
yc1 = paddle.minimum(y1, y1g)
xc2 = paddle.maximum(x2, x2g)
yc2 = paddle.maximum(y2, y2g)
cw_out = xc2 - xc1
ch_out = yc2 - yc1
ch = paddle.maximum(cy, cyg) - paddle.minimum(cy, cyg)
cw = paddle.maximum(cx, cxg) - paddle.minimum(cx, cxg)
        # angle cost (eps guards against zero center distance)
        dist_intersection = paddle.sqrt((cx - cxg)**2 +
                                        (cy - cyg)**2) + self.eps
        sin_angle_alpha = ch / dist_intersection
        sin_angle_beta = cw / dist_intersection
thred = paddle.pow(paddle.to_tensor(2), 0.5) / 2
thred.stop_gradient = True
sin_alpha = paddle.where(sin_angle_alpha > thred, sin_angle_beta,
sin_angle_alpha)
angle_cost = paddle.cos(paddle.asin(sin_alpha) * 2 - math.pi / 2)
# distance cost
gamma = 2 - angle_cost
# gamma.stop_gradient = True
beta_x = ((cxg - cx) / cw_out)**2
beta_y = ((cyg - cy) / ch_out)**2
dist_cost = 1 - paddle.exp(-gamma * beta_x) + 1 - paddle.exp(-gamma *
beta_y)
# shape cost
omega_w = paddle.abs(w - wg) / paddle.maximum(w, wg)
omega_h = paddle.abs(hg - h) / paddle.maximum(h, hg)
omega = (1 - paddle.exp(-omega_w))**self.theta + (
1 - paddle.exp(-omega_h))**self.theta
siou_loss = 1 - iou + (omega + dist_cost) / 2
if self.reduction == 'mean':
siou_loss = paddle.mean(siou_loss)
elif self.reduction == 'sum':
siou_loss = paddle.sum(siou_loss)
return siou_loss * self.loss_weight
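A minimal usage sketch for GIoULoss on xyxy boxes (toy values):

import paddle
giou_loss = GIoULoss(loss_weight=1., reduction='mean')
pbox = paddle.to_tensor([[10., 10., 50., 50.]])  # predicted box
gbox = paddle.to_tensor([[12., 12., 48., 52.]])  # ground-truth box
loss = giou_loss(pbox, gbox)  # 1 - GIoU, averaged over boxes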

View File

@@ -0,0 +1,193 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
__all__ = ['JDEDetectionLoss', 'JDEEmbeddingLoss', 'JDELoss']
@register
class JDEDetectionLoss(nn.Layer):
__shared__ = ['num_classes']
def __init__(self, num_classes=1, for_mot=True):
super(JDEDetectionLoss, self).__init__()
self.num_classes = num_classes
self.for_mot = for_mot
def det_loss(self, p_det, anchor, t_conf, t_box):
pshape = paddle.shape(p_det)
pshape.stop_gradient = True
nB, nGh, nGw = pshape[0], pshape[-2], pshape[-1]
nA = len(anchor)
p_det = paddle.reshape(
p_det, [nB, nA, self.num_classes + 5, nGh, nGw]).transpose(
(0, 1, 3, 4, 2))
# 1. loss_conf: cross_entropy
p_conf = p_det[:, :, :, :, 4:6]
p_conf_flatten = paddle.reshape(p_conf, [-1, 2])
t_conf_flatten = t_conf.flatten()
t_conf_flatten = paddle.cast(t_conf_flatten, dtype="int64")
t_conf_flatten.stop_gradient = True
loss_conf = F.cross_entropy(
p_conf_flatten, t_conf_flatten, ignore_index=-1, reduction='mean')
loss_conf.stop_gradient = False
# 2. loss_box: smooth_l1_loss
p_box = p_det[:, :, :, :, :4]
p_box_flatten = paddle.reshape(p_box, [-1, 4])
t_box_flatten = paddle.reshape(t_box, [-1, 4])
fg_inds = paddle.nonzero(t_conf_flatten > 0).flatten()
if fg_inds.numel() > 0:
reg_delta = paddle.gather(p_box_flatten, fg_inds)
reg_target = paddle.gather(t_box_flatten, fg_inds)
else:
reg_delta = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
reg_delta.stop_gradient = False
reg_target = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
reg_target.stop_gradient = True
loss_box = F.smooth_l1_loss(
reg_delta, reg_target, reduction='mean', delta=1.0)
loss_box.stop_gradient = False
return loss_conf, loss_box
def forward(self, det_outs, targets, anchors):
"""
Args:
det_outs (list[Tensor]): output from detection head, each one
is a 4-D Tensor with shape [N, C, H, W].
targets (dict): contains 'im_id', 'gt_bbox', 'gt_ide', 'image',
'im_shape', 'scale_factor' and 'tbox', 'tconf', 'tide' of
each FPN level.
anchors (list[list]): anchor setting of JDE model, N row M col, N is
the anchor levels(FPN levels), M is the anchor scales each
level.
"""
assert len(det_outs) == len(anchors)
loss_confs = []
loss_boxes = []
for i, (p_det, anchor) in enumerate(zip(det_outs, anchors)):
t_conf = targets['tconf{}'.format(i)]
t_box = targets['tbox{}'.format(i)]
loss_conf, loss_box = self.det_loss(p_det, anchor, t_conf, t_box)
loss_confs.append(loss_conf)
loss_boxes.append(loss_box)
if self.for_mot:
return {'loss_confs': loss_confs, 'loss_boxes': loss_boxes}
else:
jde_conf_losses = sum(loss_confs)
jde_box_losses = sum(loss_boxes)
jde_det_losses = {
"loss_conf": jde_conf_losses,
"loss_box": jde_box_losses,
"loss": jde_conf_losses + jde_box_losses,
}
return jde_det_losses
@register
class JDEEmbeddingLoss(nn.Layer):
def __init__(self, ):
super(JDEEmbeddingLoss, self).__init__()
self.phony = self.create_parameter(shape=[1], dtype="float32")
def emb_loss(self, p_ide, t_conf, t_ide, emb_scale, classifier):
emb_dim = p_ide.shape[1]
p_ide = p_ide.transpose((0, 2, 3, 1))
p_ide_flatten = paddle.reshape(p_ide, [-1, emb_dim])
mask = t_conf > 0
mask = paddle.cast(mask, dtype="int64")
mask.stop_gradient = True
emb_mask = mask.max(1).flatten()
emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
emb_mask_inds.stop_gradient = True
        # use max(1) to decide the id, TODO: a more reasonable strategy
t_ide_flatten = t_ide.max(1).flatten()
t_ide_flatten = paddle.cast(t_ide_flatten, dtype="int64")
valid_inds = paddle.nonzero(t_ide_flatten != -1).flatten()
if emb_mask_inds.numel() == 0 or valid_inds.numel() == 0:
            # loss_ide = paddle.to_tensor([0])  # would raise an error in the backward pass
            loss_ide = self.phony * 0  # zero-valued loss that keeps the graph connected
else:
embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
embedding = emb_scale * F.normalize(embedding)
logits = classifier(embedding)
ide_target = paddle.gather(t_ide_flatten, emb_mask_inds)
loss_ide = F.cross_entropy(
logits, ide_target, ignore_index=-1, reduction='mean')
loss_ide.stop_gradient = False
return loss_ide
def forward(self, ide_outs, targets, emb_scale, classifier):
loss_ides = []
for i, p_ide in enumerate(ide_outs):
t_conf = targets['tconf{}'.format(i)]
t_ide = targets['tide{}'.format(i)]
loss_ide = self.emb_loss(p_ide, t_conf, t_ide, emb_scale,
classifier)
loss_ides.append(loss_ide)
return loss_ides
@register
class JDELoss(nn.Layer):
def __init__(self):
super(JDELoss, self).__init__()
def forward(self, loss_confs, loss_boxes, loss_ides, loss_params_cls,
loss_params_reg, loss_params_ide, targets):
assert len(loss_confs) == len(loss_boxes) == len(loss_ides)
assert len(loss_params_cls) == len(loss_params_reg) == len(
loss_params_ide)
assert len(loss_confs) == len(loss_params_cls)
batchsize = targets['gt_bbox'].shape[0]
nTargets = paddle.nonzero(paddle.sum(targets['gt_bbox'], axis=2)).shape[
0] / batchsize
nTargets = paddle.to_tensor(nTargets, dtype='float32')
nTargets.stop_gradient = True
jde_losses = []
for i, (loss_conf, loss_box, loss_ide, l_conf_p, l_box_p,
l_ide_p) in enumerate(
zip(loss_confs, loss_boxes, loss_ides, loss_params_cls,
loss_params_reg, loss_params_ide)):
jde_loss = l_conf_p(loss_conf) + l_box_p(loss_box) + l_ide_p(
loss_ide)
jde_losses.append(jde_loss)
loss_all = {
"loss_conf": sum(loss_confs),
"loss_box": sum(loss_boxes),
"loss_ide": sum(loss_ides),
"loss": sum(jde_losses),
"nTargets": nTargets,
}
return loss_all
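For reference, JDEDetectionLoss.forward looks up per-FPN-level targets by the keys 'tconf{i}' and 'tbox{i}'. A hedged sketch of that layout; the grid sizes and anchor count below are illustrative, not the model's actual strides:

import paddle

nB, nA = 2, 4  # hypothetical batch size and anchors per level
targets = {}
for i, (nGh, nGw) in enumerate([(19, 34), (38, 68), (76, 136)]):
    targets['tconf{}'.format(i)] = paddle.zeros([nB, nA, nGh, nGw], dtype='float32')
    targets['tbox{}'.format(i)] = paddle.zeros([nB, nA, nGh, nGw, 4], dtype='float32')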

View File

@@ -0,0 +1,632 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from itertools import cycle, islice
from collections import abc
import numpy as np
import paddle
import paddle.nn as nn
from ppdet.core.workspace import register, serializable
__all__ = ['HrHRNetLoss', 'KeyPointMSELoss', 'OKSLoss', 'CenterFocalLoss', 'L1Loss']
@register
@serializable
class KeyPointMSELoss(nn.Layer):
def __init__(self, use_target_weight=True, loss_scale=0.5):
"""
KeyPointMSELoss layer
Args:
            use_target_weight (bool): whether to use target weight
            loss_scale (float): scale factor applied to the loss, 0.5 by default
"""
super(KeyPointMSELoss, self).__init__()
self.criterion = nn.MSELoss(reduction='mean')
self.use_target_weight = use_target_weight
self.loss_scale = loss_scale
def forward(self, output, records):
target = records['target']
target_weight = records['target_weight']
batch_size = output.shape[0]
num_joints = output.shape[1]
heatmaps_pred = output.reshape(
(batch_size, num_joints, -1)).split(num_joints, 1)
heatmaps_gt = target.reshape(
(batch_size, num_joints, -1)).split(num_joints, 1)
loss = 0
for idx in range(num_joints):
heatmap_pred = heatmaps_pred[idx].squeeze()
heatmap_gt = heatmaps_gt[idx].squeeze()
if self.use_target_weight:
loss += self.loss_scale * self.criterion(
heatmap_pred.multiply(target_weight[:, idx]),
heatmap_gt.multiply(target_weight[:, idx]))
else:
loss += self.loss_scale * self.criterion(heatmap_pred,
heatmap_gt)
keypoint_losses = dict()
keypoint_losses['loss'] = loss / num_joints
return keypoint_losses
@register
@serializable
class HrHRNetLoss(nn.Layer):
def __init__(self, num_joints, swahr):
"""
HrHRNetLoss layer
Args:
            num_joints (int): number of keypoints
            swahr (bool): whether to use the SWAHR (scale-adaptive heatmap
                regression) variant of the heatmap loss
"""
super(HrHRNetLoss, self).__init__()
if swahr:
self.heatmaploss = HeatMapSWAHRLoss(num_joints)
else:
self.heatmaploss = HeatMapLoss()
self.aeloss = AELoss()
self.ziploss = ZipLoss(
[self.heatmaploss, self.heatmaploss, self.aeloss])
def forward(self, inputs, records):
targets = []
targets.append([records['heatmap_gt1x'], records['mask_1x']])
targets.append([records['heatmap_gt2x'], records['mask_2x']])
targets.append(records['tagmap'])
keypoint_losses = dict()
loss = self.ziploss(inputs, targets)
keypoint_losses['heatmap_loss'] = loss[0] + loss[1]
keypoint_losses['pull_loss'] = loss[2][0]
keypoint_losses['push_loss'] = loss[2][1]
keypoint_losses['loss'] = recursive_sum(loss)
return keypoint_losses
class HeatMapLoss(object):
def __init__(self, loss_factor=1.0):
super(HeatMapLoss, self).__init__()
self.loss_factor = loss_factor
def __call__(self, preds, targets):
heatmap, mask = targets
loss = ((preds - heatmap)**2 * mask.cast('float').unsqueeze(1))
loss = paddle.clip(loss, min=0, max=2).mean()
loss *= self.loss_factor
return loss
class HeatMapSWAHRLoss(object):
def __init__(self, num_joints, loss_factor=1.0):
super(HeatMapSWAHRLoss, self).__init__()
self.loss_factor = loss_factor
self.num_joints = num_joints
def __call__(self, preds, targets):
heatmaps_gt, mask = targets
heatmaps_pred = preds[0]
scalemaps_pred = preds[1]
heatmaps_scaled_gt = paddle.where(heatmaps_gt > 0, 0.5 * heatmaps_gt * (
1 + (1 +
(scalemaps_pred - 1.) * paddle.log(heatmaps_gt + 1e-10))**2),
heatmaps_gt)
regularizer_loss = paddle.mean(
paddle.pow((scalemaps_pred - 1.) * (heatmaps_gt > 0).astype(float),
2))
        omega = 0.01
        # thres = 2**(-1/omega), threshold for positive weight
        hm_weight = (heatmaps_scaled_gt**omega * paddle.abs(1 - heatmaps_pred) +
                     paddle.abs(heatmaps_pred) * (1 - heatmaps_scaled_gt**omega))
loss = (((heatmaps_pred - heatmaps_scaled_gt)**2) *
mask.cast('float').unsqueeze(1)) * hm_weight
loss = loss.mean()
loss = self.loss_factor * (loss + 1.0 * regularizer_loss)
return loss
class AELoss(object):
def __init__(self, pull_factor=0.001, push_factor=0.001):
super(AELoss, self).__init__()
self.pull_factor = pull_factor
self.push_factor = push_factor
def apply_single(self, pred, tagmap):
if tagmap.numpy()[:, :, 3].sum() == 0:
return (paddle.zeros([1]), paddle.zeros([1]))
nonzero = paddle.nonzero(tagmap[:, :, 3] > 0)
if nonzero.shape[0] == 0:
return (paddle.zeros([1]), paddle.zeros([1]))
p_inds = paddle.unique(nonzero[:, 0])
num_person = p_inds.shape[0]
if num_person == 0:
return (paddle.zeros([1]), paddle.zeros([1]))
pull = 0
tagpull_num = 0
embs_all = []
person_unvalid = 0
for person_idx in p_inds.numpy():
valid_single = tagmap[person_idx.item()]
validkpts = paddle.nonzero(valid_single[:, 3] > 0)
valid_single = paddle.index_select(valid_single, validkpts)
emb = paddle.gather_nd(pred, valid_single[:, :3])
if emb.shape[0] == 1:
person_unvalid += 1
mean = paddle.mean(emb, axis=0)
embs_all.append(mean)
pull += paddle.mean(paddle.pow(emb - mean, 2), axis=0)
tagpull_num += emb.shape[0]
pull /= max(num_person - person_unvalid, 1)
if num_person < 2:
return pull, paddle.zeros([1])
embs_all = paddle.stack(embs_all)
A = embs_all.expand([num_person, num_person])
B = A.transpose([1, 0])
diff = A - B
diff = paddle.pow(diff, 2)
push = paddle.exp(-diff)
push = paddle.sum(push) - num_person
push /= 2 * num_person * (num_person - 1)
return pull, push
def __call__(self, preds, tagmaps):
bs = preds.shape[0]
losses = [
self.apply_single(preds[i:i + 1].squeeze(),
tagmaps[i:i + 1].squeeze()) for i in range(bs)
]
pull = self.pull_factor * sum(loss[0] for loss in losses) / len(losses)
push = self.push_factor * sum(loss[1] for loss in losses) / len(losses)
return pull, push
class ZipLoss(object):
def __init__(self, loss_funcs):
super(ZipLoss, self).__init__()
self.loss_funcs = loss_funcs
def __call__(self, inputs, targets):
assert len(self.loss_funcs) == len(targets) >= len(inputs)
def zip_repeat(*args):
longest = max(map(len, args))
filled = [islice(cycle(x), longest) for x in args]
return zip(*filled)
return tuple(
fn(x, y)
for x, y, fn in zip_repeat(inputs, targets, self.loss_funcs))
def recursive_sum(inputs):
if isinstance(inputs, abc.Sequence):
return sum([recursive_sum(x) for x in inputs])
return inputs
def oks_overlaps(kpt_preds, kpt_gts, kpt_valids, kpt_areas, sigmas):
if not kpt_gts.astype('bool').any():
return kpt_preds.sum()*0
sigmas = paddle.to_tensor(sigmas, dtype=kpt_preds.dtype)
variances = (sigmas * 2)**2
assert kpt_preds.shape[0] == kpt_gts.shape[0]
kpt_preds = kpt_preds.reshape((-1, kpt_preds.shape[-1] // 2, 2))
kpt_gts = kpt_gts.reshape((-1, kpt_gts.shape[-1] // 2, 2))
squared_distance = (kpt_preds[:, :, 0] - kpt_gts[:, :, 0]) ** 2 + \
(kpt_preds[:, :, 1] - kpt_gts[:, :, 1]) ** 2
assert (kpt_valids.sum(-1) > 0).all()
squared_distance0 = squared_distance / (
kpt_areas[:, None] * variances[None, :] * 2)
squared_distance1 = paddle.exp(-squared_distance0)
squared_distance1 = squared_distance1 * kpt_valids
oks = squared_distance1.sum(axis=1) / kpt_valids.sum(axis=1)
return oks
def oks_loss(pred,
target,
weight,
valid=None,
area=None,
linear=False,
sigmas=None,
eps=1e-6,
avg_factor=None,
reduction=None):
"""Oks loss.
Computing the oks loss between a set of predicted poses and target poses.
The loss is calculated as negative log of oks.
Args:
pred (Tensor): Predicted poses of format (x1, y1, x2, y2, ...),
shape (n, K*2).
target (Tensor): Corresponding gt poses, shape (n, K*2).
linear (bool, optional): If True, use linear scale of loss instead of
log scale. Default: False.
eps (float): Eps to avoid log(0).
Returns:
Tensor: Loss tensor.
"""
oks = oks_overlaps(pred, target, valid, area, sigmas).clip(min=eps)
if linear:
loss = 1 - oks
else:
loss = -oks.log()
if weight is not None:
if weight.shape != loss.shape:
if weight.shape[0] == loss.shape[0]:
# For most cases, weight is of shape (num_priors, ),
# which means it does not have the second axis num_class
weight = weight.reshape((-1, 1))
else:
# Sometimes, weight per anchor per class is also needed. e.g.
# in FSAF. But it may be flattened of shape
# (num_priors x num_class, ), while loss is still of shape
# (num_priors, num_class).
assert weight.numel() == loss.numel()
weight = weight.reshape((loss.shape[0], -1))
assert weight.ndim == loss.ndim
loss = loss * weight
# if avg_factor is not specified, just reduce the loss
if avg_factor is None:
if reduction == 'mean':
loss = loss.mean()
elif reduction == 'sum':
loss = loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
# Avoid causing ZeroDivisionError when avg_factor is 0.0,
# i.e., all labels of an image belong to ignore index.
eps = 1e-10
loss = loss.sum() / (avg_factor + eps)
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError('avg_factor can not be used with reduction="sum"')
return loss
@register
@serializable
class OKSLoss(nn.Layer):
"""OKSLoss.
Computing the oks loss between a set of predicted poses and target poses.
Args:
linear (bool): If True, use linear scale of loss instead of log scale.
Default: False.
eps (float): Eps to avoid log(0).
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Weight of loss.
"""
def __init__(self,
linear=False,
num_keypoints=17,
eps=1e-6,
reduction='mean',
loss_weight=1.0):
super(OKSLoss, self).__init__()
self.linear = linear
self.eps = eps
self.reduction = reduction
self.loss_weight = loss_weight
if num_keypoints == 17:
self.sigmas = np.array([
.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
1.07, .87, .87, .89, .89
], dtype=np.float32) / 10.0
elif num_keypoints == 14:
self.sigmas = np.array([
.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89,
.79, .79
]) / 10.0
else:
raise ValueError(f'Unsupported keypoints number {num_keypoints}')
def forward(self,
pred,
target,
valid,
area,
weight=None,
avg_factor=None,
reduction_override=None,
**kwargs):
"""Forward function.
Args:
pred (Tensor): The prediction.
target (Tensor): The learning target of the prediction.
valid (Tensor): The visible flag of the target pose.
area (Tensor): The area of the target pose.
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None. Options are "none", "mean" and "sum".
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
if (weight is not None) and (not paddle.any(weight > 0)) and (
reduction != 'none'):
if pred.dim() == weight.dim() + 1:
weight = weight.unsqueeze(1)
return (pred * weight).sum() # 0
if weight is not None and weight.dim() > 1:
# TODO: remove this in the future
# reduce the weight of shape (n, 4) to (n,) to match the
# iou_loss of shape (n,)
assert weight.shape == pred.shape
weight = weight.mean(-1)
loss = self.loss_weight * oks_loss(
pred,
target,
weight,
valid=valid,
area=area,
linear=self.linear,
sigmas=self.sigmas,
eps=self.eps,
reduction=reduction,
avg_factor=avg_factor,
**kwargs)
return loss
def center_focal_loss(pred, gt, weight=None, mask=None, avg_factor=None, reduction=None):
"""Modified focal loss. Exactly the same as CornerNet.
Runs faster and costs a little bit more memory.
Args:
pred (Tensor): The prediction with shape [bs, c, h, w].
gt (Tensor): The learning target of the prediction in gaussian
distribution, with shape [bs, c, h, w].
mask (Tensor): The valid mask. Defaults to None.
"""
if not gt.astype('bool').any():
return pred.sum()*0
pos_inds = gt.equal(1).astype('float32')
if mask is None:
neg_inds = gt.less_than(paddle.to_tensor([1], dtype='float32')).astype('float32')
else:
neg_inds = gt.less_than(paddle.to_tensor([1], dtype='float32')).astype('float32') * mask.equal(0).astype('float32')
neg_weights = paddle.pow(1 - gt, 4)
loss = 0
pos_loss = paddle.log(pred) * paddle.pow(1 - pred, 2) * pos_inds
neg_loss = paddle.log(1 - pred) * paddle.pow(pred, 2) * neg_weights * \
neg_inds
num_pos = pos_inds.astype('float32').sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
if weight is not None:
if weight.shape != loss.shape:
if weight.shape[0] == loss.shape[0]:
# For most cases, weight is of shape (num_priors, ),
# which means it does not have the second axis num_class
weight = weight.reshape((-1, 1))
else:
# Sometimes, weight per anchor per class is also needed. e.g.
# in FSAF. But it may be flattened of shape
# (num_priors x num_class, ), while loss is still of shape
# (num_priors, num_class).
assert weight.numel() == loss.numel()
weight = weight.reshape((loss.shape[0], -1))
assert weight.ndim == loss.ndim
loss = loss * weight
# if avg_factor is not specified, just reduce the loss
if avg_factor is None:
if reduction == 'mean':
loss = loss.mean()
elif reduction == 'sum':
loss = loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
# Avoid causing ZeroDivisionError when avg_factor is 0.0,
# i.e., all labels of an image belong to ignore index.
eps = 1e-10
loss = loss.sum() / (avg_factor + eps)
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError('avg_factor can not be used with reduction="sum"')
return loss
@register
@serializable
class CenterFocalLoss(nn.Layer):
"""CenterFocalLoss is a variant of focal loss.
More details can be found in the `paper
<https://arxiv.org/abs/1808.01244>`_
Args:
reduction (str): Options are "none", "mean" and "sum".
loss_weight (float): Loss weight of current loss.
"""
def __init__(self,
reduction='none',
loss_weight=1.0):
super(CenterFocalLoss, self).__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
mask=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (Tensor): The prediction.
target (Tensor): The learning target of the prediction in gaussian
distribution.
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
mask (Tensor): The valid mask. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_reg = self.loss_weight * center_focal_loss(
pred,
target,
weight,
mask=mask,
reduction=reduction,
avg_factor=avg_factor)
return loss_reg
def l1_loss(pred, target, weight=None, reduction='mean', avg_factor=None):
"""L1 loss.
Args:
pred (Tensor): The prediction.
target (Tensor): The learning target of the prediction.
Returns:
Tensor: Calculated loss
"""
if not target.astype('bool').any():
return pred.sum() * 0
assert pred.shape == target.shape
loss = paddle.abs(pred - target)
if weight is not None:
if weight.shape != loss.shape:
if weight.shape[0] == loss.shape[0]:
# For most cases, weight is of shape (num_priors, ),
# which means it does not have the second axis num_class
weight = weight.reshape((-1, 1))
else:
# Sometimes, weight per anchor per class is also needed. e.g.
# in FSAF. But it may be flattened of shape
# (num_priors x num_class, ), while loss is still of shape
# (num_priors, num_class).
assert weight.numel() == loss.numel()
weight = weight.reshape((loss.shape[0], -1))
assert weight.ndim == loss.ndim
loss = loss * weight
# if avg_factor is not specified, just reduce the loss
if avg_factor is None:
if reduction == 'mean':
loss = loss.mean()
elif reduction == 'sum':
loss = loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
# Avoid causing ZeroDivisionError when avg_factor is 0.0,
# i.e., all labels of an image belong to ignore index.
eps = 1e-10
loss = loss.sum() / (avg_factor + eps)
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError('avg_factor can not be used with reduction="sum"')
return loss
@register
@serializable
class L1Loss(nn.Layer):
"""L1 loss.
Args:
reduction (str, optional): The method to reduce the loss.
Options are "none", "mean" and "sum".
loss_weight (float, optional): The weight of loss.
"""
def __init__(self, reduction='mean', loss_weight=1.0):
super(L1Loss, self).__init__()
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self,
pred,
target,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (Tensor): The prediction.
target (Tensor): The learning target of the prediction.
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (
reduction_override if reduction_override else self.reduction)
loss_bbox = self.loss_weight * l1_loss(
pred, target, weight, reduction=reduction, avg_factor=avg_factor)
return loss_bbox
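The reduction/avg_factor contract shared by oks_loss, center_focal_loss and l1_loss above can be checked in isolation. A minimal sketch with random inputs (shapes are arbitrary):

import paddle

pred = paddle.rand([8, 4])
target = paddle.rand([8, 4]) + 0.1  # keep target non-zero so the early-exit is skipped
plain_mean = l1_loss(pred, target, reduction='mean')                  # plain mean
by_factor = l1_loss(pred, target, reduction='mean', avg_factor=8.0)   # sum / (8 + eps)
none_kept = l1_loss(pred, target, reduction='none')                   # elementwise, [8, 4]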

View File

@@ -0,0 +1,250 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from itertools import cycle, islice
from collections import abc
import cv2
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
__all__ = ['Pose3DLoss']
@register
@serializable
class Pose3DLoss(nn.Layer):
def __init__(self, weight_3d=1.0, weight_2d=0.0, reduction='none'):
"""
        Pose3DLoss layer
        Args:
            weight_3d (float): weight of the 3d joints loss
            weight_2d (float): weight of the 2d joints loss
            reduction (str): reduction applied by the underlying criterions,
                one of "none", "mean" and "sum"
"""
super(Pose3DLoss, self).__init__()
self.weight_3d = weight_3d
self.weight_2d = weight_2d
self.criterion_2dpose = nn.MSELoss(reduction=reduction)
self.criterion_3dpose = nn.L1Loss(reduction=reduction)
self.criterion_smoothl1 = nn.SmoothL1Loss(
reduction=reduction, delta=1.0)
self.criterion_vertices = nn.L1Loss()
def forward(self, pred3d, pred2d, inputs):
"""
mpjpe: mpjpe loss between 3d joints
        keypoint_2d_loss: 2d joints loss computed by criterion_2dpose
"""
gt_3d_joints = inputs['joints_3d']
gt_2d_joints = inputs['joints_2d']
has_3d_joints = inputs['has_3d_joints']
has_2d_joints = inputs['has_2d_joints']
loss_3d = mpjpe_focal(pred3d, gt_3d_joints, has_3d_joints)
loss = self.weight_3d * loss_3d
epoch = inputs['epoch_id']
if self.weight_2d > 0:
weight = self.weight_2d * pow(0.1, (epoch // 8))
if epoch > 8:
weight = 0
loss_2d = keypoint_2d_loss(self.criterion_2dpose, pred2d,
gt_2d_joints, has_2d_joints)
loss += weight * loss_2d
return loss
def filter_3d_joints(pred, gt, has_3d_joints):
"""
filter 3d joints
"""
gt = gt[has_3d_joints == 1]
gt = gt[:, :, :3]
pred = pred[has_3d_joints == 1]
gt_pelvis = (gt[:, 2, :] + gt[:, 3, :]) / 2
gt = gt - gt_pelvis[:, None, :]
pred_pelvis = (pred[:, 2, :] + pred[:, 3, :]) / 2
pred = pred - pred_pelvis[:, None, :]
return pred, gt
def mpjpe(pred, gt, has_3d_joints):
"""
mPJPE loss
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
error = paddle.sqrt((paddle.minimum((pred - gt), paddle.to_tensor(1.2))**2
).sum(axis=-1)).mean()
return error
def mpjpe_focal(pred, gt, has_3d_joints):
"""
mPJPE loss
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
mse_error = ((pred - gt)**2).sum(axis=-1)
mpjpe_error = paddle.sqrt(mse_error)
mean = mpjpe_error.mean()
std = mpjpe_error.std()
atte = 2 * F.sigmoid(6 * (mpjpe_error - mean) / std)
mse_error *= atte
return mse_error.mean()
def mpjpe_mse(pred, gt, has_3d_joints, weight=1.):
"""
mPJPE loss
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
error = (((pred - gt)**2).sum(axis=-1)).mean()
return error
def mpjpe_criterion(pred, gt, has_3d_joints, criterion_pose3d):
"""
mPJPE loss of self define criterion
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
error = paddle.sqrt(criterion_pose3d(pred, gt)).mean()
return error
@register
@serializable
def weighted_mpjpe(pred, gt, has_3d_joints):
"""
Weighted_mPJPE
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
weight = paddle.linalg.norm(pred, p=2, axis=-1)
weight = paddle.to_tensor(
[1.5, 1.3, 1.2, 1.2, 1.3, 1.5, 1.5, 1.3, 1.2, 1.2, 1.3, 1.5, 1., 1.])
error = (weight * paddle.linalg.norm(pred - gt, p=2, axis=-1)).mean()
return error
@register
@serializable
def normed_mpjpe(pred, gt, has_3d_joints):
"""
Normalized MPJPE (scale only), adapted from:
https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py
"""
    pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
    assert pred.shape == gt.shape
    norm_predicted = paddle.mean(
        paddle.sum(pred**2, axis=-1, keepdim=True), axis=-2, keepdim=True)
    norm_target = paddle.mean(
        paddle.sum(gt * pred, axis=-1, keepdim=True), axis=-2, keepdim=True)
    scale = norm_target / norm_predicted
    # pred/gt are already filtered and pelvis-aligned here, so compute the
    # clipped MPJPE inline instead of re-filtering through mpjpe()
    return paddle.sqrt((paddle.minimum(scale * pred - gt, paddle.to_tensor(1.2))
                        **2).sum(axis=-1)).mean()
@register
@serializable
def mpjpe_np(pred, gt, has_3d_joints):
"""
mPJPE_NP
"""
pred, gt = filter_3d_joints(pred, gt, has_3d_joints)
error = np.sqrt(((pred - gt)**2).sum(axis=-1)).mean()
return error
@register
@serializable
def mean_per_vertex_error(pred, gt, has_smpl):
"""
Compute mPVE
"""
pred = pred[has_smpl == 1]
gt = gt[has_smpl == 1]
with paddle.no_grad():
error = paddle.sqrt(((pred - gt)**2).sum(axis=-1)).mean()
return error
@register
@serializable
def keypoint_2d_loss(criterion_keypoints, pred_keypoints_2d, gt_keypoints_2d,
has_pose_2d):
"""
Compute 2D reprojection loss if 2D keypoint annotations are available.
The confidence (conf) is binary and indicates whether the keypoints exist or not.
"""
conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
loss = (conf * criterion_keypoints(
pred_keypoints_2d, gt_keypoints_2d[:, :, :-1] * 0.001)).mean()
return loss
@register
@serializable
def keypoint_3d_loss(criterion_keypoints, pred_keypoints_3d, gt_keypoints_3d,
has_pose_3d):
"""
Compute 3D keypoint loss if 3D keypoint annotations are available.
"""
conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone()
gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1]
conf = conf[has_pose_3d == 1]
pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1]
if len(gt_keypoints_3d) > 0:
gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2
gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :]
pred_pelvis = (
pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2
pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :]
return (conf * criterion_keypoints(pred_keypoints_3d,
gt_keypoints_3d)).mean()
else:
return paddle.to_tensor([1.]).fill_(0.)
@register
@serializable
def vertices_loss(criterion_vertices, pred_vertices, gt_vertices, has_smpl):
"""
Compute per-vertex loss if vertex annotations are available.
"""
pred_vertices_with_shape = pred_vertices[has_smpl == 1]
gt_vertices_with_shape = gt_vertices[has_smpl == 1]
if len(gt_vertices_with_shape) > 0:
return criterion_vertices(pred_vertices_with_shape,
gt_vertices_with_shape)
else:
return paddle.to_tensor([1.]).fill_(0.)
@register
@serializable
def rectify_pose(pose):
pose = pose.copy()
R_mod = cv2.Rodrigues(np.array([np.pi, 0, 0]))[0]
R_root = cv2.Rodrigues(pose[:3])[0]
new_root = R_root.dot(R_mod)
pose[:3] = cv2.Rodrigues(new_root)[0].reshape(3)
return pose
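A hedged end-to-end sketch of the mpjpe path above: GT joints are [B, K, 4] (x, y, z plus a trailing flag that filter_3d_joints drops), the pelvis is taken from joints 2 and 3, so K must be at least 4. The shapes and values below are illustrative only:

import paddle

pred = paddle.rand([2, 14, 3])
gt = paddle.concat([paddle.rand([2, 14, 3]), paddle.ones([2, 14, 1])], axis=-1)
has_3d = paddle.ones([2], dtype='int64')  # both samples carry 3d annotations
err = mpjpe(pred, gt, has_3d)             # scalar pelvis-aligned error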

View File

@@ -0,0 +1,104 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
__all__ = ['ProbIoULoss']
def gbb_form(boxes):
xy, wh, angle = paddle.split(boxes, [2, 2, 1], axis=-1)
return paddle.concat([xy, wh.pow(2) / 12., angle], axis=-1)
def rotated_form(a_, b_, angles):
cos_a = paddle.cos(angles)
sin_a = paddle.sin(angles)
a = a_ * paddle.pow(cos_a, 2) + b_ * paddle.pow(sin_a, 2)
b = a_ * paddle.pow(sin_a, 2) + b_ * paddle.pow(cos_a, 2)
c = (a_ - b_) * cos_a * sin_a
return a, b, c
def probiou_loss(pred, target, eps=1e-3, mode='l1'):
"""
    pred   -> a matrix [N, 5] (x, y, w, h, angle in radians) containing our predicted boxes; for HBB, angle == 0
    target -> a matrix [N, 5] (x, y, w, h, angle in radians) containing our target boxes; for HBB, angle == 0
    eps    -> threshold to avoid infinite values
    mode   -> 'l1' (values in [0, 1]) or 'l2' (values in [0, inf]), metrics according to the ProbIoU paper
"""
gbboxes1 = gbb_form(pred)
gbboxes2 = gbb_form(target)
    x1, y1, a1_, b1_, c1_ = [gbboxes1[:, i] for i in range(5)]
    x2, y2, a2_, b2_, c2_ = [gbboxes2[:, i] for i in range(5)]
a1, b1, c1 = rotated_form(a1_, b1_, c1_)
a2, b2, c2 = rotated_form(a2_, b2_, c2_)
t1 = 0.25 * ((a1 + a2) * (paddle.pow(y1 - y2, 2)) + (b1 + b2) * (paddle.pow(x1 - x2, 2))) + \
0.5 * ((c1+c2)*(x2-x1)*(y1-y2))
t2 = (a1 + a2) * (b1 + b2) - paddle.pow(c1 + c2, 2)
t3_ = (a1 * b1 - c1 * c1) * (a2 * b2 - c2 * c2)
t3 = 0.5 * paddle.log(t2 / (4 * paddle.sqrt(F.relu(t3_)) + eps))
B_d = (t1 / t2) + t3
# B_d = t1 + t2 + t3
B_d = paddle.clip(B_d, min=eps, max=100.0)
l1 = paddle.sqrt(1.0 - paddle.exp(-B_d) + eps)
l_i = paddle.pow(l1, 2.0)
l2 = -paddle.log(1.0 - l_i + eps)
if mode == 'l1':
probiou = l1
if mode == 'l2':
probiou = l2
return probiou
@serializable
@register
class ProbIoULoss(object):
""" ProbIoU Loss, refer to https://arxiv.org/abs/2106.06072 for details """
def __init__(self, mode='l1', eps=1e-3):
super(ProbIoULoss, self).__init__()
self.mode = mode
self.eps = eps
def __call__(self, pred_rboxes, assigned_rboxes):
return probiou_loss(pred_rboxes, assigned_rboxes, self.eps, self.mode)
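A minimal usage sketch for ProbIoULoss, assuming rotated boxes encoded as (x, y, w, h, angle in radians); the values are made up:

import paddle

loss_fn = ProbIoULoss(mode='l1', eps=1e-3)
pred_rboxes = paddle.to_tensor([[50., 50., 20., 10., 0.10],
                                [30., 40., 12., 6., 1.20]])
gt_rboxes = paddle.to_tensor([[52., 49., 22., 9., 0.15],
                              [31., 41., 11., 7., 1.10]])
per_box_loss = loss_fn(pred_rboxes, gt_rboxes)  # shape [2], one loss per box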

View File

@@ -0,0 +1,175 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.losses.iou_loss import GIoULoss
from .sparsercnn_loss import HungarianMatcher
__all__ = ['QueryInstLoss']
@register
class QueryInstLoss(object):
__shared__ = ['num_classes']
def __init__(self,
num_classes=80,
focal_loss_alpha=0.25,
focal_loss_gamma=2.0,
class_weight=2.0,
l1_weight=5.0,
giou_weight=2.0,
mask_weight=8.0):
super(QueryInstLoss, self).__init__()
self.num_classes = num_classes
self.focal_loss_alpha = focal_loss_alpha
self.focal_loss_gamma = focal_loss_gamma
self.loss_weights = {
"loss_cls": class_weight,
"loss_bbox": l1_weight,
"loss_giou": giou_weight,
"loss_mask": mask_weight
}
self.giou_loss = GIoULoss(eps=1e-6, reduction='sum')
self.matcher = HungarianMatcher(focal_loss_alpha, focal_loss_gamma,
class_weight, l1_weight, giou_weight)
def loss_classes(self, class_logits, targets, indices, avg_factor):
tgt_labels = paddle.full(
class_logits.shape[:2], self.num_classes, dtype='int32')
if sum(len(v['labels']) for v in targets) > 0:
tgt_classes = paddle.concat([
paddle.gather(
tgt['labels'], tgt_idx, axis=0)
for tgt, (_, tgt_idx) in zip(targets, indices)
])
batch_idx, src_idx = self._get_src_permutation_idx(indices)
for i, (batch_i, src_i) in enumerate(zip(batch_idx, src_idx)):
tgt_labels[int(batch_i), int(src_i)] = tgt_classes[i]
tgt_labels = tgt_labels.flatten(0, 1).unsqueeze(-1)
tgt_labels_onehot = paddle.cast(
tgt_labels == paddle.arange(0, self.num_classes), dtype='float32')
tgt_labels_onehot.stop_gradient = True
src_logits = class_logits.flatten(0, 1)
loss_cls = F.sigmoid_focal_loss(
src_logits,
tgt_labels_onehot,
alpha=self.focal_loss_alpha,
gamma=self.focal_loss_gamma,
reduction='sum') / avg_factor
losses = {'loss_cls': loss_cls * self.loss_weights['loss_cls']}
return losses
def loss_bboxes(self, bbox_pred, targets, indices, avg_factor):
bboxes = paddle.concat([
paddle.gather(
src, src_idx, axis=0)
for src, (src_idx, _) in zip(bbox_pred, indices)
])
tgt_bboxes = paddle.concat([
paddle.gather(
tgt['boxes'], tgt_idx, axis=0)
for tgt, (_, tgt_idx) in zip(targets, indices)
])
tgt_bboxes.stop_gradient = True
im_shapes = paddle.concat([tgt['img_whwh_tgt'] for tgt in targets])
bboxes_norm = bboxes / im_shapes
tgt_bboxes_norm = tgt_bboxes / im_shapes
loss_giou = self.giou_loss(bboxes, tgt_bboxes) / avg_factor
loss_bbox = F.l1_loss(
bboxes_norm, tgt_bboxes_norm, reduction='sum') / avg_factor
losses = {
'loss_bbox': loss_bbox * self.loss_weights['loss_bbox'],
'loss_giou': loss_giou * self.loss_weights['loss_giou']
}
return losses
def loss_masks(self, pos_bbox_pred, mask_logits, targets, indices,
avg_factor):
tgt_segm = [
paddle.gather(
tgt['gt_segm'], tgt_idx, axis=0)
for tgt, (_, tgt_idx) in zip(targets, indices)
]
tgt_masks = []
for i in range(len(indices)):
gt_segm = tgt_segm[i].unsqueeze(1)
if len(gt_segm) == 0:
continue
boxes = pos_bbox_pred[i]
boxes[:, 0::2] = paddle.clip(
boxes[:, 0::2], min=0, max=gt_segm.shape[3])
boxes[:, 1::2] = paddle.clip(
boxes[:, 1::2], min=0, max=gt_segm.shape[2])
boxes_num = paddle.to_tensor([1] * len(boxes), dtype='int32')
gt_mask = paddle.vision.ops.roi_align(
gt_segm,
boxes,
boxes_num,
output_size=mask_logits.shape[-2:],
aligned=True)
tgt_masks.append(gt_mask)
tgt_masks = paddle.concat(tgt_masks).squeeze(1)
tgt_masks = paddle.cast(tgt_masks >= 0.5, dtype='float32')
tgt_masks.stop_gradient = True
tgt_labels = paddle.concat([
paddle.gather(
tgt['labels'], tgt_idx, axis=0)
for tgt, (_, tgt_idx) in zip(targets, indices)
])
mask_label = F.one_hot(tgt_labels, self.num_classes).unsqueeze([2, 3])
mask_label = paddle.expand_as(mask_label, mask_logits)
mask_label.stop_gradient = True
src_masks = paddle.gather_nd(mask_logits, paddle.nonzero(mask_label))
shape = mask_logits.shape
src_masks = paddle.reshape(src_masks, [shape[0], shape[2], shape[3]])
src_masks = F.sigmoid(src_masks)
X = src_masks.flatten(1)
Y = tgt_masks.flatten(1)
inter = paddle.sum(X * Y, 1)
union = paddle.sum(X * X, 1) + paddle.sum(Y * Y, 1)
dice = (2 * inter) / (union + 2e-5)
loss_mask = (1 - dice).sum() / avg_factor
losses = {'loss_mask': loss_mask * self.loss_weights['loss_mask']}
return losses
@staticmethod
def _get_src_permutation_idx(indices):
batch_idx = paddle.concat(
[paddle.full_like(src, i) for i, (src, _) in enumerate(indices)])
src_idx = paddle.concat([src for (src, _) in indices])
return batch_idx, src_idx
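The mask term in loss_masks above reduces to a dice loss over flattened masks. A standalone sketch of that arithmetic, with illustrative mask sizes:

import paddle

X = paddle.rand([3, 28 * 28])                            # predicted mask probs
Y = (paddle.rand([3, 28 * 28]) > 0.5).astype('float32')  # binary GT masks
inter = paddle.sum(X * Y, 1)
union = paddle.sum(X * X, 1) + paddle.sum(Y * Y, 1)
dice = (2 * inter) / (union + 2e-5)
loss_mask = (1 - dice).sum() / 3                         # avg_factor = 3 here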

View File

@@ -0,0 +1,60 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
__all__ = ['SmoothL1Loss']
@register
class SmoothL1Loss(nn.Layer):
"""Smooth L1 Loss.
Args:
        beta (float): controls the smooth region; the loss becomes plain L1 when beta=0.0
loss_weight (float): the final loss will be multiplied by this
"""
def __init__(self,
beta=1.0,
loss_weight=1.0):
super(SmoothL1Loss, self).__init__()
assert beta >= 0
self.beta = beta
self.loss_weight = loss_weight
def forward(self, pred, target, reduction='none'):
"""forward function, based on fvcore.
Args:
pred (Tensor): prediction tensor
target (Tensor): target tensor, pred.shape must be the same as target.shape
reduction (str): the way to reduce loss, one of (none, sum, mean)
"""
assert reduction in ('none', 'sum', 'mean')
target = target.detach()
if self.beta < 1e-5:
loss = paddle.abs(pred - target)
else:
n = paddle.abs(pred - target)
cond = n < self.beta
loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta)
if reduction == 'mean':
loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
elif reduction == 'sum':
loss = loss.sum()
return loss * self.loss_weight
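A quick check of the beta threshold: the loss is quadratic for |pred - target| < beta and linear beyond it. The expected values below are hand-computed for beta = 1:

import paddle

loss_fn = SmoothL1Loss(beta=1.0, loss_weight=1.0)
pred = paddle.to_tensor([0.2, 3.0])
target = paddle.zeros([2])
print(loss_fn(pred, target))  # ~[0.02, 2.5]: 0.5 * 0.2**2 / 1 and 3.0 - 0.5 * 1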

View File

@@ -0,0 +1,101 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
__all__ = ['SOLOv2Loss']
@register
@serializable
class SOLOv2Loss(object):
"""
SOLOv2Loss
Args:
ins_loss_weight (float): Weight of instance loss.
focal_loss_gamma (float): Gamma parameter for focal loss.
focal_loss_alpha (float): Alpha parameter for focal loss.
"""
def __init__(self,
ins_loss_weight=3.0,
focal_loss_gamma=2.0,
focal_loss_alpha=0.25):
self.ins_loss_weight = ins_loss_weight
self.focal_loss_gamma = focal_loss_gamma
self.focal_loss_alpha = focal_loss_alpha
def _dice_loss(self, input, target):
input = paddle.reshape(input, shape=(paddle.shape(input)[0], -1))
target = paddle.reshape(target, shape=(paddle.shape(target)[0], -1))
a = paddle.sum(input * target, axis=1)
b = paddle.sum(input * input, axis=1) + 0.001
c = paddle.sum(target * target, axis=1) + 0.001
d = (2 * a) / (b + c)
return 1 - d
def __call__(self, ins_pred_list, ins_label_list, cate_preds, cate_labels,
num_ins):
"""
Get loss of network of SOLOv2.
Args:
ins_pred_list (list): Variable list of instance branch output.
            ins_label_list (list): List of instance labels per batch.
            cate_preds (list): Concatenated Variable list of category branch output.
            cate_labels (list): Concatenated list of category labels per batch.
num_ins (int): Number of positive samples in a mini-batch.
Returns:
loss_ins (Variable): The instance loss Variable of SOLOv2 network.
loss_cate (Variable): The category loss Variable of SOLOv2 network.
"""
        # 1. Use dice_loss to calculate instance loss
loss_ins = []
total_weights = paddle.zeros(shape=[1], dtype='float32')
for input, target in zip(ins_pred_list, ins_label_list):
if input is None:
continue
target = paddle.cast(target, 'float32')
target = paddle.reshape(
target,
shape=[-1, paddle.shape(input)[-2], paddle.shape(input)[-1]])
weights = paddle.cast(
paddle.sum(target, axis=[1, 2]) > 0, 'float32')
input = F.sigmoid(input)
dice_out = paddle.multiply(self._dice_loss(input, target), weights)
total_weights += paddle.sum(weights)
loss_ins.append(dice_out)
loss_ins = paddle.sum(paddle.concat(loss_ins)) / total_weights
loss_ins = loss_ins * self.ins_loss_weight
        # 2. Use sigmoid_focal_loss to calculate category loss
# expand onehot labels
num_classes = cate_preds.shape[-1]
cate_labels_bin = F.one_hot(cate_labels, num_classes=num_classes + 1)
cate_labels_bin = cate_labels_bin[:, 1:]
loss_cate = F.sigmoid_focal_loss(
cate_preds,
label=cate_labels_bin,
normalizer=num_ins + 1.,
gamma=self.focal_loss_gamma,
alpha=self.focal_loss_alpha)
return loss_ins, loss_cate
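The category branch treats label 0 as background: labels are one-hot encoded over num_classes + 1 bins and the first column is dropped, so background rows become all-zero targets. A small sketch of that shift (num_classes = 80 assumed for illustration):

import paddle
import paddle.nn.functional as F

cate_labels = paddle.to_tensor([0, 3, 1], dtype='int64')  # 0 means background
onehot = F.one_hot(cate_labels, num_classes=81)[:, 1:]    # shape [3, 80]
# row 0 is all zeros (pure negative); rows 1 and 2 mark foreground
# classes 2 and 0 respectively (labels shifted down by one)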

View File

@@ -0,0 +1,430 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/PeizeSun/SparseR-CNN/blob/main/projects/SparseRCNN/sparsercnn/loss.py
The copyright of PeizeSun/SparseR-CNN is as follows:
MIT License [see LICENSE for details]
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.optimize import linear_sum_assignment
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.metric import accuracy
from ppdet.core.workspace import register
from ppdet.modeling.losses.iou_loss import GIoULoss
__all__ = ["SparseRCNNLoss"]
@register
class SparseRCNNLoss(nn.Layer):
""" This class computes the loss for SparseRCNN.
The process happens in two steps:
1) we compute hungarian assignment between ground truth boxes and the outputs of the model
2) we supervise each pair of matched ground-truth / prediction (supervise class and box)
"""
__shared__ = ['num_classes']
def __init__(self,
losses,
focal_loss_alpha,
focal_loss_gamma,
num_classes=80,
class_weight=2.,
l1_weight=5.,
giou_weight=2.):
""" Create the criterion.
Parameters:
num_classes: number of object categories, omitting the special no-object category
weight_dict: dict containing as key the names of the losses and as values their relative weight.
losses: list of all the losses to be applied. See get_loss for list of available losses.
matcher: module able to compute a matching between targets and proposals
"""
super().__init__()
self.num_classes = num_classes
weight_dict = {
"loss_ce": class_weight,
"loss_bbox": l1_weight,
"loss_giou": giou_weight
}
self.weight_dict = weight_dict
self.losses = losses
self.giou_loss = GIoULoss(reduction="sum")
self.focal_loss_alpha = focal_loss_alpha
self.focal_loss_gamma = focal_loss_gamma
self.matcher = HungarianMatcher(focal_loss_alpha, focal_loss_gamma,
class_weight, l1_weight, giou_weight)
def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
"""Classification loss (NLL)
targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes]
"""
assert 'pred_logits' in outputs
src_logits = outputs['pred_logits']
idx = self._get_src_permutation_idx(indices)
target_classes_o = paddle.concat([
paddle.gather(
t["labels"], J, axis=0) for t, (_, J) in zip(targets, indices)
])
target_classes = paddle.full(
src_logits.shape[:2], self.num_classes, dtype="int32")
for i, ind in enumerate(zip(idx[0], idx[1])):
target_classes[int(ind[0]), int(ind[1])] = target_classes_o[i]
target_classes.stop_gradient = True
src_logits = src_logits.flatten(start_axis=0, stop_axis=1)
# prepare one_hot target.
target_classes = target_classes.flatten(start_axis=0, stop_axis=1)
class_ids = paddle.arange(0, self.num_classes)
labels = (target_classes.unsqueeze(-1) == class_ids).astype("float32")
labels.stop_gradient = True
# comp focal loss.
class_loss = sigmoid_focal_loss(
src_logits,
labels,
alpha=self.focal_loss_alpha,
gamma=self.focal_loss_gamma,
reduction="sum", ) / num_boxes
losses = {'loss_ce': class_loss}
if log:
label_acc = target_classes_o.unsqueeze(-1)
src_idx = [src for (src, _) in indices]
pred_list = []
for i in range(outputs["pred_logits"].shape[0]):
pred_list.append(
paddle.gather(
outputs["pred_logits"][i], src_idx[i], axis=0))
pred = F.sigmoid(paddle.concat(pred_list, axis=0))
acc = accuracy(pred, label_acc.astype("int64"))
losses["acc"] = acc
return losses
def loss_boxes(self, outputs, targets, indices, num_boxes):
"""Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
"""
assert 'pred_boxes' in outputs # [batch_size, num_proposals, 4]
src_idx = [src for (src, _) in indices]
src_boxes_list = []
for i in range(outputs["pred_boxes"].shape[0]):
src_boxes_list.append(
paddle.gather(
outputs["pred_boxes"][i], src_idx[i], axis=0))
src_boxes = paddle.concat(src_boxes_list, axis=0)
target_boxes = paddle.concat(
[
paddle.gather(
t['boxes'], I, axis=0)
for t, (_, I) in zip(targets, indices)
],
axis=0)
target_boxes.stop_gradient = True
losses = {}
losses['loss_giou'] = self.giou_loss(src_boxes,
target_boxes) / num_boxes
image_size = paddle.concat([v["img_whwh_tgt"] for v in targets])
src_boxes_ = src_boxes / image_size
target_boxes_ = target_boxes / image_size
loss_bbox = F.l1_loss(src_boxes_, target_boxes_, reduction='sum')
losses['loss_bbox'] = loss_bbox / num_boxes
return losses
def _get_src_permutation_idx(self, indices):
# permute predictions following indices
batch_idx = paddle.concat(
[paddle.full_like(src, i) for i, (src, _) in enumerate(indices)])
src_idx = paddle.concat([src for (src, _) in indices])
return batch_idx, src_idx
def _get_tgt_permutation_idx(self, indices):
# permute targets following indices
batch_idx = paddle.concat(
[paddle.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
tgt_idx = paddle.concat([tgt for (_, tgt) in indices])
return batch_idx, tgt_idx
def get_loss(self, loss, outputs, targets, indices, num_boxes, **kwargs):
loss_map = {
'labels': self.loss_labels,
'boxes': self.loss_boxes,
}
assert loss in loss_map, f'do you really want to compute {loss} loss?'
return loss_map[loss](outputs, targets, indices, num_boxes, **kwargs)
def forward(self, outputs, targets):
""" This performs the loss computation.
Parameters:
outputs: dict of tensors, see the output specification of the model for the format
targets: list of dicts, such that len(targets) == batch_size.
The expected keys in each dict depends on the losses applied, see each loss' doc
"""
outputs_without_aux = {
k: v
for k, v in outputs.items() if k != 'aux_outputs'
}
# Retrieve the matching between the outputs of the last layer and the targets
indices = self.matcher(outputs_without_aux, targets)
# Compute the average number of target boxes across all nodes, for normalization purposes
num_boxes = sum(len(t["labels"]) for t in targets)
num_boxes = paddle.to_tensor(
[num_boxes],
dtype="float32",
place=next(iter(outputs.values())).place)
# Compute all the requested losses
losses = {}
for loss in self.losses:
losses.update(
self.get_loss(loss, outputs, targets, indices, num_boxes))
# In case of auxiliary losses, we repeat this process with the output of each intermediate layer.
if 'aux_outputs' in outputs:
for i, aux_outputs in enumerate(outputs['aux_outputs']):
indices = self.matcher(aux_outputs, targets)
for loss in self.losses:
kwargs = {}
if loss == 'labels':
# Logging is enabled only for the last layer
kwargs = {'log': False}
l_dict = self.get_loss(loss, aux_outputs, targets, indices,
num_boxes, **kwargs)
w_dict = {}
for k in l_dict.keys():
if k in self.weight_dict:
w_dict[k + f'_{i}'] = l_dict[k] * self.weight_dict[
k]
else:
w_dict[k + f'_{i}'] = l_dict[k]
losses.update(w_dict)
return losses
class HungarianMatcher(nn.Layer):
"""This class computes an assignment between the targets and the predictions of the network
For efficiency reasons, the targets don't include the no_object. Because of this, in general,
there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
while the others are un-matched (and thus treated as non-objects).
"""
def __init__(self,
focal_loss_alpha,
focal_loss_gamma,
cost_class: float=1,
cost_bbox: float=1,
cost_giou: float=1):
"""Creates the matcher
Params:
cost_class: This is the relative weight of the classification error in the matching cost
cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
"""
super().__init__()
self.cost_class = cost_class
self.cost_bbox = cost_bbox
self.cost_giou = cost_giou
self.focal_loss_alpha = focal_loss_alpha
self.focal_loss_gamma = focal_loss_gamma
        assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs can't be 0"
@paddle.no_grad()
def forward(self, outputs, targets):
""" Performs the matching
Args:
outputs: This is a dict that contains at least these entries:
"pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
"pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
eg. outputs = {"pred_logits": pred_logits, "pred_boxes": pred_boxes}
targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
"labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
objects in the target) containing the class labels
"boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
eg. targets = [{"labels":labels, "boxes": boxes}, ...,{"labels":labels, "boxes": boxes}]
Returns:
A list of size batch_size, containing tuples of (index_i, index_j) where:
- index_i is the indices of the selected predictions (in order)
- index_j is the indices of the corresponding selected targets (in order)
For each batch element, it holds:
len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
"""
bs, num_queries = outputs["pred_logits"].shape[:2]
if sum(len(v["labels"]) for v in targets) == 0:
return [(paddle.to_tensor(
[], dtype=paddle.int64), paddle.to_tensor(
[], dtype=paddle.int64)) for _ in range(bs)]
# We flatten to compute the cost matrices in a batch
out_prob = F.sigmoid(outputs["pred_logits"].flatten(
start_axis=0, stop_axis=1))
out_bbox = outputs["pred_boxes"].flatten(start_axis=0, stop_axis=1)
# Also concat the target labels and boxes
tgt_ids = paddle.concat([v["labels"] for v in targets])
assert (tgt_ids > -1).all()
tgt_bbox = paddle.concat([v["boxes"] for v in targets])
        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it with 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, so it can be omitted.
alpha = self.focal_loss_alpha
gamma = self.focal_loss_gamma
neg_cost_class = (1 - alpha) * (out_prob**gamma) * (-(
1 - out_prob + 1e-8).log())
pos_cost_class = alpha * ((1 - out_prob)
**gamma) * (-(out_prob + 1e-8).log())
cost_class = paddle.gather(
pos_cost_class, tgt_ids, axis=1) - paddle.gather(
neg_cost_class, tgt_ids, axis=1)
# Compute the L1 cost between boxes
image_size_out = paddle.concat(
[v["img_whwh"].unsqueeze(0) for v in targets])
image_size_out = image_size_out.unsqueeze(1).tile(
[1, num_queries, 1]).flatten(
start_axis=0, stop_axis=1)
image_size_tgt = paddle.concat([v["img_whwh_tgt"] for v in targets])
out_bbox_ = out_bbox / image_size_out
tgt_bbox_ = tgt_bbox / image_size_tgt
cost_bbox = F.l1_loss(
out_bbox_.unsqueeze(-2), tgt_bbox_,
reduction='none').sum(-1) # [batch_size * num_queries, num_tgts]
        # Compute the GIoU cost between boxes
cost_giou = -get_bboxes_giou(out_bbox, tgt_bbox)
# Final cost matrix
C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
C = C.reshape([bs, num_queries, -1])
sizes = [len(v["boxes"]) for v in targets]
indices = [
linear_sum_assignment(c[i].numpy())
for i, c in enumerate(C.split(sizes, -1))
]
return [(paddle.to_tensor(
i, dtype="int32"), paddle.to_tensor(
j, dtype="int32")) for i, j in indices]
def box_area(boxes):
assert (boxes[:, 2:] >= boxes[:, :2]).all()
wh = boxes[:, 2:] - boxes[:, :2]
return wh[:, 0] * wh[:, 1]
def boxes_iou(boxes1, boxes2):
'''
Compute iou
Args:
boxes1 (paddle.tensor) shape (N, 4)
boxes2 (paddle.tensor) shape (M, 4)
Return:
(paddle.tensor) shape (N, M)
'''
area1 = box_area(boxes1)
area2 = box_area(boxes2)
lt = paddle.maximum(boxes1.unsqueeze(-2)[:, :, :2], boxes2[:, :2])
rb = paddle.minimum(boxes1.unsqueeze(-2)[:, :, 2:], boxes2[:, 2:])
wh = (rb - lt).astype("float32").clip(min=1e-9)
inter = wh[:, :, 0] * wh[:, :, 1]
union = area1.unsqueeze(-1) + area2 - inter + 1e-9
iou = inter / union
return iou, union
def get_bboxes_giou(boxes1, boxes2, eps=1e-9):
"""calculate the ious of boxes1 and boxes2
Args:
boxes1 (Tensor): shape [N, 4]
boxes2 (Tensor): shape [M, 4]
eps (float): epsilon to avoid divide by zero
Return:
ious (Tensor): ious of boxes1 and boxes2, with the shape [N, M]
"""
assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
iou, union = boxes_iou(boxes1, boxes2)
lt = paddle.minimum(boxes1.unsqueeze(-2)[:, :, :2], boxes2[:, :2])
rb = paddle.maximum(boxes1.unsqueeze(-2)[:, :, 2:], boxes2[:, 2:])
wh = (rb - lt).astype("float32").clip(min=eps)
enclose_area = wh[:, :, 0] * wh[:, :, 1]
giou = iou - (enclose_area - union) / enclose_area
return giou
def sigmoid_focal_loss(inputs, targets, alpha, gamma, reduction="sum"):
    assert reduction in ["sum", "mean"], f'unsupported reduction: {reduction}'
p = F.sigmoid(inputs)
ce_loss = F.binary_cross_entropy_with_logits(
inputs, targets, reduction="none")
p_t = p * targets + (1 - p) * (1 - targets)
loss = ce_loss * ((1 - p_t)**gamma)
if alpha >= 0:
alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
loss = alpha_t * loss
if reduction == "mean":
loss = loss.mean()
elif reduction == "sum":
loss = loss.sum()
return loss
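# A minimal smoke test for the focal loss above (illustrative only: the
# logits and one-hot targets are made up, and the block assumes a working
# paddle install).
if __name__ == '__main__':
    logits = paddle.to_tensor([[2.0, -1.0, 0.5], [-0.5, 1.5, -2.0]])
    onehot = paddle.to_tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    print(sigmoid_focal_loss(logits, onehot, alpha=0.25, gamma=2.0))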

View File

@@ -0,0 +1,168 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import iou_similarity, bbox2delta
__all__ = ['SSDLoss']
@register
class SSDLoss(nn.Layer):
"""
SSDLoss
Args:
        overlap_threshold (float32, optional): IoU threshold separating
            positive from negative matches between prior boxes and ground
            truth, 0.5 by default.
        neg_pos_ratio (float): Ratio of negative to positive samples kept
            by hard negative mining.
loc_loss_weight (float): The weight of loc_loss.
conf_loss_weight (float): The weight of conf_loss.
prior_box_var (list): Variances corresponding to prior box coord, [0.1,
0.1, 0.2, 0.2] by default.
"""
def __init__(self,
overlap_threshold=0.5,
neg_pos_ratio=3.0,
loc_loss_weight=1.0,
conf_loss_weight=1.0,
prior_box_var=[0.1, 0.1, 0.2, 0.2]):
super(SSDLoss, self).__init__()
self.overlap_threshold = overlap_threshold
self.neg_pos_ratio = neg_pos_ratio
self.loc_loss_weight = loc_loss_weight
self.conf_loss_weight = conf_loss_weight
self.prior_box_var = [1. / a for a in prior_box_var]
def _bipartite_match_for_batch(self, gt_bbox, gt_label, prior_boxes,
bg_index):
"""
Args:
gt_bbox (Tensor): [B, N, 4]
gt_label (Tensor): [B, N, 1]
prior_boxes (Tensor): [A, 4]
bg_index (int): Background class index
"""
batch_size, num_priors = gt_bbox.shape[0], prior_boxes.shape[0]
ious = iou_similarity(gt_bbox.reshape((-1, 4)), prior_boxes).reshape(
(batch_size, -1, num_priors))
# For each prior box, get the max IoU of all GTs.
prior_max_iou, prior_argmax_iou = ious.max(axis=1), ious.argmax(axis=1)
# For each GT, get the max IoU of all prior boxes.
gt_max_iou, gt_argmax_iou = ious.max(axis=2), ious.argmax(axis=2)
# Gather target bbox and label according to 'prior_argmax_iou' index.
batch_ind = paddle.arange(end=batch_size, dtype='int64').unsqueeze(-1)
prior_argmax_iou = paddle.stack(
[batch_ind.tile([1, num_priors]), prior_argmax_iou], axis=-1)
targets_bbox = paddle.gather_nd(gt_bbox, prior_argmax_iou)
targets_label = paddle.gather_nd(gt_label, prior_argmax_iou)
# Assign negative
bg_index_tensor = paddle.full([batch_size, num_priors, 1], bg_index,
'int64')
targets_label = paddle.where(
prior_max_iou.unsqueeze(-1) < self.overlap_threshold,
bg_index_tensor, targets_label)
# Ensure each GT can match the max IoU prior box.
batch_ind = (batch_ind * num_priors + gt_argmax_iou).flatten()
targets_bbox = paddle.scatter(
targets_bbox.reshape([-1, 4]), batch_ind,
gt_bbox.reshape([-1, 4])).reshape([batch_size, -1, 4])
targets_label = paddle.scatter(
targets_label.reshape([-1, 1]), batch_ind,
gt_label.reshape([-1, 1])).reshape([batch_size, -1, 1])
targets_label[:, :1] = bg_index
# Encode box
prior_boxes = prior_boxes.unsqueeze(0).tile([batch_size, 1, 1])
targets_bbox = bbox2delta(
prior_boxes.reshape([-1, 4]),
targets_bbox.reshape([-1, 4]), self.prior_box_var)
targets_bbox = targets_bbox.reshape([batch_size, -1, 4])
return targets_bbox, targets_label
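    # Worked toy example (illustrative): with overlap_threshold = 0.5, one
    # image, and an IoU matrix between 2 GTs and 3 priors of
    #     prior0  prior1  prior2
    # gt0 [0.10,  0.60,   0.20]
    # gt1 [0.40,  0.30,   0.45]
    # the threshold step labels prior1 with gt0 and priors 0/2 as background
    # (their max IoU is below 0.5); the scatter step then force-assigns
    # prior2 to gt1 (its best prior), so every GT is matched at least once.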
def _mine_hard_example(self,
conf_loss,
targets_label,
bg_index,
mine_neg_ratio=0.01):
pos = (targets_label != bg_index).astype(conf_loss.dtype)
num_pos = pos.sum(axis=1, keepdim=True)
neg = (targets_label == bg_index).astype(conf_loss.dtype)
conf_loss = conf_loss.detach() * neg
loss_idx = conf_loss.argsort(axis=1, descending=True)
idx_rank = loss_idx.argsort(axis=1)
num_negs = []
for i in range(conf_loss.shape[0]):
cur_num_pos = num_pos[i]
num_neg = paddle.clip(
cur_num_pos * self.neg_pos_ratio, max=pos.shape[1])
num_neg = num_neg if num_neg > 0 else paddle.to_tensor(
[pos.shape[1] * mine_neg_ratio])
num_negs.append(num_neg)
num_negs = paddle.stack(num_negs).expand_as(idx_rank)
neg_mask = (idx_rank < num_negs).astype(conf_loss.dtype)
return (neg_mask + pos).astype('bool')
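    # The argsort-of-argsort trick above turns losses into per-box ranks.
    # Toy example (illustrative): conf_loss = [0.9, 0.1, 0.5] gives
    # loss_idx = [0, 2, 1] (indices sorted by descending loss) and
    # idx_rank = [0, 2, 1] (rank of each box: 0.9 -> 0, 0.1 -> 2, 0.5 -> 1);
    # keeping idx_rank < num_neg therefore selects exactly the num_neg boxes
    # with the highest confidence loss as hard negatives.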
def forward(self, boxes, scores, gt_bbox, gt_label, prior_boxes):
boxes = paddle.concat(boxes, axis=1)
scores = paddle.concat(scores, axis=1)
gt_label = gt_label.unsqueeze(-1).astype('int64')
prior_boxes = paddle.concat(prior_boxes, axis=0)
bg_index = scores.shape[-1] - 1
# Match bbox and get targets.
targets_bbox, targets_label = \
self._bipartite_match_for_batch(gt_bbox, gt_label, prior_boxes, bg_index)
targets_bbox.stop_gradient = True
targets_label.stop_gradient = True
# Compute regression loss.
# Select positive samples.
bbox_mask = paddle.tile(targets_label != bg_index, [1, 1, 4])
if bbox_mask.astype(boxes.dtype).sum() > 0:
location = paddle.masked_select(boxes, bbox_mask)
targets_bbox = paddle.masked_select(targets_bbox, bbox_mask)
loc_loss = F.smooth_l1_loss(location, targets_bbox, reduction='sum')
loc_loss = loc_loss * self.loc_loss_weight
else:
loc_loss = paddle.zeros([1])
# Compute confidence loss.
conf_loss = F.cross_entropy(scores, targets_label, reduction="none")
# Mining hard examples.
label_mask = self._mine_hard_example(
conf_loss.squeeze(-1), targets_label.squeeze(-1), bg_index)
conf_loss = paddle.masked_select(conf_loss, label_mask.unsqueeze(-1))
conf_loss = conf_loss.sum() * self.conf_loss_weight
# Compute overall weighted loss.
normalizer = (targets_label != bg_index).astype('float32').sum().clip(
min=1)
loss = (conf_loss + loc_loss) / normalizer
return loss
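# Recap of the forward pass above (illustrative notation): with N_pos matched
# priors,
#   loss = (conf_loss_weight * conf_sum + loc_loss_weight * loc_sum)
#          / max(N_pos, 1)
# where loc_sum is a smooth-L1 over the encoded deltas of positive priors
# only, and conf_sum is the cross entropy summed over positives plus the hard
# negatives picked by _mine_hard_example.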

View File

@@ -0,0 +1,83 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import random
from ppdet.core.workspace import register
__all__ = ['SupContrast']
@register
class SupContrast(nn.Layer):
    __shared__ = ['num_classes']
def __init__(self, num_classes=80, temperature=2.5, sample_num=4096, thresh=0.75):
super(SupContrast, self).__init__()
self.num_classes = num_classes
self.temperature = temperature
self.sample_num = sample_num
self.thresh = thresh
def forward(self, features, labels, scores):
assert features.shape[0] == labels.shape[0] == scores.shape[0]
positive_mask = (labels < self.num_classes)
positive_features, positive_labels, positive_scores = features[positive_mask], labels[positive_mask], \
scores[positive_mask]
negative_mask = (labels == self.num_classes)
negative_features, negative_labels, negative_scores = features[negative_mask], labels[negative_mask], \
scores[negative_mask]
        N = negative_features.shape[0]
        S = self.sample_num - positive_mask.sum()
        # sample S negatives without replacement (assumes N >= S)
        index = paddle.to_tensor(random.sample(range(N), int(S)), dtype='int32')
negative_features = paddle.index_select(x=negative_features, index=index, axis=0)
negative_labels = paddle.index_select(x=negative_labels, index=index, axis=0)
negative_scores = paddle.index_select(x=negative_scores, index=index, axis=0)
features = paddle.concat([positive_features, negative_features], 0)
labels = paddle.concat([positive_labels, negative_labels], 0)
scores = paddle.concat([positive_scores, negative_scores], 0)
if len(labels.shape) == 1:
labels = labels.reshape([-1, 1])
        # cast the boolean mask so it can enter the arithmetic below
        label_mask = paddle.equal(labels, labels.T).astype('float32').detach()
similarity = (paddle.matmul(features, features.T) / self.temperature)
sim_row_max = paddle.max(similarity, axis=1, keepdim=True)
similarity = similarity - sim_row_max
logits_mask = paddle.ones_like(similarity).detach()
logits_mask.fill_diagonal_(0)
exp_sim = paddle.exp(similarity) * logits_mask
log_prob = similarity - paddle.log(exp_sim.sum(axis=1, keepdim=True))
per_label_log_prob = (log_prob * logits_mask * label_mask).sum(1) / label_mask.sum(1)
keep = scores > self.thresh
per_label_log_prob = per_label_log_prob[keep]
loss = -per_label_log_prob
return loss.mean()
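# Up to the diagonal term kept in label_mask, the vectorized block above
# computes the supervised contrastive loss of Khosla et al. (2020): for each
# retained sample i with positive set P(i) (other samples sharing its label)
# and temperature T,
#   L_i = -(1 / |P(i)|) * sum_{p in P(i)} log( exp(z_i . z_p / T)
#                                             / sum_{a != i} exp(z_i . z_a / T) )
# Rows are max-shifted before exponentiation for numerical stability, and
# only samples whose score exceeds `thresh` contribute to the final mean.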

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/varifocal_loss.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling import ops
__all__ = ['VarifocalLoss']
def varifocal_loss(pred,
target,
alpha=0.75,
gamma=2.0,
iou_weighted=True,
use_sigmoid=True):
"""`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_
Args:
pred (Tensor): The prediction with shape (N, C), C is the
number of classes
target (Tensor): The learning target of the iou-aware
classification score with shape (N, C), C is the number of classes.
alpha (float, optional): A balance factor for the negative part of
Varifocal Loss, which is different from the alpha of Focal Loss.
Defaults to 0.75.
gamma (float, optional): The gamma for calculating the modulating
factor. Defaults to 2.0.
iou_weighted (bool, optional): Whether to weight the loss of the
            positive example with the iou target. Defaults to True.
        use_sigmoid (bool, optional): Whether to apply sigmoid to `pred`
            before computing the loss. Defaults to True.
    """
# pred and target should be of the same size
assert pred.shape == target.shape
if use_sigmoid:
pred_new = F.sigmoid(pred)
else:
pred_new = pred
target = target.cast(pred.dtype)
if iou_weighted:
focal_weight = target * (target > 0.0).cast('float32') + \
alpha * (pred_new - target).abs().pow(gamma) * \
(target <= 0.0).cast('float32')
else:
focal_weight = (target > 0.0).cast('float32') + \
alpha * (pred_new - target).abs().pow(gamma) * \
(target <= 0.0).cast('float32')
if use_sigmoid:
loss = F.binary_cross_entropy_with_logits(
pred, target, reduction='none') * focal_weight
else:
loss = F.binary_cross_entropy(
pred, target, reduction='none') * focal_weight
loss = loss.sum(axis=1)
return loss
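# Weighting recap (illustrative numbers): with alpha = 0.75 and gamma = 2.0,
# a positive location with IoU target q = 0.9 is weighted by q itself (when
# iou_weighted), while a negative location (q = 0) with predicted score
# p = 0.3 gets weight 0.75 * |0.3 - 0|^2 = 0.0675 -- easy negatives
# contribute little, and positives are scaled by their localization quality.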
@register
@serializable
class VarifocalLoss(nn.Layer):
def __init__(self,
use_sigmoid=True,
alpha=0.75,
gamma=2.0,
iou_weighted=True,
reduction='mean',
loss_weight=1.0):
"""`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_
Args:
use_sigmoid (bool, optional): Whether the prediction is
used for sigmoid or softmax. Defaults to True.
alpha (float, optional): A balance factor for the negative part of
Varifocal Loss, which is different from the alpha of Focal
Loss. Defaults to 0.75.
gamma (float, optional): The gamma for calculating the modulating
factor. Defaults to 2.0.
iou_weighted (bool, optional): Whether to weight the loss of the
positive examples with the iou target. Defaults to True.
reduction (str, optional): The method used to reduce the loss into
a scalar. Defaults to 'mean'. Options are "none", "mean" and
"sum".
loss_weight (float, optional): Weight of loss. Defaults to 1.0.
"""
super(VarifocalLoss, self).__init__()
assert alpha >= 0.0
self.use_sigmoid = use_sigmoid
self.alpha = alpha
self.gamma = gamma
self.iou_weighted = iou_weighted
self.reduction = reduction
self.loss_weight = loss_weight
def forward(self, pred, target, weight=None, avg_factor=None):
"""Forward function.
Args:
pred (Tensor): The prediction.
target (Tensor): The learning target of the prediction.
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
Returns:
Tensor: The calculated loss
"""
loss = self.loss_weight * varifocal_loss(
pred,
target,
alpha=self.alpha,
gamma=self.gamma,
iou_weighted=self.iou_weighted,
use_sigmoid=self.use_sigmoid)
if weight is not None:
loss = loss * weight
if avg_factor is None:
if self.reduction == 'none':
return loss
elif self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if self.reduction == 'mean':
loss = loss.sum() / avg_factor
# if reduction is 'none', then do nothing, otherwise raise an error
elif self.reduction != 'none':
raise ValueError(
'avg_factor can not be used with reduction="sum"')
return loss
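# A minimal smoke test of the functional form above (illustrative only: the
# logits and IoU-aware targets are made up, and the block assumes a working
# paddle install).
if __name__ == '__main__':
    pred = paddle.to_tensor([[2.0, -1.0], [-0.5, 1.0]])  # logits, N=2, C=2
    target = paddle.to_tensor([[0.8, 0.0], [0.0, 0.6]])  # IoU-aware targets
    print(varifocal_loss(pred, target, alpha=0.75, gamma=2.0))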

View File

@@ -0,0 +1,207 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ..bbox_utils import decode_yolo, xywh2xyxy, batch_iou_similarity
__all__ = ['YOLOv3Loss']
def bbox_transform(pbox, anchor, downsample):
pbox = decode_yolo(pbox, anchor, downsample)
pbox = xywh2xyxy(pbox)
return pbox
@register
class YOLOv3Loss(nn.Layer):
__inject__ = ['iou_loss', 'iou_aware_loss']
__shared__ = ['num_classes']
def __init__(self,
num_classes=80,
ignore_thresh=0.7,
label_smooth=False,
downsample=[32, 16, 8],
scale_x_y=1.,
iou_loss=None,
iou_aware_loss=None):
"""
YOLOv3Loss layer
Args:
            num_classes (int): number of foreground classes
ignore_thresh (float): threshold to ignore confidence loss
label_smooth (bool): whether to use label smoothing
downsample (list): downsample ratio for each detection block
scale_x_y (float): scale_x_y factor
iou_loss (object): IoULoss instance
iou_aware_loss (object): IouAwareLoss instance
"""
super(YOLOv3Loss, self).__init__()
self.num_classes = num_classes
self.ignore_thresh = ignore_thresh
self.label_smooth = label_smooth
self.downsample = downsample
self.scale_x_y = scale_x_y
self.iou_loss = iou_loss
self.iou_aware_loss = iou_aware_loss
self.distill_pairs = []
def obj_loss(self, pbox, gbox, pobj, tobj, anchor, downsample):
# pbox
pbox = decode_yolo(pbox, anchor, downsample)
pbox = xywh2xyxy(pbox)
pbox = paddle.concat(pbox, axis=-1)
b = pbox.shape[0]
pbox = pbox.reshape((b, -1, 4))
        # gbox: convert (cx, cy, w, h) to corner form (x1, y1, x2, y2);
        # note that 'gwh' actually holds the bottom-right corner here
        gxy = gbox[:, :, 0:2] - gbox[:, :, 2:4] * 0.5
        gwh = gbox[:, :, 0:2] + gbox[:, :, 2:4] * 0.5
        gbox = paddle.concat([gxy, gwh], axis=-1)
iou = batch_iou_similarity(pbox, gbox)
iou.stop_gradient = True
iou_max = iou.max(2) # [N, M1]
iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype)
iou_mask.stop_gradient = True
pobj = pobj.reshape((b, -1))
tobj = tobj.reshape((b, -1))
obj_mask = paddle.cast(tobj > 0, dtype=pbox.dtype)
obj_mask.stop_gradient = True
loss_obj = F.binary_cross_entropy_with_logits(
pobj, obj_mask, reduction='none')
loss_obj_pos = (loss_obj * tobj)
loss_obj_neg = (loss_obj * (1 - obj_mask) * iou_mask)
return loss_obj_pos + loss_obj_neg
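    # Ignore-region recap (illustrative): a predicted box whose best IoU with
    # any GT exceeds ignore_thresh (0.7 by default) receives neither a
    # positive nor a negative objectness gradient; it is likely a decent
    # detection that simply was not the assigned anchor, so penalizing it as
    # background would be harmful.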
def cls_loss(self, pcls, tcls):
if self.label_smooth:
delta = min(1. / self.num_classes, 1. / 40)
pos, neg = 1 - delta, delta
# 1 for positive, 0 for negative
tcls = pos * paddle.cast(
tcls > 0., dtype=tcls.dtype) + neg * paddle.cast(
tcls <= 0., dtype=tcls.dtype)
loss_cls = F.binary_cross_entropy_with_logits(
pcls, tcls, reduction='none')
return loss_cls
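    # Label-smoothing arithmetic (illustrative): with num_classes = 80,
    # delta = min(1/80, 1/40) = 0.0125, so positive targets become
    # 1 - 0.0125 = 0.9875 and negative targets 0.0125 instead of hard 1/0.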
def yolov3_loss(self, p, t, gt_box, anchor, downsample, scale=1.,
eps=1e-10):
na = len(anchor)
b, c, h, w = p.shape
if self.iou_aware_loss:
ioup, p = p[:, 0:na, :, :], p[:, na:, :, :]
ioup = ioup.unsqueeze(-1)
p = p.reshape((b, na, -1, h, w)).transpose((0, 1, 3, 4, 2))
x, y = p[:, :, :, :, 0:1], p[:, :, :, :, 1:2]
w, h = p[:, :, :, :, 2:3], p[:, :, :, :, 3:4]
obj, pcls = p[:, :, :, :, 4:5], p[:, :, :, :, 5:]
self.distill_pairs.append([x, y, w, h, obj, pcls])
t = t.transpose((0, 1, 3, 4, 2))
tx, ty = t[:, :, :, :, 0:1], t[:, :, :, :, 1:2]
tw, th = t[:, :, :, :, 2:3], t[:, :, :, :, 3:4]
tscale = t[:, :, :, :, 4:5]
tobj, tcls = t[:, :, :, :, 5:6], t[:, :, :, :, 6:]
tscale_obj = tscale * tobj
loss = dict()
x = scale * F.sigmoid(x) - 0.5 * (scale - 1.)
y = scale * F.sigmoid(y) - 0.5 * (scale - 1.)
if abs(scale - 1.) < eps:
loss_x = F.binary_cross_entropy(x, tx, reduction='none')
loss_y = F.binary_cross_entropy(y, ty, reduction='none')
loss_xy = tscale_obj * (loss_x + loss_y)
else:
loss_x = paddle.abs(x - tx)
loss_y = paddle.abs(y - ty)
loss_xy = tscale_obj * (loss_x + loss_y)
loss_xy = loss_xy.sum([1, 2, 3, 4]).mean()
loss_w = paddle.abs(w - tw)
loss_h = paddle.abs(h - th)
loss_wh = tscale_obj * (loss_w + loss_h)
loss_wh = loss_wh.sum([1, 2, 3, 4]).mean()
loss['loss_xy'] = loss_xy
loss['loss_wh'] = loss_wh
if self.iou_loss is not None:
            # warning: do not modify x, y, w, h in place
box, tbox = [x, y, w, h], [tx, ty, tw, th]
pbox = bbox_transform(box, anchor, downsample)
gbox = bbox_transform(tbox, anchor, downsample)
loss_iou = self.iou_loss(pbox, gbox)
loss_iou = loss_iou * tscale_obj
loss_iou = loss_iou.sum([1, 2, 3, 4]).mean()
loss['loss_iou'] = loss_iou
if self.iou_aware_loss is not None:
box, tbox = [x, y, w, h], [tx, ty, tw, th]
pbox = bbox_transform(box, anchor, downsample)
gbox = bbox_transform(tbox, anchor, downsample)
loss_iou_aware = self.iou_aware_loss(ioup, pbox, gbox)
loss_iou_aware = loss_iou_aware * tobj
loss_iou_aware = loss_iou_aware.sum([1, 2, 3, 4]).mean()
loss['loss_iou_aware'] = loss_iou_aware
box = [x, y, w, h]
loss_obj = self.obj_loss(box, gt_box, obj, tobj, anchor, downsample)
loss_obj = loss_obj.sum(-1).mean()
loss['loss_obj'] = loss_obj
loss_cls = self.cls_loss(pcls, tcls) * tobj
loss_cls = loss_cls.sum([1, 2, 3, 4]).mean()
loss['loss_cls'] = loss_cls
return loss
def forward(self, inputs, targets, anchors):
        num_levels = len(inputs)
        gt_targets = [targets['target{}'.format(i)] for i in range(num_levels)]
gt_box = targets['gt_bbox']
yolo_losses = dict()
self.distill_pairs.clear()
for x, t, anchor, downsample in zip(inputs, gt_targets, anchors,
self.downsample):
yolo_loss = self.yolov3_loss(
x.astype('float32'), t, gt_box, anchor, downsample,
self.scale_x_y)
for k, v in yolo_loss.items():
if k in yolo_losses:
yolo_losses[k] += v
else:
yolo_losses[k] = v
loss = 0
for k, v in yolo_losses.items():
loss += v
yolo_losses['loss'] = loss
return yolo_losses
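# A small numeric check of the scale_x_y trick in yolov3_loss above
# (illustrative; the values are made up). With scale = 1.05 the sigmoid
# output range (0, 1) is stretched to (-0.025, 1.025), letting predicted
# centers reach grid-cell borders that plain sigmoid only approaches
# asymptotically.
if __name__ == '__main__':
    scale = 1.05
    logits = paddle.to_tensor([-10.0, 0.0, 10.0])
    print(scale * F.sigmoid(logits) - 0.5 * (scale - 1.))
    # ~[-0.0250, 0.5000, 1.0250]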