更换文档检测模型 (Replace the document detection model)
This commit is contained in:
275
paddle_detection/ppdet/modeling/assigners/pose_utils.py
Normal file
275
paddle_detection/ppdet/modeling/assigners/pose_utils.py
Normal file
@@ -0,0 +1,275 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
|
||||
__all__ = ['KptL1Cost', 'OksCost', 'ClassificationCost']
|
||||
|
||||
|
||||
def masked_fill(x, mask, value):
    """Return a copy of ``x`` with positions selected by ``mask`` set to ``value``.

    Args:
        x (Tensor): source tensor.
        mask (Tensor): boolean tensor, broadcast-compatible with ``x``.
        value (int | float): scalar written at the masked positions.

    Returns:
        Tensor: new tensor with the same shape and dtype as ``x``.
    """
    # paddle.where picks from the constant tensor where mask is True,
    # from x elsewhere — equivalent to an out-of-place masked_fill.
    return paddle.where(mask, paddle.full(x.shape, value, x.dtype), x)
|
||||
|
||||
|
||||
@register
class KptL1Cost(object):
    """KptL1Cost.

    L1 matching cost between predicted and ground-truth keypoints, used for
    Hungarian-style pose assignment.

    this function based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py

    Args:
        weight (int | float, optional): loss_weight. Default 1.0.
    """

    def __init__(self, weight=1.0):
        self.weight = weight

    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag):
        """
        Args:
            kpt_pred (Tensor): Predicted keypoints with normalized coordinates
                (x_{i}, y_{i}), which are all in range [0, 1]. Shape
                [num_query, K, 2].
            gt_keypoints (Tensor): Ground truth keypoints with normalized
                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
                Shape [num_gt, K].

        Returns:
            paddle.Tensor: kpt_cost value with weight.
            Shape [num_query, num_gt].
        """
        kpt_cost = []
        for i in range(len(gt_keypoints)):
            if gt_keypoints[i].size == 0:
                # Empty gt: contribute a zero cost that keeps the gradient
                # graph connected to kpt_pred.
                kpt_cost.append(kpt_pred.sum() * 0)
                # BUGFIX: without `continue` the loop fell through and
                # appended a second (invalid) cost for this empty gt, and
                # would compute a pairwise distance against an empty tensor.
                # The sibling FocalLossCost guard returns early the same way.
                continue
            kpt_pred_tmp = kpt_pred.clone()
            valid_flag = valid_kpt_flag[i] > 0
            valid_flag_expand = valid_flag.unsqueeze(0).unsqueeze(-1).expand_as(
                kpt_pred_tmp)
            # Zero-out predictions at invisible keypoints so only valid
            # keypoints contribute to the L1 distance (assumes invalid gt
            # coordinates are also 0, as produced upstream — verify).
            if not valid_flag_expand.all():
                kpt_pred_tmp = masked_fill(kpt_pred_tmp, ~valid_flag_expand, 0)
            cost = F.pairwise_distance(
                kpt_pred_tmp.reshape((kpt_pred_tmp.shape[0], -1)),
                gt_keypoints[i].reshape((-1, )).unsqueeze(0),
                p=1,
                keepdim=True)
            # Normalize by the number of valid coordinates (2 per visible
            # keypoint), clipped to >= 1 to avoid division by zero.
            avg_factor = paddle.clip(
                valid_flag.astype('float32').sum() * 2, 1.0)
            cost = cost / avg_factor
            kpt_cost.append(cost)
        kpt_cost = paddle.concat(kpt_cost, axis=1)
        return kpt_cost * self.weight
|
||||
|
||||
|
||||
@register
class OksCost(object):
    """OksCost.

    Negative Object Keypoint Similarity (OKS) used as a matching cost.

    this function based on: https://github.com/hikvision-research/opera/blob/main/opera/core/bbox/match_costs/match_cost.py

    Args:
        num_keypoints (int): number of keypoints; only the COCO (17) and
            CrowdPose (14) layouts are supported.
        weight (int | float, optional): loss_weight. Default 1.0.
    """

    def __init__(self, num_keypoints=17, weight=1.0):
        self.weight = weight
        # Per-keypoint falloff constants from the COCO keypoint evaluation.
        if num_keypoints == 17:
            self.sigmas = np.array(
                [
                    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07,
                    1.07, .87, .87, .89, .89
                ],
                dtype=np.float32) / 10.0
        elif num_keypoints == 14:
            self.sigmas = np.array(
                [
                    .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89,
                    .89, .79, .79
                ],
                dtype=np.float32) / 10.0
        else:
            raise ValueError(f'Unsupported keypoints number {num_keypoints}')

    def __call__(self, kpt_pred, gt_keypoints, valid_kpt_flag, gt_areas):
        """
        Args:
            kpt_pred (Tensor): Predicted keypoints with unnormalized
                coordinates (x_{i}, y_{i}). Shape [num_query, K, 2].
            gt_keypoints (Tensor): Ground truth keypoints with unnormalized
                coordinates (x_{i}, y_{i}). Shape [num_gt, K, 2].
            valid_kpt_flag (Tensor): valid flag of ground truth keypoints.
                Shape [num_gt, K].
            gt_areas (Tensor): Ground truth mask areas. Shape [num_gt,].

        Returns:
            paddle.Tensor: oks_cost value with weight.
            Shape [num_query, num_gt].
        """
        sigmas = paddle.to_tensor(self.sigmas)
        variances = (sigmas * 2)**2

        oks_cost = []
        assert len(gt_keypoints) == len(gt_areas)
        for i in range(len(gt_keypoints)):
            if gt_keypoints[i].size == 0:
                # Empty gt: zero cost that keeps the graph connected.
                oks_cost.append(kpt_pred.sum() * 0)
                # BUGFIX: without `continue` the loop fell through and
                # appended a second cost for this empty gt (the
                # `num_vis_kpt == 0` guard below already uses `continue`).
                continue
            squared_distance = \
                (kpt_pred[:, :, 0] - gt_keypoints[i, :, 0].unsqueeze(0)) ** 2 + \
                (kpt_pred[:, :, 1] - gt_keypoints[i, :, 1].unsqueeze(0)) ** 2
            vis_flag = (valid_kpt_flag[i] > 0).astype('int')
            vis_ind = vis_flag.nonzero(as_tuple=False)[:, 0]
            num_vis_kpt = vis_ind.shape[0]
            # No visible keypoint: emit a zero cost column and skip.
            if num_vis_kpt == 0:
                oks_cost.append(paddle.zeros((squared_distance.shape[0], 1)))
                continue
            area = gt_areas[i]

            # Standard OKS kernel: exp(-d^2 / (2 * s^2 * area)), averaged
            # over the visible keypoints only.
            squared_distance0 = squared_distance / (area * variances * 2)
            squared_distance0 = paddle.index_select(
                squared_distance0, vis_ind, axis=1)
            squared_distance1 = paddle.exp(-squared_distance0).sum(axis=1,
                                                                   keepdim=True)
            oks = squared_distance1 / num_vis_kpt
            # The 1 is a constant that doesn't change the matching, so omitted.
            oks_cost.append(-oks)
        oks_cost = paddle.concat(oks_cost, axis=1)
        return oks_cost * self.weight
|
||||
|
||||
|
||||
@register
class ClassificationCost:
    """ClsSoftmaxCost.

    Softmax-based classification matching cost.

    Args:
        weight (int | float, optional): loss_weight
    """

    def __init__(self, weight=1.):
        self.weight = weight

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                (num_query, num_class).
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            paddle.Tensor: cls_cost value with weight,
            shape (num_query, num_gt).
        """
        # Following the official DETR repo, contrary to the loss where NLL
        # is used, we approximate it as 1 - cls_score[gt_label]. The
        # constant 1 doesn't change the matching, so it is omitted.
        probs = F.softmax(cls_pred, axis=-1)
        return -probs[:, gt_labels] * self.weight
|
||||
|
||||
|
||||
@register
class FocalLossCost:
    """FocalLossCost.

    Focal-loss based classification matching cost, for either plain logits
    (num_query, num_class) or binary mask logits (``binary_input=True``).

    Args:
        weight (int | float, optional): loss_weight
        alpha (int | float, optional): focal_loss alpha
        gamma (int | float, optional): focal_loss gamma
        eps (float, optional): numerical stabilizer for log(), default 1e-12
        binary_input (bool, optional): Whether the input is binary,
            default False.
    """

    def __init__(self,
                 weight=1.,
                 alpha=0.25,
                 gamma=2,
                 eps=1e-12,
                 binary_input=False):
        self.weight = weight
        self.alpha = alpha
        self.gamma = gamma
        self.eps = eps
        self.binary_input = binary_input

    def _focal_loss_cost(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classification logits, shape
                (num_query, num_class).
            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).

        Returns:
            paddle.Tensor: cls_cost value with weight
        """
        # Empty gt: zero cost that keeps the graph connected to cls_pred.
        if gt_labels.size == 0:
            return cls_pred.sum() * 0
        cls_pred = F.sigmoid(cls_pred)
        # Per-class focal terms; eps guards log(0).
        neg_cost = -(1 - cls_pred + self.eps).log() * (
            1 - self.alpha) * cls_pred.pow(self.gamma)
        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
            1 - cls_pred).pow(self.gamma)

        # Gather the columns of the gt classes; cost is pos minus neg so
        # confident wrong predictions are penalized relative to background.
        cls_cost = paddle.index_select(
            pos_cost, gt_labels, axis=1) - paddle.index_select(
                neg_cost, gt_labels, axis=1)
        return cls_cost * self.weight

    def _mask_focal_loss_cost(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classfication logits
                in shape (num_query, d1, ..., dn), dtype=paddle.float32.
            gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),
                dtype=paddle.long. Labels should be binary.

        Returns:
            Tensor: Focal cost matrix with weight in shape\
                (num_query, num_gt).
        """
        cls_pred = cls_pred.flatten(1)
        # BUGFIX: paddle tensors have no `.float()` method (that is the
        # torch API); use astype('float32') instead.
        gt_labels = gt_labels.flatten(1).astype('float32')
        # n = number of flattened mask elements, used to average the cost.
        n = cls_pred.shape[1]
        cls_pred = F.sigmoid(cls_pred)
        neg_cost = -(1 - cls_pred + self.eps).log() * (
            1 - self.alpha) * cls_pred.pow(self.gamma)
        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (
            1 - cls_pred).pow(self.gamma)

        # Positive terms where gt == 1, negative terms where gt == 0,
        # contracted over the mask elements.
        cls_cost = paddle.einsum('nc,mc->nm', pos_cost, gt_labels) + \
            paddle.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))
        return cls_cost / n * self.weight

    def __call__(self, cls_pred, gt_labels):
        """
        Args:
            cls_pred (Tensor): Predicted classfication logits.
            gt_labels (Tensor)): Labels.

        Returns:
            Tensor: Focal cost matrix with weight in shape\
                (num_query, num_gt).
        """
        if self.binary_input:
            return self._mask_focal_loss_cost(cls_pred, gt_labels)
        else:
            return self._focal_loss_cost(cls_pred, gt_labels)
|
||||
Reference in New Issue
Block a user