更换文档检测模型
This commit is contained in:
217
paddle_detection/ppdet/modeling/architectures/keypoint_petr.py
Normal file
217
paddle_detection/ppdet/modeling/architectures/keypoint_petr.py
Normal file
@@ -0,0 +1,217 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
this code is base on https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register
|
||||
from .meta_arch import BaseArch
|
||||
from .. import layers as L
|
||||
|
||||
__all__ = ['PETR']
|
||||
|
||||
|
||||
@register
class PETR(BaseArch):
    """End-to-end multi-person pose estimation with transformers (PETR).

    PETR, see https://openaccess.thecvf.com/content/CVPR2022/papers/Shi_End-to-End_Multi-Person_Pose_Estimation_With_Transformers_CVPR_2022_paper.pdf
    This code is based on
    https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
    """
    __category__ = 'architecture'
    # Components are resolved by the ppdet workspace and injected by name.
    __inject__ = ['backbone', 'neck', 'bbox_head']

    def __init__(self,
                 backbone='ResNet',
                 neck='ChannelMapper',
                 bbox_head='PETRHead'):
        """
        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer|None): optional neck between backbone and head
            bbox_head (nn.Layer): model output and loss
        """
        super(PETR, self).__init__()
        self.backbone = backbone
        # BUGFIX: the original only defined ``with_neck`` when a neck was
        # given, so building the model with ``neck=None`` made
        # extract_feat() raise AttributeError. Always define the flag.
        self.with_neck = neck is not None
        if self.with_neck:
            self.neck = neck
        self.bbox_head = bbox_head
        self.deploy = False

    def extract_feat(self, img):
        """Directly extract features from the backbone (+ neck, if any)."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def get_inputs(self):
        """Unpack ``self.inputs`` into per-image metas and ground-truth lists.

        Padded (invalid) GT entries are filtered with ``pad_gt_mask``. For an
        image with no valid GT at all, the first (padded) entry is kept so
        every per-image tensor stays non-empty — presumably the matcher/loss
        cannot handle zero-length targets (TODO confirm against bbox_head).

        Returns:
            tuple: ``(img_metas, gt_bboxes, gt_labels, gt_keypoints,
                gt_areas)``, each a list with one entry per batch image.
        """
        img_metas = []
        gt_bboxes = []
        gt_labels = []
        gt_keypoints = []
        gt_areas = []
        # (N, num_gt) boolean mask marking real (non-padded) GT entries.
        pad_gt_mask = self.inputs['pad_gt_mask'].astype("bool").squeeze(-1)
        for idx, im_shape in enumerate(self.inputs['im_shape']):
            img_meta = {
                # Trailing 1 mimics the (h, w, c) convention of the
                # original mmdet-style img_shape.
                'img_shape': im_shape.astype("int32").tolist() + [1, ],
                'batch_input_shape': self.inputs['image'].shape[-2:],
                'image_name': self.inputs['image_file'][idx]
            }
            img_metas.append(img_meta)
            if (not pad_gt_mask[idx].any()):
                # No valid GT: keep a single padded entry per field.
                gt_keypoints.append(self.inputs['gt_joints'][idx][:1])
                gt_labels.append(self.inputs['gt_class'][idx][:1])
                gt_bboxes.append(self.inputs['gt_bbox'][idx][:1])
                gt_areas.append(self.inputs['gt_areas'][idx][:1])
                continue

            gt_keypoints.append(self.inputs['gt_joints'][idx][pad_gt_mask[
                idx]])
            gt_labels.append(self.inputs['gt_class'][idx][pad_gt_mask[idx]])
            gt_bboxes.append(self.inputs['gt_bbox'][idx][pad_gt_mask[idx]])
            gt_areas.append(self.inputs['gt_areas'][idx][pad_gt_mask[idx]])

        return img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas

    def get_loss(self):
        """Compute training losses for the current ``self.inputs`` batch.

        Returns:
            dict[str, Tensor]: individual loss components from the head,
                plus their sum under the key ``'loss'``.
        """
        img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas = \
            self.get_inputs()
        # BUGFIX: ``self.inputs`` is a dict (indexed as self.inputs['image']
        # elsewhere), so ``getattr`` could never find the key; use dict.get.
        # The observable result is unchanged whenever the key is absent.
        gt_bboxes_ignore = self.inputs.get('gt_bboxes_ignore', None)

        x = self.extract_feat(self.inputs)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, gt_keypoints,
                                              gt_areas, gt_bboxes_ignore)
        # Total training loss is the plain sum of every component.
        losses['loss'] = sum(losses.values())
        return losses

    def get_pred_numpy(self):
        """Run a dummy forward pass; used for computing network FLOPs."""
        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        dummy_img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=(1., 1., 1., 1.)) for _ in range(batch_size)
        ]
        x = self.extract_feat(img)
        outs = self.bbox_head(x, img_metas=dummy_img_metas)
        bbox_list = self.bbox_head.get_bboxes(
            *outs, dummy_img_metas, rescale=True)
        return bbox_list

    def get_pred(self):
        """Run inference on ``self.inputs`` and format keypoint output.

        Returns:
            dict: ``{'keypoint': [[keypoints, scores]]}`` where keypoints is
                (n, K, 3) with the confidence channel overwritten by the
                detection (box) score, and scores is the (n,) box scores.
        """
        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=self.inputs['scale_factor'][i])
            for i in range(batch_size)
        ]
        kptpred = self.simple_test(
            self.inputs, img_metas=img_metas, rescale=True)
        # simple_test returns per-image [bbox_results, kpt_results]; batch
        # size is asserted to be 1 there, so index image 0 / class 0.
        keypoints = kptpred[0][1][0]
        bboxs = kptpred[0][0][0]
        # Replace per-keypoint visibility with the detection score.
        keypoints[..., 2] = bboxs[:, None, 4]
        res_lst = [[keypoints, bboxs[:, 4]]]
        outputs = {'keypoint': res_lst}
        return outputs

    def simple_test(self, inputs, img_metas, rescale=False):
        """Test function without test-time augmentation.

        Args:
            inputs (dict): network inputs (contains the batched image).
            img_metas (list[dict]): list of image information.
            rescale (bool, optional): whether to rescale the results.
                Defaults to False.

        Returns:
            list[list[np.ndarray]]: bbox and keypoint results of each image
                and classes. The outer list corresponds to each image.
                The inner list corresponds to each class.
        """
        batch_size = len(img_metas)
        assert batch_size == 1, 'Currently only batch_size 1 for inference ' \
            f'mode is supported. Found batch_size {batch_size}.'
        feat = self.extract_feat(inputs)
        results_list = self.bbox_head.simple_test(
            feat, img_metas, rescale=rescale)

        bbox_kpt_results = [
            self.bbox_kpt2result(det_bboxes, det_labels, det_kpts,
                                 self.bbox_head.num_classes)
            for det_bboxes, det_labels, det_kpts in results_list
        ]
        return bbox_kpt_results

    def bbox_kpt2result(self, bboxes, labels, kpts, num_classes):
        """Split detection results into per-class numpy arrays.

        Args:
            bboxes (paddle.Tensor | np.ndarray): shape (n, 5).
            labels (paddle.Tensor | np.ndarray): shape (n, ).
            kpts (paddle.Tensor | np.ndarray): shape (n, K, 3).
            num_classes (int): class number, including background class.

        Returns:
            tuple(list[ndarray], list[ndarray]): per-class bbox arrays and
                per-class keypoint arrays.
        """
        if bboxes.shape[0] == 0:
            # BUGFIX: the original used ``kpts.size(1)`` (torch style, copied
            # from opera/mmdet); paddle's ``Tensor.size`` is not callable and
            # ``ndarray.size`` is an int, so this branch always crashed.
            # ``shape[1]`` works for both tensor and ndarray inputs.
            return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)], \
                [np.zeros((0, kpts.shape[1], 3), dtype=np.float32)
                 for i in range(num_classes)]
        else:
            if isinstance(bboxes, paddle.Tensor):
                bboxes = bboxes.numpy()
                labels = labels.numpy()
                kpts = kpts.numpy()
            return [bboxes[labels == i, :] for i in range(num_classes)], \
                [kpts[labels == i, :, :] for i in range(num_classes)]
|
||||
Reference in New Issue
Block a user