更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,83 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import every architecture module (their classes register themselves via
# @register) and then re-export their public names. The two lists below
# must stay in sync: each `from .X import *` needs a matching
# `from . import X`. Added the missing `blazeface` and `keypoint_petr`
# entries to the module-import list for consistency.
from . import meta_arch
from . import faster_rcnn
from . import mask_rcnn
from . import yolo
from . import ppyoloe
from . import cascade_rcnn
from . import ssd
from . import fcos
from . import solov2
from . import ttfnet
from . import s2anet
from . import keypoint_hrhrnet
from . import keypoint_hrnet
from . import keypoint_vitpose
from . import jde
from . import deepsort
from . import fairmot
from . import centernet
from . import blazeface
from . import gfl
from . import picodet
from . import detr
from . import sparse_rcnn
from . import tood
from . import retinanet
from . import bytetrack
from . import yolox
from . import yolof
from . import pose3d_metro
from . import centertrack
from . import queryinst
from . import keypoint_petr
from . import detr_ssod
from . import multi_stream_detector
from . import clrnet
from .meta_arch import *
from .faster_rcnn import *
from .mask_rcnn import *
from .yolo import *
from .ppyoloe import *
from .cascade_rcnn import *
from .ssd import *
from .fcos import *
from .solov2 import *
from .ttfnet import *
from .s2anet import *
from .keypoint_hrhrnet import *
from .keypoint_hrnet import *
from .keypoint_vitpose import *
from .jde import *
from .deepsort import *
from .fairmot import *
from .centernet import *
from .blazeface import *
from .gfl import *
from .picodet import *
from .detr import *
from .sparse_rcnn import *
from .tood import *
from .retinanet import *
from .bytetrack import *
from .yolox import *
from .yolof import *
from .pose3d_metro import *
from .centertrack import *
from .queryinst import *
from .keypoint_petr import *
from .detr_ssod import *
from .multi_stream_detector import *
from .clrnet import *

View File

@@ -0,0 +1,117 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
import paddle
import paddle.nn.functional as F
__all__ = ['BlazeFace']
@register
class BlazeFace(BaseArch):
    """
    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
    see https://arxiv.org/abs/1907.05047

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        blaze_head (nn.Layer): `blazeHead` instance
        post_process (object): `BBoxPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, blaze_head, neck, post_process):
        super(BlazeFace, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.blaze_head = blaze_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules in dependency order, threading output shapes
        backbone -> neck -> head. `post_process` is injected separately."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        blaze_head = create(cfg['blaze_head'], input_shape=neck.out_shape)
        return {
            'backbone': backbone,
            'neck': neck,
            'blaze_head': blaze_head,
        }

    def _forward(self):
        # backbone -> neck feature extraction
        feats = self.neck(self.backbone(self.inputs))
        if self.training:
            # In train mode the head computes the loss directly.
            return self.blaze_head(feats, self.inputs['image'],
                                   self.inputs['gt_bbox'],
                                   self.inputs['gt_class'])
        preds, anchors = self.blaze_head(feats, self.inputs['image'])
        bbox, bbox_num, nms_keep_idx = self.post_process(
            preds, anchors, self.inputs['im_shape'],
            self.inputs['scale_factor'])
        if not self.use_extra_data:
            return bbox, bbox_num
        # Record raw head outputs before NMS for downstream consumers.
        # preds[1] holds the class logits: [[1xNumBBoxNumClass]]
        cls_logits = paddle.concat(preds[1], axis=1)
        extra_data = {
            'scores': F.softmax(cls_logits).transpose([0, 2, 1]),
            'logits': cls_logits.transpose([0, 2, 1]),
            'nms_keep_idx': nms_keep_idx,  # bbox index before nms
        }
        return bbox, bbox_num, extra_data

    def get_loss(self):
        return {"loss": self._forward()}

    def get_pred(self):
        if self.use_extra_data:
            bbox_pred, bbox_num, extra_data = self._forward()
            return {
                "bbox": bbox_pred,
                "bbox_num": bbox_num,
                "extra_data": extra_data
            }
        bbox_pred, bbox_num = self._forward()
        return {"bbox": bbox_pred, "bbox_num": bbox_num}

View File

@@ -0,0 +1,83 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['ByteTrack']
@register
class ByteTrack(BaseArch):
    """
    ByteTrack network, see https://arxiv.org/abs/2110.06864

    Args:
        detector (object): detector model instance
        reid (object): reid model instance, default None
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self, detector='YOLOX', reid=None, tracker='JDETracker'):
        super(ByteTrack, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # The string 'None' in the config means no ReID sub-model.
        reid = create(cfg['reid']) if cfg['reid'] != 'None' else None
        return {
            "detector": create(cfg['detector']),
            "reid": reid,
            "tracker": create(cfg['tracker']),
        }

    def _forward(self):
        det_outs = self.detector(self.inputs)
        if self.training:
            return det_outs
        # Eval: optionally attach ReID embeddings for the detected crops.
        if self.reid is None:
            det_outs['embeddings'] = None
        else:
            assert 'crops' in self.inputs
            det_outs['embeddings'] = self.reid(self.inputs['crops'])
        return det_outs

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,143 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['CascadeRCNN']
@register
class CascadeRCNN(BaseArch):
    """
    Cascade R-CNN network, see https://arxiv.org/abs/1712.00726
    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
        mask_head (object): `MaskHead` instance
        mask_post_process (object): `MaskPostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None,
                 mask_head=None,
                 mask_post_process=None):
        super(CascadeRCNN, self).__init__()
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
        self.neck = neck
        self.mask_head = mask_head
        self.mask_post_process = mask_post_process
        # Mask branch (Cascade Mask R-CNN) is active only when a mask
        # head was configured.
        self.with_mask = mask_head is not None

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, threading output shapes
        backbone -> (optional) neck -> rpn/bbox/mask heads."""
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # `cfg['neck'] and ...` leaves neck falsy (None) when unset.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        # Without a neck (e.g. C4-style models) the mask head consumes
        # the bbox head's feature shape instead.
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Shared train/eval forward.

        Returns (rpn_loss, bbox_loss, mask_loss) dicts in training and
        (bbox_pred, bbox_num, mask_pred-or-None) in eval.
        """
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            # Re-fetch the rois/targets the bbox head actually assigned so
            # the mask head trains on the same sampled proposals.
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            if self.with_mask:
                mask_loss = self.mask_head(body_feats, rois, rois_num,
                                           self.inputs, bbox_targets, bbox_feat)
                return rpn_loss, bbox_loss, mask_loss
            else:
                return rpn_loss, bbox_loss, {}
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
            # Cascade refinement: decode from the last-stage refined rois.
            refined_rois = self.bbox_head.get_refined_rois()
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                preds, (refined_rois, rois_num), im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)
            if not self.with_mask:
                return bbox_pred, bbox_num, None
            # Mask branch uses the un-rescaled boxes (`bbox`) for RoI
            # extraction, then pastes masks at the original image shape.
            mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
                                               origin_shape)
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        """Sum all branch losses into a single 'loss' entry."""
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        if self.with_mask:
            loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {
            'bbox': bbox_pred,
            'bbox_num': bbox_num,
        }
        if self.with_mask:
            output.update({'mask': mask_pred})
        return output

View File

@@ -0,0 +1,103 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['CenterNet']
@register
class CenterNet(BaseArch):
    """
    CenterNet network, see http://arxiv.org/abs/1904.07850

    Args:
        backbone (object): backbone instance
        neck (object): FPN instance, default use 'CenterNetDLAFPN'
        head (object): 'CenterNetHead' instance
        post_process (object): 'CenterNetPostProcess' instance
        for_mot (bool): whether return other features used in tracking model
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    __shared__ = ['for_mot']

    def __init__(self,
                 backbone,
                 neck='CenterNetDLAFPN',
                 head='CenterNetHead',
                 post_process='CenterNetPostProcess',
                 for_mot=False):
        super(CenterNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.post_process = post_process
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        # The neck is optional; the head consumes whichever feature
        # shape applies (neck output if present, else backbone output).
        if cfg['neck']:
            neck = create(cfg['neck'], input_shape=backbone.out_shape)
        else:
            neck = cfg['neck']
        feat_shape = neck and neck.out_shape or backbone.out_shape
        head = create(cfg['head'], input_shape=feat_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        feats = self.backbone(self.inputs)
        if self.neck is not None:
            feats = self.neck(feats)
        head_out = self.head(feats, self.inputs)
        if self.for_mot:
            # Tracking models also consume the neck feature map.
            head_out.update({'neck_feat': feats})
        elif self.training:
            head_out['loss'] = head_out.pop('det_loss')
        return head_out

    def get_pred(self):
        head_out = self._forward()
        bbox, bbox_num, bbox_inds, topk_clses, topk_ys, topk_xs = self.post_process(
            head_out['heatmap'],
            head_out['size'],
            head_out['offset'],
            im_shape=self.inputs['im_shape'],
            scale_factor=self.inputs['scale_factor'])
        output = {"bbox": bbox, "bbox_num": bbox_num}
        if self.for_mot:
            # Extra tensors needed by the tracking pipeline.
            output.update({
                "bbox_inds": bbox_inds,
                "topk_clses": topk_clses,
                "topk_ys": topk_ys,
                "topk_xs": topk_xs,
                "neck_feat": head_out['neck_feat'],
            })
        return output

    def get_loss(self):
        return self._forward()

View File

@@ -0,0 +1,176 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import math
import numpy as np
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..keypoint_utils import affine_transform
from ppdet.data.transform.op_helper import gaussian_radius, gaussian2D, draw_umich_gaussian
__all__ = ['CenterTrack']
@register
class CenterTrack(BaseArch):
    """
    CenterTrack network, see http://arxiv.org/abs/2004.01177
    Args:
        detector (object): 'CenterNet' instance
        plugin_head (object): 'CenterTrackHead' instance
        tracker (object): 'CenterTracker' instance
    """
    __category__ = 'architecture'
    __shared__ = ['mot_metric']

    def __init__(self,
                 detector='CenterNet',
                 plugin_head='CenterTrackHead',
                 tracker='CenterTracker',
                 mot_metric=False):
        super(CenterTrack, self).__init__()
        self.detector = detector
        self.plugin_head = plugin_head
        self.tracker = tracker
        self.mot_metric = mot_metric
        # Previous-frame image, kept across _forward calls during MOT
        # inference (bs=1 only); cleared by reset_tracking().
        self.pre_image = None
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # Plugin head consumes the detector's feature-map shape
        # (neck output if present, else backbone output).
        detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape
        kwargs = {'input_shape': detector_out_shape}
        plugin_head = create(cfg['plugin_head'], **kwargs)
        tracker = create(cfg['tracker'])
        return {
            'detector': detector,
            'plugin_head': plugin_head,
            'tracker': tracker,
        }

    def _forward(self):
        """Train: combined detector + tracking-branch losses.
        Eval: plain detection, or full MOT inference when mot_metric."""
        if self.training:
            det_outs = self.detector(self.inputs)
            neck_feat = det_outs['neck_feat']
            losses = {}
            # Collect only loss entries from the detector output.
            for k, v in det_outs.items():
                if 'loss' not in k: continue
                losses.update({k: v})
            plugin_outs = self.plugin_head(neck_feat, self.inputs)
            for k, v in plugin_outs.items():
                if 'loss' not in k: continue
                losses.update({k: v})
            # Total = detection loss + tracking-branch loss.
            losses['loss'] = det_outs['det_loss'] + plugin_outs['plugin_loss']
            return losses
        else:
            if not self.mot_metric:
                # detection, support bs>=1
                det_outs = self.detector(self.inputs)
                return {
                    'bbox': det_outs['bbox'],
                    'bbox_num': det_outs['bbox_num']
                }
            else:
                # MOT, only support bs=1
                if not self.deploy:
                    if self.pre_image is None:
                        self.pre_image = self.inputs['image']
                        # initializing tracker for the first frame
                        self.tracker.init_track([])
                    self.inputs['pre_image'] = self.pre_image
                    self.pre_image = self.inputs[
                        'image']  # Note: update for next image
                    # render input heatmap from tracker status
                    pre_hm = self.get_additional_inputs(
                        self.tracker.tracks, self.inputs, with_hm=True)
                    self.inputs['pre_hm'] = paddle.to_tensor(pre_hm)
                # model inference
                det_outs = self.detector(self.inputs)
                neck_feat = det_outs['neck_feat']
                result = self.plugin_head(
                    neck_feat, self.inputs, det_outs['bbox'],
                    det_outs['bbox_inds'], det_outs['topk_clses'],
                    det_outs['topk_ys'], det_outs['topk_xs'])
                if not self.deploy:
                    # convert the cropped and 4x downsampled output coordinate system
                    # back to the input image coordinate system
                    result = self.plugin_head.centertrack_post_process(
                        result, self.inputs, self.tracker.out_thresh)
                return result

    def get_pred(self):
        return self._forward()

    def get_loss(self):
        return self._forward()

    def reset_tracking(self):
        """Clear tracker state between video sequences."""
        self.tracker.reset()
        self.pre_image = None

    def get_additional_inputs(self, dets, meta, with_hm=True):
        # Render input heatmap from previous trackings.
        trans_input = meta['trans_input'][0].numpy()
        inp_width, inp_height = int(meta['inp_width'][0]), int(meta[
            'inp_height'][0])
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)
        for det in dets:
            # Skip low-confidence previous tracks.
            if det['score'] < self.tracker.pre_thresh:
                continue
            # Map the previous-frame box into the network input space.
            bbox = affine_transform_bbox(det['bbox'], trans_input, inp_width,
                                         inp_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0):
                radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)), min_overlap=0.7)
                radius = max(0, int(radius))
                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                    dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    # Splat a gaussian peak at each previous track center.
                    input_hm[0] = draw_umich_gaussian(input_hm[0], ct_int,
                                                      radius)
        if with_hm:
            # Add batch dimension -> (1, 1, H, W).
            input_hm = input_hm[np.newaxis]
        return input_hm
def affine_transform_bbox(bbox, trans, width, height):
    """Apply affine transform `trans` to an xyxy bbox and clip the
    result to the [0, width-1] x [0, height-1] image bounds."""
    out = np.array(copy.deepcopy(bbox), dtype=np.float32)
    # Transform the two corner points independently.
    out[:2] = affine_transform(out[:2], trans)
    out[2:] = affine_transform(out[2:], trans)
    # Clip x coords (indices 0, 2) and y coords (indices 1, 3).
    out[0::2] = np.clip(out[0::2], 0, width - 1)
    out[1::2] = np.clip(out[1::2], 0, height - 1)
    return out

View File

@@ -0,0 +1,67 @@
from .meta_arch import BaseArch
from ppdet.core.workspace import register, create
from paddle import in_dynamic_mode
__all__ = ['CLRNet']
@register
class CLRNet(BaseArch):
    """CLRNet lane detection architecture: backbone -> FPN -> CLRHead."""
    __category__ = 'architecture'

    def __init__(self,
                 backbone="CLRResNet",
                 neck="CLRFPN",
                 clr_head="CLRHead",
                 post_process=None):
        super(CLRNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.heads = clr_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules in dependency order, threading shapes."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        clr_head = create(cfg['clr_head'], input_shape=neck.out_shape)
        return {
            'backbone': backbone,
            'neck': neck,
            'clr_head': clr_head,
        }

    def _forward(self):
        feats = self.neck(self.backbone(self.inputs['image']))
        if self.training:
            # Head computes the loss dict directly in train mode.
            return self.heads(feats, self.inputs)
        output = {'lanes': self.heads(feats)}
        # TODO: hard code fix as_lanes=False problem in clrnet_head.py
        # "get_lanes" function for static mode
        if in_dynamic_mode():
            output = {
                "lanes": self.heads.get_lanes(output['lanes']),
                "img_path": self.inputs['full_img_path'],
                "img_name": self.inputs['img_name']
            }
        return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,70 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
__all__ = ['DeepSORT']
@register
class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # The detector may be disabled with the string 'None' (e.g. when
        # detections are supplied externally); ReID and tracker are
        # always built.
        detector = None if cfg['detector'] == 'None' else create(
            cfg['detector'])
        return {
            "detector": detector,
            "reid": create(cfg['reid']),
            "tracker": create(cfg['tracker']),
        }

    def _forward(self):
        # Embedding extraction only: detection crops -> ReID features.
        return {'embeddings': self.reid(self.inputs['crops'])}

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,118 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from .meta_arch import BaseArch
from ppdet.core.workspace import register, create
__all__ = ['DETR']
# Deformable DETR, DINO use the same architecture as DETR
@register
class DETR(BaseArch):
    """
    DETR detection architecture: backbone -> (optional) neck ->
    transformer -> DETR head, with optional post processing.

    Deformable DETR and DINO reuse this same architecture.
    """
    __category__ = 'architecture'
    __inject__ = ['post_process', 'post_process_semi']
    __shared__ = ['with_mask', 'exclude_post_process']

    def __init__(self,
                 backbone,
                 transformer='DETRTransformer',
                 detr_head='DETRHead',
                 neck=None,
                 post_process='DETRPostProcess',
                 post_process_semi=None,
                 with_mask=False,
                 exclude_post_process=False):
        super(DETR, self).__init__()
        self.backbone = backbone
        self.transformer = transformer
        self.detr_head = detr_head
        self.neck = neck
        self.post_process = post_process
        # Whether the model also outputs instance masks.
        self.with_mask = with_mask
        # When True, _forward returns the raw head predictions and skips
        # post_process (presumably for deployment/export — confirm).
        self.exclude_post_process = exclude_post_process
        # Post process used by semi-supervised training (DETR_SSOD calls
        # teacher.post_process_semi).
        self.post_process_semi = post_process_semi

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # neck
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None
        # transformer
        if neck is not None:
            kwargs = {'input_shape': neck.out_shape}
        transformer = create(cfg['transformer'], **kwargs)
        # head
        # NOTE(review): the head's input_shape uses backbone.out_shape
        # even when a neck is present — confirm this is intended.
        kwargs = {
            'hidden_dim': transformer.hidden_dim,
            'nhead': transformer.nhead,
            'input_shape': backbone.out_shape
        }
        detr_head = create(cfg['detr_head'], **kwargs)
        return {
            'backbone': backbone,
            'transformer': transformer,
            "detr_head": detr_head,
            "neck": neck
        }

    def _forward(self):
        """Train: loss dict (with summed 'loss'). Eval: bbox output dict."""
        # Backbone
        body_feats = self.backbone(self.inputs)
        # Neck
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        # Transformer; pad_mask is optional (padded batches only).
        pad_mask = self.inputs.get('pad_mask', None)
        out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
        # DETR Head
        if self.training:
            detr_losses = self.detr_head(out_transformer, body_feats,
                                         self.inputs)
            # Total loss = sum of all non-logging loss entries.
            detr_losses.update({
                'loss': paddle.add_n(
                    [v for k, v in detr_losses.items() if 'log' not in k])
            })
            return detr_losses
        else:
            preds = self.detr_head(out_transformer, body_feats)
            if self.exclude_post_process:
                bbox, bbox_num, mask = preds
            else:
                # paddle.shape(...) keeps this static-graph friendly.
                bbox, bbox_num, mask = self.post_process(
                    preds, self.inputs['im_shape'], self.inputs['scale_factor'],
                    paddle.shape(self.inputs['image'])[2:])
            output = {'bbox': bbox, 'bbox_num': bbox_num}
            if self.with_mask:
                output['mask'] = mask
            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,341 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create, merge_config
import paddle
import numpy as np
import paddle
import paddle.nn.functional as F
from ppdet.core.workspace import register, create
from ppdet.utils.logger import setup_logger
from ppdet.modeling.ssod.utils import filter_invalid
from .multi_stream_detector import MultiSteamDetector
logger = setup_logger(__name__)
__all__ = ['DETR_SSOD']
__shared__ = ['num_classes']
@register
class DETR_SSOD(MultiSteamDetector):
    def __init__(self,
                 teacher,
                 student,
                 train_cfg=None,
                 test_cfg=None,
                 RTDETRTransformer=None,
                 num_classes=80):
        # Teacher-student pair for semi-supervised DETR training; the
        # teacher is an EMA copy of the student (see forward_train).
        super(DETR_SSOD, self).__init__(
            dict(
                teacher=teacher, student=student),
            train_cfg=train_cfg,
            test_cfg=test_cfg, )
        # Iteration at which EMA updates / semi-supervised training start.
        # NOTE(review): train_cfg is indexed unconditionally here, so the
        # default train_cfg=None would raise — confirm it is always given.
        self.ema_start_iters = train_cfg['ema_start_iters']
        self.momentum = 0.9996  # EMA decay used for teacher updates
        self.cls_thr = None
        self.cls_thr_ig = None
        self.num_classes = num_classes
        if train_cfg is not None:
            # Teacher only produces pseudo labels; never optimized.
            self.freeze("teacher")
            self.unsup_weight = self.train_cfg['unsup_weight']
            self.sup_weight = self.train_cfg['sup_weight']
            self._teacher = None
            self._student = None
            self._transformer = None
    @classmethod
    def from_config(cls, cfg):
        teacher = create(cfg['teacher'])
        # NOTE(review): merge_config is called between creating teacher
        # and student — presumably so the student is built under the
        # merged config; confirm against the workspace semantics.
        merge_config(cfg)
        student = create(cfg['student'])
        train_cfg = cfg['train_cfg']
        test_cfg = cfg['test_cfg']
        RTDETRTransformer = cfg['RTDETRTransformer']
        return {
            'teacher': teacher,
            'student': student,
            'train_cfg': train_cfg,
            'test_cfg': test_cfg,
            'RTDETRTransformer': RTDETRTransformer
        }
    def forward_train(self, inputs, **kwargs):
        """One semi-supervised training step.

        Before `ema_start_iters` only the supervised branch contributes
        (unsup_* losses are emitted zeroed so logged keys stay stable);
        afterwards teacher pseudo-label losses are added.
        """
        # NOTE(review): iter_id is only bound for dict/list inputs; any
        # other input type would raise NameError below — confirm callers.
        if isinstance(inputs, dict):
            iter_id = inputs['iter_id']
        elif isinstance(inputs, list):
            iter_id = inputs[-1]
        # EMA teacher: initialized (momentum=0 -> copy of student) exactly
        # at ema_start_iters, then decayed with self.momentum.
        if iter_id == self.ema_start_iters:
            self.update_ema_model(momentum=0)
        elif iter_id > self.ema_start_iters:
            self.update_ema_model(momentum=self.momentum)
        if iter_id > self.ema_start_iters:
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs
            # Weak/strong supervised batches must share one spatial shape
            # before being concatenated below.
            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)
            # Unlabeled batches must not carry ground-truth annotations.
            if 'gt_bbox' in data_unsup_s.keys():
                del data_unsup_s['gt_bbox']
            if 'gt_class' in data_unsup_s.keys():
                del data_unsup_s['gt_class']
            if 'gt_class' in data_unsup_w.keys():
                del data_unsup_w['gt_class']
            if 'gt_bbox' in data_unsup_w.keys():
                del data_unsup_w['gt_bbox']
            # Merge weak+strong supervised samples into one batch:
            # list-typed targets are extended, tensors concatenated.
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])
            loss = {}
            # Supervised branch: full student forward on labeled data.
            body_feats = self.student.backbone(data_sup_s)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       data_sup_s)
            sup_loss = self.student.detr_head(out_transformer, body_feats,
                                              data_sup_s)
            sup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in sup_loss.items() if 'log' not in k])
            })
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}
            loss.update(**sup_loss)
            # Unsupervised branch: pseudo-label training via the teacher.
            unsup_loss = self.foward_unsup_train(data_unsup_w, data_unsup_s)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            unsup_loss = {"unsup_" + k: v for k, v in unsup_loss.items()}
            # NOTE(review): this second 'loss' update double-counts the
            # already-summed 'unsup_loss' entry; it is overwritten by the
            # final total below, so it only affects logged values.
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            loss.update(**unsup_loss)
            loss.update({'loss': loss['sup_loss'] + loss['unsup_loss']})
        else:
            if iter_id == self.ema_start_iters:
                logger.info("start semi_supervised_traing")
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs
            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])
            loss = {}
            sup_loss = self.student(data_sup_s)
            # Zeroed unsup entries keep the loss-dict schema constant
            # during warm-up.
            unsup_loss = {
                "unsup_" + k: v * paddle.to_tensor(0)
                for k, v in sup_loss.items()
            }
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}
            loss.update(**sup_loss)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v * 0 for k, v in sup_loss.items() if 'log' not in k])
            })
            # NOTE(review): this rename double-prefixes keys to
            # 'unsup_unsup_*'; values are all zero so it only affects
            # logged key names.
            unsup_loss = {"unsup_" + k: v * 0 for k, v in unsup_loss.items()}
            loss.update(**unsup_loss)
            loss.update({'loss': loss['sup_loss']})
        return loss
    def foward_unsup_train(self, data_unsup_w, data_unsup_s):
        """Pseudo-label the weak-augmented unlabeled batch with the
        frozen teacher, then compute the student loss on the
        strong-augmented counterpart.

        NOTE(review): the name keeps the original 'foward' typo because
        forward_train() calls it by this exact name.
        """
        # Teacher inference only — no gradients needed.
        with paddle.no_grad():
            body_feats = self.teacher.backbone(data_unsup_w)
            if self.teacher.neck is not None:
                body_feats = self.teacher.neck(body_feats, is_teacher=True)
            out_transformer = self.teacher.transformer(
                body_feats, None, data_unsup_w, is_teacher=True)
            preds = self.teacher.detr_head(out_transformer, body_feats)
            bbox, bbox_num = self.teacher.post_process_semi(preds)
        self.place = body_feats[0].place
        # Split the flat prediction tensor back into per-image lists.
        # Column layout inferred from the slices below: [:1] label,
        # [1:num_classes+1] per-class scores, [-4:] box coords —
        # NOTE(review): confirm against post_process_semi.
        proposal_bbox_list = bbox[:, -4:]
        proposal_bbox_list = proposal_bbox_list.split(
            tuple(np.array(bbox_num)), 0)
        proposal_label_list = paddle.cast(bbox[:, :1], np.float32)
        proposal_label_list = proposal_label_list.split(
            tuple(np.array(bbox_num)), 0)
        proposal_score_list = paddle.cast(bbox[:, 1:self.num_classes + 1],
                                          np.float32)
        proposal_score_list = proposal_score_list.split(
            tuple(np.array(bbox_num)), 0)
        # Re-materialize the splits on the original device.
        proposal_bbox_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_bbox_list
        ]
        proposal_label_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_label_list
        ]
        # filter invalid box roughly
        if isinstance(self.train_cfg['pseudo_label_initial_score_thr'], float):
            thr = self.train_cfg['pseudo_label_initial_score_thr']
        else:
            # TODO: use dynamic threshold
            raise NotImplementedError(
                "Dynamic Threshold is not implemented yet.")
        # Per-image filtering by score threshold and minimum box size.
        proposal_bbox_list, proposal_label_list, proposal_score_list = list(
            zip(* [
                filter_invalid(
                    proposal[:, :4],
                    proposal_label,
                    proposal_score,
                    thr=thr,
                    min_size=self.train_cfg['min_pseduo_box_size'], )
                for proposal, proposal_label, proposal_score in
                zip(proposal_bbox_list, proposal_label_list,
                    proposal_score_list)
            ]))
        teacher_bboxes = list(proposal_bbox_list)
        teacher_labels = proposal_label_list
        teacher_info = [teacher_bboxes, teacher_labels]
        student_unsup = data_unsup_s
        return self.compute_pseudo_label_loss(student_unsup, teacher_info,
                                              proposal_score_list)
def compute_pseudo_label_loss(self, student_unsup, teacher_info,
                              proposal_score_list):
    """Supervise the student on strongly-augmented data with pseudo labels.

    Args:
        student_unsup (dict): strongly-augmented batch fed to the student;
            mutated in place with 'gt_bbox' / 'gt_class' (and 'gt_score').
        teacher_info (list): [pseudo_bboxes, pseudo_labels], per-image lists
            produced by ``foward_unsup_train``.
        proposal_score_list (list): per-image class-score tensors for the
            kept pseudo boxes.

    Returns:
        dict: student detection losses (all zeroed when no pseudo labels
        exist, so the step contributes no gradient).
    """
    pseudo_bboxes = list(teacher_info[0])
    pseudo_labels = list(teacher_info[1])
    losses = dict()
    # Round-trip through numpy so empty predictions get well-shaped
    # placeholders ([0, 4] boxes / [0, 1] labels).
    for i in range(len(pseudo_bboxes)):
        if pseudo_labels[i].shape[0] == 0:
            pseudo_bboxes[i] = paddle.zeros([0, 4]).numpy()
            pseudo_labels[i] = paddle.zeros([0, 1]).numpy()
        else:
            pseudo_bboxes[i] = pseudo_bboxes[i][:, :4].numpy()
            pseudo_labels[i] = pseudo_labels[i].numpy()
    # Back to tensors on the cached device with the dtypes the head expects.
    for i in range(len(pseudo_bboxes)):
        pseudo_labels[i] = paddle.to_tensor(
            pseudo_labels[i], dtype=paddle.int32, place=self.place)
        pseudo_bboxes[i] = paddle.to_tensor(
            pseudo_bboxes[i], dtype=paddle.float32, place=self.place)
    student_unsup.update({
        'gt_bbox': pseudo_bboxes,
        'gt_class': pseudo_labels
    })
    # Detect the "no pseudo labels at all" case via the coordinate sum.
    # NOTE(review): a non-empty batch whose coordinates all sum to exactly 0
    # would also hit this branch; presumably impossible after filtering.
    pseudo_sum = 0
    for i in range(len(pseudo_bboxes)):
        pseudo_sum += pseudo_bboxes[i].sum()
    if pseudo_sum == 0:  #input fake data when there are no pseudo labels
        # Feed one dummy box so the forward pass stays valid, then zero
        # every loss term.
        pseudo_bboxes[0] = paddle.ones([1, 4]) - 0.5
        pseudo_labels[0] = paddle.ones([1, 1]).astype('int32')
        student_unsup.update({
            'gt_bbox': pseudo_bboxes,
            'gt_class': pseudo_labels
        })
        body_feats = self.student.backbone(student_unsup)
        if self.student.neck is not None:
            body_feats = self.student.neck(body_feats)
        out_transformer = self.student.transformer(body_feats, None,
                                                   student_unsup)
        losses = self.student.detr_head(out_transformer, body_feats,
                                        student_unsup)
        for n, v in losses.items():
            losses[n] = v * 0
    else:
        # Keep only the images that actually carry pseudo labels.
        gt_bbox = []
        gt_class = []
        images = []
        proposal_score = []
        for i in range(len(pseudo_bboxes)):
            if pseudo_labels[i].shape[0] == 0:
                continue
            else:
                # Per-box confidence = max class score; presumably used by
                # the head to weight the pseudo-label loss -- TODO confirm
                # in detr_head.
                proposal_score.append(proposal_score_list[i].max(-1)
                                      .unsqueeze(-1))
                gt_class.append(pseudo_labels[i])
                gt_bbox.append(pseudo_bboxes[i])
                images.append(student_unsup['image'][i])
        images = paddle.stack(images)
        student_unsup.update({
            'image': images,
            'gt_bbox': gt_bbox,
            'gt_class': gt_class
        })
        body_feats = self.student.backbone(student_unsup)
        if self.student.neck is not None:
            body_feats = self.student.neck(body_feats)
        out_transformer = self.student.transformer(body_feats, None,
                                                   student_unsup)
        student_unsup.update({'gt_score': proposal_score})
        losses = self.student.detr_head(out_transformer, body_feats,
                                        student_unsup)
    return losses
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) on the last axis."""
    cx, cy, bw, bh = x.unbind(-1)
    half_w = 0.5 * bw
    half_h = 0.5 * bh
    corners = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
    return paddle.stack(corners, axis=-1)
def box_xyxy_to_cxcywh(x):
    """Convert boxes from (x1, y1, x2, y2) to (cx, cy, w, h) on the last axis."""
    left, top, right, bottom = x.unbind(-1)
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    width = right - left
    height = bottom - top
    return paddle.stack([center_x, center_y, width, height], axis=-1)
def get_size_with_aspect_ratio(image_size, size, max_size=None):
w, h = image_size
if max_size is not None:
min_original_size = float(min((w, h)))
max_original_size = float(max((w, h)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
if (w <= h and w == size) or (h <= w and h == size):
return (w, h)
if w < h:
ow = size
oh = int(size * h / w)
else:
oh = size
ow = int(size * w / h)
return (ow, oh)
def align_weak_strong_shape(data_weak, data_strong):
shape_x = data_strong['image'].shape[2]
shape_y = data_strong['image'].shape[3]
target_size = [shape_x, shape_y]
data_weak['image'] = F.interpolate(
data_weak['image'],
size=target_size,
mode='bilinear',
align_corners=False)
return data_weak, data_strong

View File

@@ -0,0 +1,100 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FairMOT']
@register
class FairMOT(BaseArch):
    """
    FairMOT network, see http://arxiv.org/abs/2004.01888

    Args:
        detector (object): 'CenterNet' instance
        reid (object): 'FairMOTEmbeddingHead' instance
        tracker (object): 'JDETracker' instance
        loss (object): 'FairMOTLoss' instance
    """

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 detector='CenterNet',
                 reid='FairMOTEmbeddingHead',
                 tracker='JDETracker',
                 loss='FairMOTLoss'):
        super(FairMOT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.loss = loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # The ReID head consumes the detector's neck features, falling back
        # to the backbone output shape when the neck provides none.
        if detector.neck:
            feat_shape = detector.neck.out_shape or detector.backbone.out_shape
        else:
            feat_shape = detector.backbone.out_shape
        reid = create(cfg['reid'], input_shape=feat_shape)
        loss = create(cfg['loss'])
        tracker = create(cfg['tracker'])
        return {
            'detector': detector,
            'reid': reid,
            'loss': loss,
            'tracker': tracker
        }

    def _forward(self):
        # det_outs keys --
        #   train: neck_feat, det_loss, heatmap_loss, size_loss, offset_loss
        #          (optional: iou_loss)
        #   eval/infer: neck_feat, bbox, bbox_inds
        det_outs = self.detector(self.inputs)
        neck_feat = det_outs['neck_feat']
        if not self.training:
            # (pred_dets, pred_embs)
            return self.reid(neck_feat, self.inputs, det_outs['bbox'],
                             det_outs['bbox_inds'], det_outs['topk_clses'])
        reid_loss = self.reid(neck_feat, self.inputs)
        loss = self.loss(det_outs['det_loss'], reid_loss)
        # Surface every individual detector loss term alongside the total.
        for name, value in det_outs.items():
            if 'loss' in name:
                loss.update({name: value})
        loss.update({'reid_loss': reid_loss})
        return loss

    def get_pred(self):
        return self._forward()

    def get_loss(self):
        return self._forward()

View File

@@ -0,0 +1,167 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
import numpy as np
__all__ = ['FasterRCNN']
@register
class FasterRCNN(BaseArch):
    """
    Faster R-CNN network, see https://arxiv.org/abs/1506.01497

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
    """
    __category__ = 'architecture'
    __inject__ = ['bbox_post_process']

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None):
        super(FasterRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process

    def init_cot_head(self, relationship):
        # Pass the class-relationship matrix down to the bbox head
        # (label-relationship / CoT few-shot training).
        self.bbox_head.init_cot_head(relationship)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # Heads consume the neck's output shape when a neck exists.
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
        }

    def _forward(self):
        """Shared forward: returns (rpn_loss, bbox_loss) in training,
        (bbox_pred, bbox_num[, extra_data]) otherwise."""
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
                                          self.inputs)
            return rpn_loss, bbox_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                preds, (rois, rois_num), im_shape, scale_factor)

            # rescale the prediction back to origin image
            bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)

            # NOTE(review): self.use_extra_data is not set in this class;
            # presumably initialized by BaseArch -- confirm.
            if self.use_extra_data:
                extra_data = {
                }  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                            }
                """
                extra_data['scores'] = preds[1]  # predict scores (probability)
                # Todo: get logits output
                extra_data[
                    'nms_keep_idx'] = nms_keep_idx  # bbox index before nms
                return bbox_pred, bbox_num, extra_data
            else:
                return bbox_pred, bbox_num

    def get_loss(self, ):
        """Collect RPN + RCNN losses and add their sum under 'loss'."""
        rpn_loss, bbox_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Return a dict with 'bbox'/'bbox_num' (+ 'extra_data' if enabled)."""
        if self.use_extra_data:
            bbox_pred, bbox_num, extra_data = self._forward()
            output = {
                'bbox': bbox_pred,
                'bbox_num': bbox_num,
                'extra_data': extra_data
            }
        else:
            bbox_pred, bbox_num = self._forward()
            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output

    def target_bbox_forward(self, data):
        """Run the bbox head on ground-truth boxes used as RoIs (CoT mode).

        Returns the head predictions for those GT regions.
        """
        body_feats = self.backbone(data)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        rois = [roi for roi in data['gt_bbox']]
        rois_num = paddle.concat([paddle.shape(roi)[0:1] for roi in rois])

        preds, _ = self.bbox_head(body_feats, rois, rois_num, None, cot=True)
        return preds

    def relationship_learning(self, loader, num_classes_novel):
        """Estimate P(base class | novel class) from GT-box predictions.

        Iterates the loader, collects per-box base-class probabilities and
        their novel-class labels, and averages the probabilities per label.

        Returns:
            np.ndarray: (num_novel_labels, num_base_classes) conditional
            probability matrix.
        """
        # NOTE(review): num_classes_novel is currently unused; the number of
        # rows is derived from the labels actually seen (np.max + 1).
        print('computing relationship')
        train_labels_list = []
        label_list = []

        for step_id, data in enumerate(loader):
            _, bbox_prob = self.target_bbox_forward(data)
            batch_size = data['im_id'].shape[0]
            for i in range(batch_size):
                num_bbox = data['gt_class'][i].shape[0]  # unused
                train_labels = data['gt_class'][i]
                train_labels_list.append(train_labels.numpy().squeeze(1))
            # Drop the background column (last) from the probabilities.
            base_labels = bbox_prob.detach().numpy()[:, :-1]
            label_list.append(base_labels)

        labels = np.concatenate(train_labels_list, 0)
        probabilities = np.concatenate(label_list, 0)
        N_t = np.max(labels) + 1
        conditional = []
        for i in range(N_t):
            this_class = probabilities[labels == i]
            average = np.mean(this_class, axis=0, keepdims=True)
            conditional.append(average)
        return np.concatenate(conditional)

View File

@@ -0,0 +1,222 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['FCOS', 'ARSL_FCOS']
@register
class FCOS(BaseArch):
    """
    FCOS network, see https://arxiv.org/abs/1904.01355

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead' instance
        ssod_loss (object): 'SSODFCOSLoss' instance, only used for
            semi-det (ssod) by DenseTeacher
    """

    __category__ = 'architecture'
    __inject__ = ['ssod_loss']

    def __init__(self,
                 backbone='ResNet',
                 neck='FPN',
                 fcos_head='FCOSHead',
                 ssod_loss='SSODFCOSLoss'):
        super(FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        # Semi-det (ssod) state: refreshed per batch from the inputs.
        self.is_teacher = False
        self.ssod_loss = ssod_loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        fcos_head = create(cfg['fcos_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "fcos_head": fcos_head}

    def _forward(self):
        fpn_feats = self.neck(self.backbone(self.inputs))
        self.is_teacher = self.inputs.get('is_teacher', False)
        if not (self.training or self.is_teacher):
            # Plain inference: decode and rescale detections.
            head_outs = self.fcos_head(fpn_feats)
            bbox_pred, bbox_num = self.fcos_head.post_process(
                head_outs, self.inputs['scale_factor'])
            return {'bbox': bbox_pred, 'bbox_num': bbox_num}
        # Training (or teacher pass in ssod): the head computes losses.
        return self.fcos_head(fpn_feats, self.inputs)

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

    def get_loss_keys(self):
        return ['loss_cls', 'loss_box', 'loss_quality']

    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
        """Dense-teacher distillation loss between student and teacher outputs."""
        return self.ssod_loss(student_head_outs, teacher_head_outs, train_cfg)
@register
class ARSL_FCOS(BaseArch):
    """
    FCOS ARSL network, see https://arxiv.org/abs/

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead_ARSL' instance
        fcos_cr_loss (object): 'FCOSLossCR' instance, only used for semi-det(ssod) by ARSL
    """
    __category__ = 'architecture'
    __inject__ = ['fcos_cr_loss']

    def __init__(self,
                 backbone,
                 neck,
                 fcos_head='FCOSHead_ARSL',
                 fcos_cr_loss='FCOSLossCR'):
        super(ARSL_FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        self.fcos_cr_loss = fcos_cr_loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        kwargs = {'input_shape': neck.out_shape}
        fcos_head = create(cfg['fcos_head'], **kwargs)
        # consistency regularization loss
        fcos_cr_loss = create(cfg['fcos_cr_loss'])
        return {
            'backbone': backbone,
            'neck': neck,
            'fcos_head': fcos_head,
            'fcos_cr_loss': fcos_cr_loss,
        }

    def forward(self, inputs, branch="supervised", teacher_prediction=None):
        """Dispatch to the four ARSL modes.

        Training:  supervised loss / pseudo (consistency) loss.
        Inference: normal prediction / pseudo-label prediction for teacher.
        """
        assert branch in ['supervised', 'semi_supervised'], \
            print('In ARSL, type must be supervised or semi_supervised.')
        # NOTE(review): print(...) returns None, so this assert carries no
        # message; it only prints before failing.
        # data_format presumably comes from BaseArch -- confirm.
        if self.data_format == 'NHWC':
            image = inputs['image']
            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
        self.inputs = inputs
        if self.training:
            if branch == "supervised":
                out = self.get_loss()
            else:
                out = self.get_pseudo_loss(teacher_prediction)
        else:
            # norm test
            if branch == "supervised":
                out = self.get_pred()
            # predict pseudo labels
            else:
                out = self.get_pseudo_pred()
        return out

    # model forward
    def model_forward(self):
        """backbone -> neck -> head; returns the raw head outputs."""
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        fcos_head_outs = self.fcos_head(fpn_feats)
        return fcos_head_outs

    # supervised loss for labeled data
    def get_loss(self):
        loss = {}
        # Gather per-FPN-level targets that the data pipeline put into the
        # inputs (missing levels are simply skipped).
        tag_labels, tag_bboxes, tag_centerness = [], [], []
        for i in range(len(self.fcos_head.fpn_stride)):
            # labels, reg_target, centerness
            k_lbl = 'labels{}'.format(i)
            if k_lbl in self.inputs:
                tag_labels.append(self.inputs[k_lbl])
            k_box = 'reg_target{}'.format(i)
            if k_box in self.inputs:
                tag_bboxes.append(self.inputs[k_box])
            k_ctn = 'centerness{}'.format(i)
            if k_ctn in self.inputs:
                tag_centerness.append(self.inputs[k_ctn])
        fcos_head_outs = self.model_forward()
        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
                                            tag_bboxes, tag_centerness)
        loss.update(loss_fcos)
        return loss

    # unsupervised loss for unlabeled data
    def get_pseudo_loss(self, teacher_prediction):
        loss = {}
        fcos_head_outs = self.model_forward()
        unsup_loss = self.fcos_cr_loss(fcos_head_outs, teacher_prediction)
        # Suffix keys so they do not collide with the supervised terms.
        for k in unsup_loss.keys():
            loss[k + '_pseudo'] = unsup_loss[k]
        return loss

    # get detection results for test, decode and rescale the results to original size
    def get_pred(self):
        fcos_head_outs = self.model_forward()
        scale_factor = self.inputs['scale_factor']
        bbox_pred, bbox_num = self.fcos_head.post_process(fcos_head_outs,
                                                          scale_factor)
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output

    # generate pseudo labels to guide student
    def get_pseudo_pred(self):
        fcos_head_outs = self.model_forward()
        pred_cls, pred_loc, pred_iou = fcos_head_outs[1:]  # 0 is locations
        # Normalize regression outputs by the FPN stride of each level.
        for lvl, _ in enumerate(pred_loc):
            pred_loc[lvl] = pred_loc[lvl] / self.fcos_head.fpn_stride[lvl]
        return [pred_cls, pred_loc, pred_iou, self.fcos_head.fpn_stride]

View File

@@ -0,0 +1,87 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['GFL']
@register
class GFL(BaseArch):
    """
    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'GFLHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='GFLHead'):
        super(GFL, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        # backbone -> neck -> head
        head_outs = self.head(self.neck(self.backbone(self.inputs)))
        if self.training:
            return head_outs
        # Decode and rescale predictions for evaluation / inference.
        bboxes, bbox_num = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        return bboxes, bbox_num

    def get_loss(self, ):
        """Collect head losses and add their sum under 'loss'."""
        head_outs = self._forward()
        loss = dict(self.head.get_loss(head_outs, self.inputs))
        loss['loss'] = paddle.add_n(list(loss.values()))
        return loss

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}

View File

@@ -0,0 +1,110 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['JDE']
@register
class JDE(BaseArch):
    __category__ = 'architecture'
    __shared__ = ['metric']
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
            for ReID embedding evaluation, or 'MOT' for multi object tracking
            evaluation.
    """

    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 metric='MOT'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.metric = metric

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # The embedding head reads features straight from the detector neck.
        reid = create(cfg['reid'], input_shape=detector.neck.out_shape)
        tracker = create(cfg['tracker'])
        return {"detector": detector, "reid": reid, "tracker": tracker}

    def _forward(self):
        det_outs = self.detector(self.inputs)

        if self.training:
            # Joint detection + embedding losses.
            det_losses = det_outs['det_losses']
            return self.reid(
                det_outs['emb_feats'],
                self.inputs,
                loss_confs=det_losses['loss_confs'],
                loss_boxes=det_losses['loss_boxes'])

        if self.metric == 'MOTDet':
            # Detection-only evaluation.
            return {
                'bbox': det_outs['bbox'],
                'bbox_num': det_outs['bbox_num'],
            }

        if self.metric == 'MOT':
            # Full tracking: detections plus their embeddings.
            pred_dets, pred_embs = self.reid(
                det_outs['emb_feats'],
                self.inputs,
                bboxes=det_outs['bbox'],
                boxes_idx=det_outs['boxes_idx'],
                nms_keep_idx=det_outs['nms_keep_idx'])
            return pred_dets, pred_embs

        raise ValueError("Unknown metric {} for multi object tracking.".
                         format(self.metric))

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,287 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.optimize import linear_sum_assignment
from collections import abc, defaultdict
import numpy as np
import paddle
from ppdet.core.workspace import register, create, serializable
from .meta_arch import BaseArch
from .. import layers as L
from ..keypoint_utils import transpred
__all__ = ['HigherHRNet']
@register
class HigherHRNet(BaseArch):
    __category__ = 'architecture'

    def __init__(self,
                 backbone='HRNet',
                 hrhrnet_head='HrHRNetHead',
                 post_process='HrHRNetPostProcess',
                 eval_flip=True,
                 flip_perm=None,
                 max_num_people=30):
        """
        HigherHRNet network, see https://arxiv.org/abs/1908.10357
        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175

        Args:
            backbone (nn.Layer): backbone instance
            hrhrnet_head (nn.Layer): keypoint_head instance
            post_process (object): `HrHRNetPostProcess` instance
            eval_flip (bool): average predictions over a horizontal flip at
                eval time.
            flip_perm (list): left-right joint exchange order used by the
                flip test.
            max_num_people (int): maximum number of people kept per joint
                in the top-k peak selection.
        """
        super(HigherHRNet, self).__init__()
        self.backbone = backbone
        self.hrhrnet_head = hrhrnet_head
        self.post_process = post_process
        self.flip = eval_flip
        # NOTE(review): paddle.to_tensor(None) would fail, so flip_perm is
        # presumably always supplied by config -- confirm.
        self.flip_perm = paddle.to_tensor(flip_perm)
        self.deploy = False
        # x2 upsample and 5x5 max-pool used by get_topk peak finding.
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.pool = L.MaxPool(5, 1, 2)
        self.max_num_people = max_num_people

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head
        kwargs = {'input_shape': backbone.out_shape}
        hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
        post_process = create(cfg['post_process'])

        return {
            'backbone': backbone,
            "hrhrnet_head": hrhrnet_head,
            "post_process": post_process,
        }

    def _forward(self):
        # Flip test: run original + horizontally flipped images as one batch.
        if self.flip and not self.training and not self.deploy:
            self.inputs['image'] = paddle.concat(
                (self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
        body_feats = self.backbone(self.inputs)

        if self.training:
            return self.hrhrnet_head(body_feats, self.inputs)
        else:
            outputs = self.hrhrnet_head(body_feats)

            if self.flip and not self.deploy:
                # Split each output into (original, flipped) halves, un-flip
                # the flipped half (re-permuting left/right joints), then
                # average the heatmaps and keep both tagmaps.
                outputs = [paddle.split(o, 2) for o in outputs]
                output_rflip = [
                    paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
                    for o in outputs
                ]
                output1 = [o[0] for o in outputs]
                heatmap = (output1[0] + output_rflip[0]) / 2.
                tagmaps = [output1[1], output_rflip[1]]
                outputs = [heatmap] + tagmaps
            outputs = self.get_topk(outputs)

            if self.deploy:
                return outputs

            res_lst = []
            h = self.inputs['im_shape'][0, 0].numpy().item()
            w = self.inputs['im_shape'][0, 1].numpy().item()
            kpts, scores = self.post_process(*outputs, h, w)
            res_lst.append([kpts, scores])
            return res_lst

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        outputs = {}
        res_lst = self._forward()
        outputs['keypoint'] = res_lst
        return outputs

    def get_topk(self, outputs):
        """Upsample outputs and select per-joint top-k heatmap peaks.

        Returns [heatmap, tagmap, heat_k, inds_k] for the post-processor.
        """
        # resize to image size
        outputs = [self.interpolate(x) for x in outputs]
        # Two tagmaps when flip-testing (original + flipped), else one.
        if len(outputs) == 3:
            tagmap = paddle.concat(
                (outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
        else:
            tagmap = outputs[1].unsqueeze(4)

        heatmap = outputs[0]
        N, J = 1, self.hrhrnet_head.num_joints
        heatmap_maxpool = self.pool(heatmap)
        # topk: keep only local maxima (where max-pool equals the value).
        maxmap = heatmap * (heatmap == heatmap_maxpool)
        maxmap = maxmap.reshape([N, J, -1])
        heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)

        outputs = [heatmap, tagmap, heat_k, inds_k]
        return outputs
@register
@serializable
class HrHRNetPostProcess(object):
    '''
    HrHRNet postprocess contain:
        1) get topk keypoints in the output heatmap
        2) sample the tagmap's value corresponding to each of the topk coordinate
        3) match different joints to combine to some people with Hungary algorithm
        4) adjust the coordinate by +-0.25 to decrease error std
        5) salvage missing joints by check positivity of heatmap - tagdiff_norm

    Args:
        max_num_people (int): max number of people support in postprocess
        heat_thresh (float): value of topk below this threshhold will be ignored
        tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init
        inputs(list[heatmap]): the output list of model, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
        original_height, original_width (float): the original image size
    '''

    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
        self.max_num_people = max_num_people
        self.heat_thresh = heat_thresh
        self.tag_thresh = tag_thresh

    def lerp(self, j, y, x, heatmap):
        """Quarter-pixel refinement: shift each peak +-0.25 toward the
        larger neighboring heatmap value (plus a 0.5 pixel-center offset).

        Args:
            j: joint indices, y/x: integer peak coordinates (arrays).
            heatmap: (J, H, W) numpy heatmap.

        Returns:
            (offset_y, offset_x) arrays to add to y/x.
        """
        H, W = heatmap.shape[-2:]
        # Clamp neighbor coordinates at the borders.
        left = np.clip(x - 1, 0, W - 1)
        right = np.clip(x + 1, 0, W - 1)
        up = np.clip(y - 1, 0, H - 1)
        down = np.clip(y + 1, 0, H - 1)
        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
                            -0.25)
        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
                            -0.25)
        return offset_y + 0.5, offset_x + 0.5

    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
                 original_width):
        """Group top-k joint peaks into people and refine their coordinates.

        Returns:
            (pose_kpts, mean_score): (K, J, 3) keypoints as (x, y, score) in
            original-image coordinates, and per-person mean scores (computed
            before joint salvage).
        """
        N, J, H, W = heatmap.shape
        assert N == 1, "only support batch size 1"
        heatmap = heatmap[0].cpu().detach().numpy()
        tagmap = tagmap[0].cpu().detach().numpy()
        heats = heat_k[0].cpu().detach().numpy()
        inds_np = inds_k[0].cpu().detach().numpy()
        # Flat top-k indices -> 2D peak coordinates.
        y = inds_np // W
        x = inds_np % W
        # Sample the tag embedding of every peak: (J, max_num_people, tag_dim).
        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people),
                      y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
        coords = np.stack((y, x), axis=2)
        # threshold
        mask = heats > self.heat_thresh
        # cluster: one entry per person, keyed by the first tag value seen.
        cluster = defaultdict(lambda: {
            'coords': np.zeros((J, 2), dtype=np.float32),
            'scores': np.zeros(J, dtype=np.float32),
            'tags': []
        })
        for jid, m in enumerate(mask):
            num_valid = m.sum()
            if num_valid == 0:
                continue
            valid_inds = np.where(m)[0]
            valid_tags = tags[jid, m, :]
            if len(cluster) == 0:  # initialize
                for i in valid_inds:
                    tag = tags[jid, i]
                    key = tag[0]
                    cluster[key]['tags'].append(tag)
                    cluster[key]['scores'][jid] = heats[jid, i]
                    cluster[key]['coords'][jid] = coords[jid, i]
                continue
            candidates = list(cluster.keys())[:self.max_num_people]
            centroids = [
                np.mean(
                    cluster[k]['tags'], axis=0) for k in candidates
            ]
            num_clusters = len(centroids)
            # shape is (num_valid, num_clusters, tag_dim)
            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
            # modulate dist with heat value, see `use_detection_val`
            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
            # pad the cost matrix, otherwise new pose are ignored
            if num_valid > num_clusters:
                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                              'constant',
                              constant_values=((0, 0), (0, 1e-10)))
            # Hungarian assignment of peaks to existing clusters.
            rows, cols = linear_sum_assignment(cost)
            for y, x in zip(rows, cols):
                tag = tags[jid, y]
                if y < num_valid and x < num_clusters and \
                   l2_dist[y, x] < self.tag_thresh:
                    key = candidates[x]  # merge to cluster
                else:
                    key = tag[0]  # initialize new cluster
                cluster[key]['tags'].append(tag)
                cluster[key]['scores'][jid] = heats[jid, y]
                cluster[key]['coords'][jid] = coords[jid, y]

        # shape is [k, J, 2] and [k, J]
        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
        valid = pose_scores > 0

        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
        if valid.sum() == 0:
            return pose_kpts, pose_kpts

        # refine coords with the quarter-pixel offsets
        valid_coords = pose_coords[valid].astype(np.int32)
        y = valid_coords[..., 0].flatten()
        x = valid_coords[..., 1].flatten()
        _, j = np.nonzero(valid)
        offsets = self.lerp(j, y, x, heatmap)
        pose_coords[valid, 0] += offsets[0]
        pose_coords[valid, 1] += offsets[1]

        # mean score before salvage
        mean_score = pose_scores.mean(axis=1)
        pose_kpts[valid, 2] = pose_scores[valid]

        # salvage missing joints: for each person, look for joints whose
        # (heatmap - rounded tag distance) is still positive anywhere.
        if True:
            for pid, coords in enumerate(pose_coords):
                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
                score = heatmap - np.round(norm)  # (J, H, W)
                flat_score = score.reshape(J, -1)
                max_inds = np.argmax(flat_score, axis=1)
                max_scores = np.max(flat_score, axis=1)
                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
                if salvage_joints.sum() == 0:
                    continue
                y = max_inds[salvage_joints] // W
                x = max_inds[salvage_joints] % W
                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
                y = y.astype(np.float32) + offsets[0]
                x = x.astype(np.float32) + offsets[1]
                pose_coords[pid][salvage_joints, 0] = y
                pose_coords[pid][salvage_joints, 1] = x
                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
        # Map (y, x) heatmap coordinates back to the original image as (x, y).
        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
                                       original_height, original_width,
                                       min(H, W))
        return pose_kpts, mean_score

View File

@@ -0,0 +1,468 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import numpy as np
import math
import cv2
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..keypoint_utils import transform_preds
from .. import layers as L
from paddle.nn import functional as F
__all__ = ['TopDownHRNet', 'TinyPose3DHRNet', 'TinyPose3DHRHeatmapNet']
@register
class TopDownHRNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=True,
                 shift_heatmap=True,
                 use_dark=True):
        """
        HRNet network, see https://arxiv.org/abs/1902.09212

        Args:
            width (int): number of channels produced by the backbone branch
                fed into the final 1x1 conv.
            num_joints (int): number of keypoint heatmaps to predict.
            backbone (nn.Layer): backbone instance
            loss (object): keypoint loss instance (injected by config).
            post_process (object): `HRNetPostProcess` instance
            flip_perm (list): The left-right joints exchange order list
            flip (bool): whether to average predictions with a horizontally
                flipped forward pass at test time.
            shift_heatmap (bool): shift the flipped heatmap one pixel right
                before averaging (standard flip-test alignment trick).
            use_dark(bool): Whether to use DARK in post processing
        """
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = HRNetPostProcess(use_dark)
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        # 1x1 conv mapping backbone features to one heatmap per joint
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        return {'backbone': backbone, }

    def _forward(self):
        """Shared forward path for train / deploy / eval modes.

        Returns the loss dict when training, raw heatmaps plus argmax
        indices when deploying, and post-processed keypoints otherwise.
        """
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])
        if self.training:
            return self.loss(hrnet_outputs, self.inputs)
        elif self.deploy:
            # deploy mode: return flattened per-joint argmax so the exported
            # model needs no numpy post-processing
            outshape = hrnet_outputs.shape
            max_idx = paddle.argmax(
                hrnet_outputs.reshape(
                    (outshape[0], outshape[1], outshape[2] * outshape[3])),
                axis=-1)
            return hrnet_outputs, max_idx
        else:
            if self.flip:
                # flip test: run the mirrored image, un-flip the heatmaps,
                # then average with the original prediction
                self.inputs['image'] = self.inputs['image'].flip([3])
                feats = self.backbone(self.inputs)
                output_flipped = self.final_conv(feats[0])
                output_flipped = self.flip_back(output_flipped.numpy(),
                                                self.flip_perm)
                output_flipped = paddle.to_tensor(output_flipped.copy())
                if self.shift_heatmap:
                    # shift one pixel to compensate flip quantization
                    output_flipped[:, :, :, 1:] = output_flipped.clone(
                    )[:, :, :, 0:-1]
                hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5
            # fall back to image-shape-derived center/scale when the batch
            # does not carry explicit crop metadata
            imshape = (self.inputs['im_shape'].numpy()
                       )[:, ::-1] if 'im_shape' in self.inputs else None
            center = self.inputs['center'].numpy(
            ) if 'center' in self.inputs else np.round(imshape / 2.)
            scale = self.inputs['scale'].numpy(
            ) if 'scale' in self.inputs else imshape / 200.
            outputs = self.post_process(hrnet_outputs, center, scale)
            return outputs

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        res_lst = self._forward()
        outputs = {'keypoint': res_lst}
        return outputs

    def flip_back(self, output_flipped, matched_parts):
        """Undo a horizontal flip on heatmaps: mirror the width axis and
        swap every left/right joint pair listed in *matched_parts*."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
        return output_flipped
class HRNetPostProcess(object):
    """Decode HRNet heatmaps into image-space keypoint coordinates,
    optionally refined with DARK (Distribution-Aware coordinate
    Representation of Keypoints)."""

    def __init__(self, use_dark=True):
        # use_dark: refine argmax coordinates via Taylor expansion of the
        # log-heatmap instead of the classic quarter-offset heuristic
        self.use_dark = use_dark

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps
        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)
        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))
        # convert flat argmax index to (x, y)
        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
        # zero out joints whose best response is non-positive
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)
        preds *= pred_mask
        return preds, maxvals

    def gaussian_blur(self, heatmap, kernel):
        """Blur every joint heatmap with an edge-padded Gaussian, then
        rescale so each map keeps its original peak value (required by the
        DARK Taylor-expansion step)."""
        border = (kernel - 1) // 2
        batch_size = heatmap.shape[0]
        num_joints = heatmap.shape[1]
        height = heatmap.shape[2]
        width = heatmap.shape[3]
        for i in range(batch_size):
            for j in range(num_joints):
                origin_max = np.max(heatmap[i, j])
                dr = np.zeros((height + 2 * border, width + 2 * border))
                dr[border:-border, border:-border] = heatmap[i, j].copy()
                dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
                heatmap[i, j] = dr[border:-border, border:-border].copy()
                heatmap[i, j] *= origin_max / np.max(heatmap[i, j])
        return heatmap

    def dark_parse(self, hm, coord):
        """Refine one coordinate by a second-order Taylor expansion of the
        (log-)heatmap *hm* around the integer peak; no-op near borders or
        when the Hessian is singular."""
        heatmap_height = hm.shape[0]
        heatmap_width = hm.shape[1]
        px = int(coord[0])
        py = int(coord[1])
        if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
            # central-difference gradient and Hessian at the peak
            dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
            dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
            dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
            dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \
                + hm[py-1][px-1])
            dyy = 0.25 * (
                hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px])
            derivative = np.matrix([[dx], [dy]])
            hessian = np.matrix([[dxx, dxy], [dxy, dyy]])
            if dxx * dyy - dxy**2 != 0:
                # Newton step: offset = -H^-1 * grad
                hessianinv = hessian.I
                offset = -hessianinv * derivative
                offset = np.squeeze(np.array(offset.T), axis=0)
                coord += offset
        return coord

    def dark_postprocess(self, hm, coords, kernelsize):
        '''DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
        Representation for Human Pose Estimation (CVPR 2020).
        '''
        hm = self.gaussian_blur(hm, kernelsize)
        # work in log space; clamp first so log is defined everywhere
        hm = np.maximum(hm, 1e-10)
        hm = np.log(hm)
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
        return coords

    def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.
        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)
        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]
        if self.use_dark:
            coords = self.dark_postprocess(heatmaps, coords, kernelsize)
        else:
            # classic refinement: shift a quarter pixel toward the larger
            # neighbouring response
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
        preds = coords.copy()
        # Transform back
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])
        return preds, maxvals

    def __call__(self, output, center, scale):
        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
        # pack as [[kpts_with_scores, mean_score_per_instance]]
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs
class TinyPose3DPostProcess(object):
    """Rescale TinyPose3D keypoint predictions back to input-image scale."""

    def __init__(self):
        pass

    def __call__(self, output, center, scale):
        """
        Args:
            output (numpy.ndarray): numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
            scale (numpy.ndarray): The scale factor
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
        """
        preds = output.numpy().copy()
        batch_size = output.shape[0]
        # multiply x/y by the per-sample scale; z is left untouched
        for idx in range(batch_size):
            sample = preds[idx]
            sample[:, 0] = sample[:, 0] * scale[idx][0]
            sample[:, 1] = sample[:, 1] * scale[idx][1]
        return preds
def soft_argmax(heatmaps, joint_num):
    """Differentiable soft-argmax over packed 3D heatmaps.

    Args:
        heatmaps (Tensor): [batch_size, joint_num * depth_dim, H, W] raw
            scores; each joint owns `depth_dim` consecutive channels.
        joint_num (int): number of keypoints packed along the channel axis.

    Returns:
        Tensor: [batch_size, joint_num, 3] expected (x, y, z) coordinates,
        each in [0, axis_size - 1].
    """
    dims = heatmaps.shape
    depth_dim = (int)(dims[1] / joint_num)
    height, width = dims[2], dims[3]
    # softmax over each joint's full (depth, height, width) volume
    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim * height * width))
    heatmaps = F.softmax(heatmaps, 2)
    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim, height, width))
    # marginal distributions along each axis
    accu_x = heatmaps.sum(axis=(2, 3))  # [N, J, W]
    accu_y = heatmaps.sum(axis=(2, 4))  # [N, J, H]
    accu_z = heatmaps.sum(axis=(3, 4))  # [N, J, D]
    # expectation under each marginal; use the real axis sizes instead of a
    # hard-coded arange(1, 33), which was only valid for 32x32x32 volumes
    accu_x = accu_x * paddle.arange(1, width + 1)
    accu_y = accu_y * paddle.arange(1, height + 1)
    accu_z = accu_z * paddle.arange(1, depth_dim + 1)
    # shift from 1-based weighting back to 0-based coordinates
    accu_x = accu_x.sum(axis=2, keepdim=True) - 1
    accu_y = accu_y.sum(axis=2, keepdim=True) - 1
    accu_z = accu_z.sum(axis=2, keepdim=True) - 1
    coord_out = paddle.concat(
        (accu_x, accu_y, accu_z), axis=2)  # [batch_size, joint_num, 3]
    return coord_out
@register
class TinyPose3DHRHeatmapNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(
            self,
            width,  # 40: number of channels output by the backbone
            num_joints,
            backbone='HRNet',
            loss='KeyPointRegressionMSELoss',
            post_process=TinyPose3DPostProcess):
        """
        TinyPose3D variant that regresses packed 3D heatmaps and decodes
        them with soft-argmax.

        Args:
            width (int): channel count of the backbone's output feature map.
            num_joints (int): number of keypoints.
            backbone (nn.Layer): backbone instance
            loss (object): 3D keypoint regression loss (injected by config).
            post_process (object): post process instance
        """
        super(TinyPose3DHRHeatmapNet, self).__init__()
        self.backbone = backbone
        self.post_process = TinyPose3DPostProcess()
        self.loss = loss
        self.deploy = False
        self.num_joints = num_joints
        # each joint owns a 32-bin depth volume packed along the channel axis
        self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        return {'backbone': backbone, }

    def _forward(self):
        feats = self.backbone(self.inputs)  # feats: [[batch_size, 40, 32, 24]]
        hrnet_outputs = self.final_conv(feats[0])
        # soft-argmax decodes packed volumes to [batch, joints, 3] coords
        res = soft_argmax(hrnet_outputs, self.num_joints)
        return res

    def get_loss(self):
        pose3d = self._forward()
        loss = self.loss(pose3d, None, self.inputs)
        outputs = {'loss': loss}
        return outputs

    def get_pred(self):
        res_lst = self._forward()
        outputs = {'pose3d': res_lst}
        return outputs

    def flip_back(self, output_flipped, matched_parts):
        """Undo a horizontal flip: mirror the width axis and swap the
        left/right joint pairs in *matched_parts*."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
        return output_flipped
@register
class TinyPose3DHRNet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 fc_channel=768,
                 backbone='HRNet',
                 loss='KeyPointRegressionMSELoss',
                 post_process=TinyPose3DPostProcess):
        """
        TinyPose3D variant that regresses (x, y, z) per joint with a small
        MLP head on top of flattened per-joint feature maps.

        Args:
            width (int): channel count of the backbone's output feature map.
            num_joints (int): number of keypoints.
            fc_channel (int): flattened spatial size feeding the first FC
                layer (H/4 * W/4 of the backbone feature map).
            backbone (nn.Layer): backbone instance
            loss (object): 3D keypoint regression loss (injected by config).
            post_process (object): post process instance
        """
        super(TinyPose3DHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = TinyPose3DPostProcess()
        self.loss = loss
        self.deploy = False
        self.num_joints = num_joints
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        # flatten spatial dims, then regress 3 coords per joint via MLP
        self.flatten = paddle.nn.Flatten(start_axis=2, stop_axis=3)
        self.fc1 = paddle.nn.Linear(fc_channel, 256)
        self.act1 = paddle.nn.ReLU()
        self.fc2 = paddle.nn.Linear(256, 64)
        self.act2 = paddle.nn.ReLU()
        self.fc3 = paddle.nn.Linear(64, 3)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        return {'backbone': backbone, }

    def _forward(self):
        '''
        self.inputs is a dict
        '''
        feats = self.backbone(
            self.inputs)  # feats: [[batch_size, 40, width/4, height/4]]
        hrnet_outputs = self.final_conv(
            feats[0])  # hrnet_outputs: [batch_size, num_joints*32,32,32]
        flatten_res = self.flatten(
            hrnet_outputs)  # [batch_size,num_joints*32,32*32]
        res = self.fc1(flatten_res)
        res = self.act1(res)
        res = self.fc2(res)
        res = self.act2(res)
        res = self.fc3(res)
        if self.training:
            return self.loss(res, self.inputs)
        else:  # export model need
            return res

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        res_lst = self._forward()
        outputs = {'pose3d': res_lst}
        return outputs

    def flip_back(self, output_flipped, matched_parts):
        """Undo a horizontal flip: mirror the width axis and swap the
        left/right joint pairs in *matched_parts*."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'
        output_flipped = output_flipped[:, :, :, ::-1]
        for pair in matched_parts:
            tmp = output_flipped[:, pair[0], :, :].copy()
            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
            output_flipped[:, pair[1], :, :] = tmp
        return output_flipped

View File

@@ -0,0 +1,217 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is base on https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register
from .meta_arch import BaseArch
from .. import layers as L
__all__ = ['PETR']
@register
class PETR(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['backbone', 'neck', 'bbox_head']

    def __init__(self,
                 backbone='ResNet',
                 neck='ChannelMapper',
                 bbox_head='PETRHead'):
        """
        PETR, see https://openaccess.thecvf.com/content/CVPR2022/papers/Shi_End-to-End_Multi-Person_Pose_Estimation_With_Transformers_CVPR_2022_paper.pdf
        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck between backbone and head
            bbox_head (nn.Layer): model output and loss
        """
        super(PETR, self).__init__()
        self.backbone = backbone
        if neck is not None:
            self.with_neck = True
            self.neck = neck
        self.bbox_head = bbox_head
        self.deploy = False

    def extract_feat(self, img):
        """Directly extract features from the backbone+neck."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def get_inputs(self):
        """Split padded batch tensors into per-image lists of GT boxes,
        labels, keypoints and areas, plus mmdet-style img_metas dicts."""
        img_metas = []
        gt_bboxes = []
        gt_labels = []
        gt_keypoints = []
        gt_areas = []
        pad_gt_mask = self.inputs['pad_gt_mask'].astype("bool").squeeze(-1)
        for idx, im_shape in enumerate(self.inputs['im_shape']):
            img_meta = {
                'img_shape': im_shape.astype("int32").tolist() + [1, ],
                'batch_input_shape': self.inputs['image'].shape[-2:],
                'image_name': self.inputs['image_file'][idx]
            }
            img_metas.append(img_meta)
            if (not pad_gt_mask[idx].any()):
                # image with no valid GT: keep one (padded) entry so
                # downstream code always sees non-empty tensors
                gt_keypoints.append(self.inputs['gt_joints'][idx][:1])
                gt_labels.append(self.inputs['gt_class'][idx][:1])
                gt_bboxes.append(self.inputs['gt_bbox'][idx][:1])
                gt_areas.append(self.inputs['gt_areas'][idx][:1])
                continue
            gt_keypoints.append(self.inputs['gt_joints'][idx][pad_gt_mask[idx]])
            gt_labels.append(self.inputs['gt_class'][idx][pad_gt_mask[idx]])
            gt_bboxes.append(self.inputs['gt_bbox'][idx][pad_gt_mask[idx]])
            gt_areas.append(self.inputs['gt_areas'][idx][pad_gt_mask[idx]])
        return img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas

    def get_loss(self):
        """
        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): A List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                :class:`mmdet.datasets.pipelines.Collect`.
            gt_bboxes (list[Tensor]): Each item are the truth boxes for each
                image in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box.
            gt_keypoints (list[Tensor]): Each item are the truth keypoints for
                each image in [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x,
                p^{K}_y, p^{K}_v] format.
            gt_areas (list[Tensor]): mask areas corresponding to each box.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.
        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas = self.get_inputs(
        )
        # self.inputs is a dict, so use .get (getattr on a dict always
        # returned the default, silently dropping any ignore boxes)
        gt_bboxes_ignore = self.inputs.get('gt_bboxes_ignore', None)
        x = self.extract_feat(self.inputs)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, gt_keypoints, gt_areas,
                                              gt_bboxes_ignore)
        # sum all components into a single 'loss' entry
        loss = 0
        for k, v in losses.items():
            loss += v
        losses['loss'] = loss
        return losses

    def get_pred_numpy(self):
        """Used for computing network flops.
        """
        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        dummy_img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=(1., 1., 1., 1.)) for _ in range(batch_size)
        ]
        x = self.extract_feat(img)
        outs = self.bbox_head(x, img_metas=dummy_img_metas)
        bbox_list = self.bbox_head.get_bboxes(
            *outs, dummy_img_metas, rescale=True)
        return bbox_list

    def get_pred(self):
        """Run inference and pack [keypoints, scores] per image under the
        'keypoint' key, copying each box score into its keypoints' third
        column."""
        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=self.inputs['scale_factor'][i])
            for i in range(batch_size)
        ]
        kptpred = self.simple_test(
            self.inputs, img_metas=img_metas, rescale=True)
        keypoints = kptpred[0][1][0]
        bboxs = kptpred[0][0][0]
        # replace per-joint visibility with the instance's box score
        keypoints[..., 2] = bboxs[:, None, 4]
        res_lst = [[keypoints, bboxs[:, 4]]]
        outputs = {'keypoint': res_lst}
        return outputs

    def simple_test(self, inputs, img_metas, rescale=False):
        """Test function without test time augmentation.
        Args:
            inputs (list[paddle.Tensor]): List of multiple images.
            img_metas (list[dict]): List of image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.
        Returns:
            list[list[np.ndarray]]: BBox and keypoint results of each image
                and classes. The outer list corresponds to each image.
                The inner list corresponds to each class.
        """
        batch_size = len(img_metas)
        assert batch_size == 1, 'Currently only batch_size 1 for inference ' \
            f'mode is supported. Found batch_size {batch_size}.'
        feat = self.extract_feat(inputs)
        results_list = self.bbox_head.simple_test(
            feat, img_metas, rescale=rescale)
        bbox_kpt_results = [
            self.bbox_kpt2result(det_bboxes, det_labels, det_kpts,
                                 self.bbox_head.num_classes)
            for det_bboxes, det_labels, det_kpts in results_list
        ]
        return bbox_kpt_results

    def bbox_kpt2result(self, bboxes, labels, kpts, num_classes):
        """Convert detection results to a list of numpy arrays.
        Args:
            bboxes (paddle.Tensor | np.ndarray): shape (n, 5).
            labels (paddle.Tensor | np.ndarray): shape (n, ).
            kpts (paddle.Tensor | np.ndarray): shape (n, K, 3).
            num_classes (int): class number, including background class.
        Returns:
            list(ndarray): bbox and keypoint results of each class.
        """
        # local import: this module never imported numpy at top level, so the
        # empty-detection branch previously raised NameError on `np`
        import numpy as np
        if bboxes.shape[0] == 0:
            # `.size(1)` was a torch-ism; neither paddle tensors nor numpy
            # arrays support it — use .shape[1]
            return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)], \
                [np.zeros((0, kpts.shape[1], 3), dtype=np.float32)
                 for i in range(num_classes)]
        else:
            if isinstance(bboxes, paddle.Tensor):
                bboxes = bboxes.numpy()
                labels = labels.numpy()
                kpts = kpts.numpy()
            return [bboxes[labels == i, :] for i in range(num_classes)], \
                [kpts[labels == i, :, :] for i in range(num_classes)]

View File

@@ -0,0 +1,317 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import numpy as np
import math
import cv2
from ppdet.core.workspace import register, create, serializable
from .meta_arch import BaseArch
from ..keypoint_utils import transform_preds
from .. import layers as L
__all__ = ['VitPose_TopDown', 'VitPosePostProcess']
@register
class VitPose_TopDown(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self, backbone, head, loss, post_process, flip_test):
        """
        VitPose network, see https://arxiv.org/pdf/2204.12484v2.pdf

        Args:
            backbone (nn.Layer): backbone instance
            head (nn.Layer): keypoint heatmap head.
            loss (object): keypoint loss (injected by config).
            post_process (object): `VitPosePostProcess` instance
            flip_test (bool): average with a flipped forward pass at test
                time.
        """
        super(VitPose_TopDown, self).__init__()
        self.backbone = backbone
        self.head = head
        self.loss = loss
        self.post_process = post_process
        self.flip_test = flip_test

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head
        head = create(cfg['head'])
        # post_process
        post_process = create(cfg['post_process'])
        return {
            'backbone': backbone,
            'head': head,
            'post_process': post_process
        }

    def _forward_train(self):
        feats = self.backbone.forward_features(self.inputs['image'])
        vitpost_output = self.head(feats)
        return self.loss(vitpost_output, self.inputs)

    def _forward_test(self):
        feats = self.backbone.forward_features(self.inputs['image'])
        output_heatmap = self.head(feats)
        if self.flip_test:
            # average the heatmap with the prediction on the mirrored image
            img_flipped = self.inputs['image'].flip(3)
            features_flipped = self.backbone.forward_features(img_flipped)
            output_flipped_heatmap = self.head.inference_model(features_flipped,
                                                               self.flip_test)
            output_heatmap = (output_heatmap + output_flipped_heatmap) * 0.5
        # fall back to image-shape-derived center/scale when crop metadata
        # is absent
        imshape = (self.inputs['im_shape'].numpy()
                   )[:, ::-1] if 'im_shape' in self.inputs else None
        center = self.inputs['center'].numpy(
        ) if 'center' in self.inputs else np.round(imshape / 2.)
        scale = self.inputs['scale'].numpy(
        ) if 'scale' in self.inputs else imshape / 200.
        result = self.post_process(output_heatmap.cpu().numpy(), center, scale)
        return result

    def get_loss(self):
        return self._forward_train()

    def get_pred(self):
        res_lst = self._forward_test()
        outputs = {'keypoint': res_lst}
        return outputs
@register
@serializable
class VitPosePostProcess(object):
    """Decode ViTPose heatmaps to image-space keypoints, optionally with
    UDP/DARK refinement (use_dark=True)."""

    def __init__(self, use_dark=False):
        self.use_dark = use_dark

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps
        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)
        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))
        # convert flat argmax index to (x, y); floor of // is redundant but
        # harmless
        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) // width)
        # zero out joints whose best response is non-positive
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)
        preds *= pred_mask
        return preds, maxvals

    def post_datk_udp(self, coords, batch_heatmaps, kernel=3):
        """DARK post-pocessing. Implemented by udp. Paper ref: Huang et al. The
        Devil is in the Details: Delving into Unbiased Data Processing for Human
        Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
        Representation for Human Pose Estimation (CVPR 2020).
        Note:
            - batch size: B
            - num keypoints: K
            - num persons: N
            - height of heatmaps: H
            - width of heatmaps: W
            B=1 for bottom_up paradigm where all persons share the same heatmap.
            B=N for top_down paradigm where each person has its own heatmaps.
        Args:
            coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
            batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
            kernel (int): Gaussian kernel size (K) for modulation.
        Returns:
            np.ndarray([N, K, 2]): Refined coordinates.
        """
        if not isinstance(batch_heatmaps, np.ndarray):
            batch_heatmaps = batch_heatmaps.cpu().numpy()
        B, K, H, W = batch_heatmaps.shape
        N = coords.shape[0]
        assert (B == 1 or B == N)
        # in-place Gaussian smoothing, then clamp and move to log space
        for heatmaps in batch_heatmaps:
            for heatmap in heatmaps:
                cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
        np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
        np.log(batch_heatmaps, batch_heatmaps)
        # edge-pad one pixel so neighbour lookups never run off the map
        batch_heatmaps_pad = np.pad(batch_heatmaps, ((0, 0), (0, 0), (1, 1),
                                                     (1, 1)),
                                    mode='edge').flatten()
        # flat indices of each coordinate in the padded, flattened volume
        index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
        index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
        index = index.astype(int).reshape(-1, 1)
        # sample the peak and its 4-neighbourhood for finite differences
        i_ = batch_heatmaps_pad[index]
        ix1 = batch_heatmaps_pad[index + 1]
        iy1 = batch_heatmaps_pad[index + W + 2]
        ix1y1 = batch_heatmaps_pad[index + W + 3]
        ix1_y1_ = batch_heatmaps_pad[index - W - 3]
        ix1_ = batch_heatmaps_pad[index - 1]
        iy1_ = batch_heatmaps_pad[index - 2 - W]
        dx = 0.5 * (ix1 - ix1_)
        dy = 0.5 * (iy1 - iy1_)
        derivative = np.concatenate([dx, dy], axis=1)
        derivative = derivative.reshape(N, K, 2, 1)
        dxx = ix1 - 2 * i_ + ix1_
        dyy = iy1 - 2 * i_ + iy1_
        dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
        hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
        hessian = hessian.reshape(N, K, 2, 2)
        # Newton step: coords -= H^-1 * grad (eps keeps H invertible)
        hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
        coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
        return coords

    def transform_preds_udp(self,
                            coords,
                            center,
                            scale,
                            output_size,
                            use_udp=True):
        """Get final keypoint predictions from heatmaps and apply scaling and
        translation to map them back to the image.
        Note:
            num_keypoints: K
        Args:
            coords (np.ndarray[K, ndims]):
                * If ndims=2, corrds are predicted keypoint location.
                * If ndims=4, corrds are composed of (x, y, scores, tags)
                * If ndims=5, corrds are composed of (x, y, scores, tags,
                  flipped_tags)
            center (np.ndarray[2, ]): Center of the bounding box (x, y).
            scale (np.ndarray[2, ]): Scale of the bounding box
                wrt [width, height].
            output_size (np.ndarray[2, ] | list(2,)): Size of the
                destination heatmaps.
            use_udp (bool): Use unbiased data processing
        Returns:
            np.ndarray: Predicted coordinates in the images.
        """
        assert coords.shape[1] in (2, 4, 5)
        assert len(center) == 2
        assert len(scale) == 2
        assert len(output_size) == 2
        # Recover the scale which is normalized by a factor of 200.
        scale = scale * 200.0
        if use_udp:
            # unbiased mapping uses (size - 1) so heatmap corner pixels map
            # exactly onto box corners
            scale_x = scale[0] / (output_size[0] - 1.0)
            scale_y = scale[1] / (output_size[1] - 1.0)
        else:
            scale_x = scale[0] / output_size[0]
            scale_y = scale[1] / output_size[1]
        target_coords = np.ones_like(coords)
        target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[
            0] * 0.5
        target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[
            1] * 0.5
        return target_coords

    def get_final_preds(self, heatmaps, center, scale, kernelsize=11):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.
        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)
        N, K, H, W = heatmaps.shape
        if self.use_dark:
            # UDP/DARK refinement plus unbiased back-projection
            coords = self.post_datk_udp(coords, heatmaps, kernelsize)
            preds = coords.copy()
            # Transform back to the image
            for i in range(N):
                preds[i] = self.transform_preds_udp(preds[i], center[i],
                                                    scale[i], [W, H])
        else:
            # classic refinement: quarter-pixel shift toward the larger
            # neighbouring response
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
            preds = coords.copy()
            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale[i],
                                           [W, H])
        return preds, maxvals

    def __call__(self, output, center, scale):
        preds, maxvals = self.get_final_preds(output, center, scale)
        # pack as [[kpts_with_scores, mean_score_per_instance]]
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['MaskRCNN']
@register
class MaskRCNN(BaseArch):
"""
Mask R-CNN network, see https://arxiv.org/abs/1703.06870
Args:
backbone (object): backbone instance
rpn_head (object): `RPNHead` instance
bbox_head (object): `BBoxHead` instance
mask_head (object): `MaskHead` instance
bbox_post_process (object): `BBoxPostProcess` instance
mask_post_process (object): `MaskPostProcess` instance
neck (object): 'FPN' instance
"""
__category__ = 'architecture'
__inject__ = [
'bbox_post_process',
'mask_post_process',
]
def __init__(self,
backbone,
rpn_head,
bbox_head,
mask_head,
bbox_post_process,
mask_post_process,
neck=None):
super(MaskRCNN, self).__init__()
self.backbone = backbone
self.neck = neck
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.mask_head = mask_head
self.bbox_post_process = bbox_post_process
self.mask_post_process = mask_post_process
@classmethod
def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = cfg['neck'] and create(cfg['neck'], **kwargs)
out_shape = neck and neck.out_shape or backbone.out_shape
kwargs = {'input_shape': out_shape}
rpn_head = create(cfg['rpn_head'], **kwargs)
bbox_head = create(cfg['bbox_head'], **kwargs)
out_shape = neck and out_shape or bbox_head.get_head().out_shape
kwargs = {'input_shape': out_shape}
mask_head = create(cfg['mask_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"rpn_head": rpn_head,
"bbox_head": bbox_head,
"mask_head": mask_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs)
if self.neck is not None:
body_feats = self.neck(body_feats)
if self.training:
rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
self.inputs)
rois, rois_num = self.bbox_head.get_assigned_rois()
bbox_targets = self.bbox_head.get_assigned_targets()
# Mask Head needs bbox_feat in Mask RCNN
mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
bbox_targets, bbox_feat)
return rpn_loss, bbox_loss, mask_loss
else:
rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)
im_shape = self.inputs['im_shape']
scale_factor = self.inputs['scale_factor']
bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
preds, (rois, rois_num), im_shape, scale_factor)
mask_out = self.mask_head(
body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)
# rescale the prediction back to origin image
bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
bbox, bbox_num, im_shape, scale_factor)
origin_shape = self.bbox_post_process.get_origin_shape()
mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
origin_shape)
if self.use_extra_data:
extra_data = {} # record the bbox output before nms, such like scores and nms_keep_idx
"""extra_data:{
'scores': predict scores,
'nms_keep_idx': bbox index before nms,
}
"""
extra_data['scores'] = preds[1] # predict scores (probability)
# Todo: get logits output
extra_data['nms_keep_idx'] = nms_keep_idx # bbox index before nms
return bbox_pred, bbox_num, mask_pred, extra_data
else:
return bbox_pred, bbox_num, mask_pred
def get_loss(self, ):
bbox_loss, mask_loss, rpn_loss = self._forward()
loss = {}
loss.update(rpn_loss)
loss.update(bbox_loss)
loss.update(mask_loss)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
return loss
def get_pred(self):
    """Run inference and package predictions into the output dict.

    Always contains 'bbox', 'bbox_num' and 'mask'; when
    ``self.use_extra_data`` is set, the pre-NMS extras returned as the
    fourth element of ``_forward()`` are attached under 'extra_data'.
    """
    results = self._forward()
    bbox_pred, bbox_num, mask_pred = results[:3]
    output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
    if self.use_extra_data:
        output['extra_data'] = results[3]
    return output

View File

@@ -0,0 +1,132 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
import paddle.nn as nn
import typing
from ppdet.core.workspace import register
from ppdet.modeling.post_process import nms
__all__ = ['BaseArch']
@register
class BaseArch(nn.Layer):
    """Common base class for all detection architectures.

    Handles input-layout conversion (NCHW/NHWC), optional fused
    mean/std normalization, and multi-scale test merging; subclasses
    implement ``get_loss()`` / ``get_pred()``.

    Args:
        data_format (str): layout of the input image tensor, 'NCHW'
            (default) or 'NHWC'.
        use_extra_data (bool): if True, subclasses may attach extra
            pre-NMS prediction data to their outputs.
    """

    def __init__(self, data_format='NCHW', use_extra_data=False):
        super(BaseArch, self).__init__()
        self.data_format = data_format
        # current input dict; refreshed on every forward() call
        self.inputs = {}
        # when True, mean/std normalization is applied inside forward()
        # using the scale/bias tensors prepared by load_meanstd()
        self.fuse_norm = False
        self.use_extra_data = use_extra_data

    def load_meanstd(self, cfg_transform):
        """Read mean/std from a 'NormalizeImage' transform entry and cache
        them as fused scale/bias tensors shaped for self.data_format."""
        scale = 1.
        # ImageNet defaults, used when no NormalizeImage entry is present
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        for item in cfg_transform:
            if 'NormalizeImage' in item:
                mean = np.array(
                    item['NormalizeImage']['mean'], dtype=np.float32)
                std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
                if item['NormalizeImage'].get('is_scale', True):
                    scale = 1. / 255.
                break
        if self.data_format == 'NHWC':
            self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
            self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
        else:
            self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
            self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))

    def forward(self, inputs):
        """Dispatch to get_loss() in training or get_pred() in eval mode.

        In eval mode ``inputs`` may be a sequence of input dicts
        (multi-scale test); per-scale predictions are merged via
        merge_multi_scale_predictions().
        """
        if self.data_format == 'NHWC':
            image = inputs['image']
            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])

        if self.fuse_norm:
            image = inputs['image']
            self.inputs['image'] = image * self.scale + self.bias
            self.inputs['im_shape'] = inputs['im_shape']
            self.inputs['scale_factor'] = inputs['scale_factor']
        else:
            self.inputs = inputs

        # optional pre-computation hook (no-op by default)
        self.model_arch()

        if self.training:
            out = self.get_loss()
        else:
            inputs_list = []
            # multi-scale input
            if not isinstance(inputs, typing.Sequence):
                inputs_list.append(inputs)
            else:
                inputs_list.extend(inputs)
            outs = []
            for inp in inputs_list:
                if self.fuse_norm:
                    self.inputs['image'] = inp['image'] * self.scale + self.bias
                    self.inputs['im_shape'] = inp['im_shape']
                    self.inputs['scale_factor'] = inp['scale_factor']
                else:
                    self.inputs = inp
                outs.append(self.get_pred())
            # multi-scale test
            if len(outs) > 1:
                out = self.merge_multi_scale_predictions(outs)
            else:
                out = outs[0]
        return out

    def merge_multi_scale_predictions(self, outs):
        """Merge per-scale bbox predictions with class-wise NMS, keeping the
        overall top-k boxes by score. RCNN-family architectures only."""
        # default values for architectures not included in following list
        num_classes = 80
        nms_threshold = 0.5
        keep_top_k = 100

        if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
            num_classes = self.bbox_head.num_classes
            keep_top_k = self.bbox_post_process.nms.keep_top_k
            nms_threshold = self.bbox_post_process.nms.nms_threshold
        else:
            raise Exception(
                "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
            )

        final_boxes = []
        all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
        for c in range(num_classes):
            # bbox rows are [class_id, score, x1, y1, x2, y2]
            idxs = all_scale_outs[:, 0] == c
            if np.count_nonzero(idxs) == 0:
                continue
            r = nms(all_scale_outs[idxs, 1:], nms_threshold)
            final_boxes.append(
                np.concatenate([np.full((r.shape[0], 1), c), r], 1))
        out = np.concatenate(final_boxes)
        # sort by score and keep the global top-k across all classes
        out = np.concatenate(sorted(
            out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
        out = {
            'bbox': paddle.to_tensor(out),
            'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
        }
        return out

    def build_inputs(self, data, input_def):
        """Zip positional ``data`` with the ``input_def`` names into a dict."""
        inputs = {}
        for i, k in enumerate(input_def):
            inputs[k] = data[i]
        return inputs

    def model_arch(self, ):
        # hook executed before loss/prediction; subclasses may override
        pass

    def get_loss(self, ):
        raise NotImplementedError("Should implement get_loss method!")

    def get_pred(self, ):
        raise NotImplementedError("Should implement get_pred method!")

View File

@@ -0,0 +1,69 @@
from typing import Dict
from collections import OrderedDict
from ppdet.modeling.architectures.meta_arch import BaseArch
class MultiSteamDetector(BaseArch):
    """Base class for multi-submodel detectors (e.g. teacher/student pairs
    used in semi-supervised detection).

    NOTE(review): the class name looks like a typo of
    "MultiStreamDetector", but it is referenced elsewhere and must not be
    renamed here.

    Args:
        model (Dict[str, BaseArch]): named submodels; each is attached as
            an attribute of this detector.
        train_cfg: training configuration (project-specific).
        test_cfg: test configuration; its 'inference_on' key selects which
            submodel runs at inference (defaults to the first one).
    """

    def __init__(self,
                 model: Dict[str, BaseArch],
                 train_cfg=None,
                 test_cfg=None):
        super(MultiSteamDetector, self).__init__()
        self.submodules = list(model.keys())
        for k, v in model.items():
            setattr(self, k, v)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.inference_on = self.test_cfg.get("inference_on",
                                              self.submodules[0])
        self.first_load = True

    def forward(self, inputs, return_loss=True, **kwargs):
        """Calls either :func:`forward_train` or :func:`forward_test` depending
        on whether ``return_loss`` is ``True``.

        Note this setting will change the expected inputs. When
        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
        and List[dict]), and when ``return_loss=False``, img and img_meta
        should be double nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test time augmentations.
        """
        if return_loss:
            return self.forward_train(inputs, **kwargs)
        else:
            return self.forward_test(inputs, **kwargs)

    def get_loss(self, **kwargs):
        # losses = self(**data)
        # NOTE(review): ``self`` is passed as a positional argument even
        # though forward_train already receives it via the bound method —
        # this looks like a bug; confirm against callers before changing.
        return self.forward_train(self, **kwargs)

    def model(self, **kwargs) -> BaseArch:
        """Return the submodel named by kwargs['submodule'], or the one
        selected by self.inference_on by default."""
        if "submodule" in kwargs:
            assert (kwargs["submodule"] in self.submodules
                    ), "Detector does not contain submodule {}".format(kwargs[
                        "submodule"])
            model: BaseArch = getattr(self, kwargs["submodule"])
        else:
            model: BaseArch = getattr(self, self.inference_on)
        return model

    def freeze(self, model_ref: str):
        """Put submodel ``model_ref`` into eval mode and stop its gradients."""
        assert model_ref in self.submodules
        model = getattr(self, model_ref)
        model.eval()
        for param in model.parameters():
            param.stop_gradient = True

    def update_ema_model(self, momentum=0.9996):
        """EMA-update teacher weights from the student:
        teacher = momentum * teacher + (1 - momentum) * student."""
        # print(momentum)
        model_dict = self.student.state_dict()
        new_dict = OrderedDict()
        for key, value in self.teacher.state_dict().items():
            if key in model_dict.keys():
                new_dict[key] = (model_dict[key] *
                                 (1 - momentum) + value * momentum)
            else:
                raise Exception("{} is not found in student model".format(key))
        self.teacher.set_dict(new_dict)

View File

@@ -0,0 +1,99 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['PicoDet']
@register
class PicoDet(BaseArch):
    """
    PicoDet detector built on Generalized Focal Loss,
    see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'PicoHead' instance
        nms_cpu (bool): whether NMS runs on CPU during post-processing
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='PicoHead', nms_cpu=False):
        super(PicoDet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        # export-time switches; both enabled by default
        self.export_post_process = True
        self.export_nms = True
        self.nms_cpu = nms_cpu

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # build backbone -> neck -> head, threading output shapes through
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(fpn_feats, self.export_post_process)
        if not self.training and self.export_post_process:
            # decode + NMS into (bboxes, bbox_num)
            return self.head.post_process(
                head_outs,
                self.inputs['scale_factor'],
                export_nms=self.export_nms,
                nms_cpu=self.nms_cpu)
        return head_outs, None

    def get_loss(self, ):
        head_outs, _ = self._forward()
        loss = dict(self.head.get_loss(head_outs, self.inputs))
        # total loss is the sum of every individual loss term
        loss['loss'] = paddle.add_n(list(loss.values()))
        return loss

    def get_pred(self):
        if not self.export_post_process:
            # raw head outputs for export without post-processing
            return {'picodet': self._forward()[0]}
        first, second = self._forward()
        if self.export_nms:
            return {'bbox': first, 'bbox_num': second}
        return {'bbox': first, 'scores': second}

View File

@@ -0,0 +1,114 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from .. import layers as L
__all__ = ['METRO_Body']
def orthographic_projection(X, camera):
    """Perform orthographic projection of 3D points X using the camera parameters

    Args:
        X: size = [B, N, 3] batch of 3D points
        camera: size = [B, 3] -- presumably [scale, trans_x, trans_y] per
            sample (scale at index 0, translation at indices 1:3); verify
            against the caller.
    Returns:
        Projected 2D points -- size = [B, N, 2]
    """
    camera = camera.reshape((-1, 1, 3))
    # translate in the image plane (orthographic: the z coordinate is dropped)
    X_trans = X[:, :, :2] + camera[:, :, 1:]
    shape = paddle.shape(X_trans)
    # flatten so the per-batch scale broadcasts, then restore the shape
    X_2d = (camera[:, :, 0] * X_trans.reshape((shape[0], -1))).reshape(shape)
    return X_2d
@register
class METRO_Body(BaseArch):
    """3D human pose regression network (modified from METRO,
    see https://arxiv.org/abs/2012.09760)."""

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(
            self,
            num_joints,
            backbone='HRNet',
            trans_encoder='',
            loss='Pose3DLoss', ):
        """
        Modified from METRO network, see https://arxiv.org/abs/2012.09760

        Args:
            num_joints (int): number of 3D joints regressed by the network
            backbone (nn.Layer): backbone instance
            trans_encoder: transformer encoder over the learned image tokens
            loss (object): 'Pose3DLoss' instance
        """
        super(METRO_Body, self).__init__()
        self.num_joints = num_joints
        self.backbone = backbone
        self.loss = loss
        self.deploy = False

        self.trans_encoder = trans_encoder
        # maps the 49 spatial tokens to (num_joints + 10) learned tokens
        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
        self.cam_param_fc = paddle.nn.Linear(3, 2)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        trans_encoder = create(cfg['trans_encoder'])

        return {'backbone': backbone, 'trans_encoder': trans_encoder}

    def _forward(self):
        batch_size = self.inputs['image'].shape[0]

        image_feat = self.backbone(self.inputs)
        # NOTE(review): assumes the backbone emits a (B, 2048, 7, 7) feature
        # map (49 spatial positions) -- confirm for non-default backbones.
        image_feat_flatten = image_feat.reshape((batch_size, 2048, 49))
        image_feat_flatten = image_feat_flatten.transpose(perm=(0, 2, 1))
        # and apply a conv layer to learn image token for each 3d joint/vertex position
        features = self.conv_learn_tokens(image_feat_flatten)  # (B, J, C)

        if self.training:
            # apply mask vertex/joint modeling
            # meta_masks is a tensor of all the masks, randomly generated in dataloader
            # we pre-define a [MASK] token, which is a floating-value vector with 0.01s
            meta_masks = self.inputs['mjm_mask'].expand((-1, -1, 2048))
            constant_tensor = paddle.ones_like(features) * 0.01
            features = features * meta_masks + constant_tensor * (1 - meta_masks
                                                                  )
        pred_out = self.trans_encoder(features)

        # first num_joints tokens carry 3D joints; the rest feed the camera fc
        pred_3d_joints = pred_out[:, :self.num_joints, :]
        cam_features = pred_out[:, self.num_joints:, :]

        # learn camera parameters
        pred_2d_joints = self.cam_param_fc(cam_features)
        return pred_3d_joints, pred_2d_joints

    def get_loss(self):
        preds_3d, preds_2d = self._forward()
        loss = self.loss(preds_3d, preds_2d, self.inputs)
        output = {'loss': loss}
        return output

    def get_pred(self):
        preds_3d, preds_2d = self._forward()
        outputs = {'pose3d': preds_3d, 'pose2d': preds_2d}
        return outputs

View File

@@ -0,0 +1,260 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['PPYOLOE', 'PPYOLOEWithAuxHead']
# PP-YOLOE and PP-YOLOE+ are recommended to use this architecture, especially when use distillation or aux head
# PP-YOLOE and PP-YOLOE+ can also use the same architecture of YOLOv3 in yolo.py when not use distillation or aux head
@register
class PPYOLOE(BaseArch):
    """
    PPYOLOE network, see https://arxiv.org/abs/2203.16250

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        yolo_head (nn.Layer): anchor_head instance
        post_process (object): `BBoxPostProcess` instance
        ssod_loss (object): 'SSODPPYOLOELoss' instance, only used for semi-det(ssod)
        for_distill (bool): whether for distillation
        feat_distill_place (str): distill which feature for distillation
        for_mot (bool): whether return other features for multi-object tracking
            models, default False in pure object detection models.
    """

    __category__ = 'architecture'
    __shared__ = ['for_distill']
    __inject__ = ['post_process', 'ssod_loss']

    def __init__(self,
                 backbone='CSPResNet',
                 neck='CustomCSPPAN',
                 yolo_head='PPYOLOEHead',
                 post_process='BBoxPostProcess',
                 ssod_loss='SSODPPYOLOELoss',
                 for_distill=False,
                 feat_distill_place='neck_feats',
                 for_mot=False):
        super(PPYOLOE, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot

        # for ssod, semi-det
        self.is_teacher = False
        self.ssod_loss = ssod_loss

        # distill
        self.for_distill = for_distill
        self.feat_distill_place = feat_distill_place
        if for_distill:
            assert feat_distill_place in ['backbone_feats', 'neck_feats']

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # build backbone -> neck -> head, threading output shapes through
        backbone = create(cfg['backbone'])

        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }

    def _forward(self):
        """Return head losses in training/teacher mode, or the detection
        output dict in eval mode."""
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        self.is_teacher = self.inputs.get('is_teacher', False)  # for semi-det
        if self.training or self.is_teacher:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)

            if self.for_distill:
                # stash the requested feature for the distillation loss
                if self.feat_distill_place == 'backbone_feats':
                    self.yolo_head.distill_pairs['backbone_feats'] = body_feats
                elif self.feat_distill_place == 'neck_feats':
                    self.yolo_head.distill_pairs['neck_feats'] = neck_feats
                else:
                    raise ValueError
            return yolo_losses
        else:
            yolo_head_outs = self.yolo_head(neck_feats)
            if self.post_process is not None:
                bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])
            else:
                # fall back to the head's built-in post-processing
                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                    yolo_head_outs, self.inputs['scale_factor'])

            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                            }
                """
                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                extra_data['nms_keep_idx'] = nms_keep_idx
                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
            else:
                output = {'bbox': bbox, 'bbox_num': bbox_num}

            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

    def get_loss_keys(self):
        # names of the loss components produced by the head (used by ssod)
        return ['loss_cls', 'loss_iou', 'loss_dfl', 'loss_contrast']

    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
        """Semi-supervised (ssod) loss between student and teacher outputs."""
        ssod_losses = self.ssod_loss(student_head_outs, teacher_head_outs,
                                     train_cfg)
        return ssod_losses
@register
class PPYOLOEWithAuxHead(BaseArch):
    """PPYOLOE variant trained with an auxiliary head fed by a duplicated
    (deep-copied) neck; at inference only the main head is used."""

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='CSPResNet',
                 neck='CustomCSPPAN',
                 yolo_head='PPYOLOEHead',
                 aux_head='SimpleConvHead',
                 post_process='BBoxPostProcess',
                 for_mot=False,
                 detach_epoch=5):
        """
        PPYOLOE network, see https://arxiv.org/abs/2203.16250

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            aux_head (nn.Layer): auxiliary head, used only during training
            post_process (object): `BBoxPostProcess` instance
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
            detach_epoch (int): from this epoch on, the aux neck runs on
                detached features (no gradient back into the backbone)
        """
        super(PPYOLOEWithAuxHead, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.aux_neck = copy.deepcopy(self.neck)

        self.yolo_head = yolo_head
        self.aux_head = aux_head
        self.post_process = post_process
        self.for_mot = for_mot
        self.detach_epoch = detach_epoch

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        aux_neck = copy.deepcopy(neck)

        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)
        aux_head = create(cfg['aux_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
            'aux_head': aux_head,
        }

    def _forward(self):
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        if self.training:
            if self.inputs['epoch_id'] >= self.detach_epoch:
                # after detach_epoch, cut gradients from aux path to backbone
                aux_neck_feats = self.aux_neck([f.detach() for f in body_feats])
                # NOTE(review): generator expression -- assumes aux_head
                # iterates it exactly once; confirm if aux_head changes.
                dual_neck_feats = (paddle.concat(
                    [f.detach(), aux_f], axis=1) for f, aux_f in
                                   zip(neck_feats, aux_neck_feats))
            else:
                aux_neck_feats = self.aux_neck(body_feats)
                dual_neck_feats = (paddle.concat(
                    [f, aux_f], axis=1) for f, aux_f in
                                   zip(neck_feats, aux_neck_feats))
            aux_cls_scores, aux_bbox_preds = self.aux_head(dual_neck_feats)
            loss = self.yolo_head(
                neck_feats,
                self.inputs,
                aux_pred=[aux_cls_scores, aux_bbox_preds])
            return loss
        else:
            yolo_head_outs = self.yolo_head(neck_feats)

            if self.post_process is not None:
                bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])
            else:
                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                    yolo_head_outs, self.inputs['scale_factor'])

            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                            }
                """
                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                # Todo: get logits output
                extra_data['nms_keep_idx'] = nms_keep_idx
                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
            else:
                output = {'bbox': bbox, 'bbox_num': bbox_num}
            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,104 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['QueryInst']
@register
class QueryInst(BaseArch):
    """Query-based instance segmentation architecture (QueryInst)."""

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 roi_head,
                 post_process='SparsePostProcess'):
        """
        Args:
            backbone (object): backbone instance
            neck (object): feature pyramid instance
            rpn_head (object): produces the learned proposal boxes/features
            roi_head (object): RoI head producing class/bbox/mask outputs
            post_process (object): `SparsePostProcess` instance
        """
        super(QueryInst, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.roi_head = roi_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        roi_head = create(cfg['roi_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'rpn_head': rpn_head,
            "roi_head": roi_head
        }

    def _forward(self, targets=None):
        """Run the network: roi_head outputs dict in training, or
        post-processed (bbox_pred, bbox_num, mask_pred) in eval."""
        features = self.backbone(self.inputs)
        features = self.neck(features)

        proposal_bboxes, proposal_features = self.rpn_head(self.inputs[
            'img_whwh'])
        outputs = self.roi_head(features, proposal_bboxes, proposal_features,
                                targets)

        if self.training:
            return outputs
        else:
            bbox_pred, bbox_num, mask_pred = self.post_process(
                outputs['class_logits'], outputs['bbox_pred'],
                self.inputs['scale_factor_whwh'], self.inputs['ori_shape'],
                outputs['mask_logits'])
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self):
        """Build per-image target dicts from the ground truth and compute
        the training losses (their sum is added under the 'loss' key)."""
        targets = []

        for i in range(len(self.inputs['img_whwh'])):
            boxes = self.inputs['gt_bbox'][i]
            labels = self.inputs['gt_class'][i].squeeze(-1)
            img_whwh = self.inputs['img_whwh'][i]
            if boxes.shape[0] != 0:
                # repeat the image [w,h,w,h] row once per gt box
                img_whwh_tgt = img_whwh.unsqueeze(0).tile([boxes.shape[0], 1])
            else:
                # no gt boxes: keep an empty, correctly-shaped placeholder
                img_whwh_tgt = paddle.zeros_like(boxes)
            gt_segm = self.inputs['gt_segm'][i].astype('float32')

            targets.append({
                'boxes': boxes,
                'labels': labels,
                'img_whwh': img_whwh,
                'img_whwh_tgt': img_whwh_tgt,
                'gt_segm': gt_segm
            })

        losses = self._forward(targets)
        losses.update({'loss': sum(losses.values())})
        return losses

    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}

View File

@@ -0,0 +1,84 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
import paddle
import paddle.nn.functional as F
__all__ = ['RetinaNet']
@register
class RetinaNet(BaseArch):
    """Single-stage RetinaNet detector: backbone -> neck -> dense head.

    Args:
        backbone (object): backbone instance
        neck (object): feature pyramid instance
        head (object): dense prediction head instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head):
        super(RetinaNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'head': head}

    def _forward(self):
        feats = self.neck(self.backbone(self.inputs))
        if self.training:
            return self.head(feats, self.inputs)

        head_outs = self.head(feats)
        bbox, bbox_num, nms_keep_idx = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        result = {'bbox': bbox, 'bbox_num': bbox_num}
        if self.use_extra_data:
            # record pre-NMS outputs: raw logits, sigmoid scores and the
            # indices of the boxes kept by NMS
            preds_logits = self.head.decode_cls_logits(head_outs[0])
            result["extra_data"] = {
                'logits': preds_logits,
                'scores': F.sigmoid(preds_logits),
                'nms_keep_idx': nms_keep_idx,
            }
        return result

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,83 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['S2ANet']
@register
class S2ANet(BaseArch):
    """
    S2ANet oriented-object detector, see https://arxiv.org/pdf/2008.09397.pdf

    Args:
        backbone (object): backbone instance
        neck (object): `FPN` instance (may be None/absent)
        head (object): `Head` instance
    """

    __category__ = 'architecture'
    __inject__ = ['head']

    def __init__(self, backbone, neck, head):
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.s2anet_head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck_cfg = cfg['neck']
        # neck is optional: keep the falsy config value when absent
        neck = create(neck_cfg, input_shape=backbone.out_shape) \
            if neck_cfg else neck_cfg
        head_input = neck.out_shape if (neck and neck.out_shape) \
            else backbone.out_shape
        head = create(cfg['head'], input_shape=head_input)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        feats = self.backbone(self.inputs)
        if self.neck is not None:
            feats = self.neck(feats)
        if self.training:
            return self.s2anet_head(feats, self.inputs)

        head_outs = self.s2anet_head(feats)
        # post_process: decode rotated boxes ...
        bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
        # ... then rescale them back to the original image
        bboxes = self.s2anet_head.get_pred(bboxes, bbox_num,
                                           self.inputs['im_shape'],
                                           self.inputs['scale_factor'])
        return {'bbox': bboxes, 'bbox_num': bbox_num}

    def get_loss(self, ):
        return self._forward()

    def get_pred(self):
        return self._forward()

View File

@@ -0,0 +1,110 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['SOLOv2']
@register
class SOLOv2(BaseArch):
    """
    SOLOv2 instance segmentation network, see https://arxiv.org/abs/2003.10152

    Args:
        backbone (object): an backbone instance
        solov2_head (object): an `SOLOv2Head` instance
        mask_head (object): an `SOLOv2MaskHead` instance
        neck (object): neck of network, such as feature pyramid network instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, solov2_head, mask_head, neck=None):
        super(SOLOv2, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.solov2_head = solov2_head
        self.mask_head = mask_head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head_kwargs = {'input_shape': neck.out_shape}
        solov2_head = create(cfg['solov2_head'], **head_kwargs)
        mask_head = create(cfg['mask_head'], **head_kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            'solov2_head': solov2_head,
            'mask_head': mask_head,
        }

    def model_arch(self):
        # run backbone+neck once, caching head outputs for get_loss/get_pred
        feats = self.neck(self.backbone(self.inputs))
        self.seg_pred = self.mask_head(feats)
        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(feats)

    def get_loss(self, ):
        # gather whichever per-level ground-truth tensors are present
        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
        for level in range(len(self.solov2_head.seg_num_grids)):
            for prefix, bucket in (('ins_label', gt_ins_labels),
                                   ('cate_label', gt_cate_labels),
                                   ('grid_order', gt_grid_orders)):
                key = '{}{}'.format(prefix, level)
                if key in self.inputs:
                    bucket.append(self.inputs[key])
        loss = dict(
            self.solov2_head.get_loss(self.cate_pred_list,
                                      self.kernel_pred_list, self.seg_pred,
                                      gt_ins_labels, gt_cate_labels,
                                      gt_grid_orders, self.inputs['fg_num']))
        # total loss is the sum of every individual loss term
        loss['loss'] = paddle.add_n(list(loss.values()))
        return loss

    def get_pred(self):
        seg_masks, cate_labels, cate_scores, bbox_num = \
            self.solov2_head.get_prediction(
                self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
                self.inputs['im_shape'], self.inputs['scale_factor'])
        return {
            "segm": seg_masks,
            "bbox_num": bbox_num,
            'cate_label': cate_labels,
            'cate_score': cate_scores
        }

View File

@@ -0,0 +1,99 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ["SparseRCNN"]
@register
class SparseRCNN(BaseArch):
    """Sparse R-CNN detector: learned proposals refined by an iterative
    RoI head, with a sparse post-process at inference."""

    __category__ = 'architecture'
    __inject__ = ["postprocess"]

    def __init__(self,
                 backbone,
                 neck,
                 head="SparsercnnHead",
                 postprocess="SparsePostProcess"):
        super(SparseRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.postprocess = postprocess

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], roi_input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(fpn_feats, self.inputs["img_whwh"])
        if self.training:
            return head_outs
        # eval: decode logits/boxes into (bbox_pred, bbox_num)
        return self.postprocess(
            head_outs["pred_logits"], head_outs["pred_boxes"],
            self.inputs["scale_factor_whwh"], self.inputs["ori_shape"])

    def get_loss(self):
        """Assemble per-image target dicts and compute the matching loss."""
        targets = []
        for labels, boxes, whwh in zip(self.inputs["gt_class"],
                                       self.inputs["gt_bbox"],
                                       self.inputs["img_whwh"]):
            targets.append({
                "boxes": boxes,
                "labels": labels.squeeze(-1),
                "img_whwh": whwh,
                # one [w,h,w,h] row per gt box
                "img_whwh_tgt": whwh.unsqueeze(0).tile(
                    [int(boxes.shape[0]), 1]),
            })

        loss_dict = self.head.get_loss(self._forward(), targets)
        # 'acc' is a metric, not a loss: exclude it from the total
        acc = loss_dict.pop("acc")
        loss_dict["loss"] = sum(loss_dict.values())
        loss_dict["acc"] = acc
        return loss_dict

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}

View File

@@ -0,0 +1,118 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
import paddle
import paddle.nn.functional as F
__all__ = ['SSD']
@register
class SSD(BaseArch):
    """
    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325

    Args:
        backbone (nn.Layer): backbone instance
        ssd_head (nn.Layer): `SSDHead` instance
        post_process (object): `BBoxPostProcess` instance
        r34_backbone (bool): whether to adapt a ResNet-34 backbone for SSD
            by flattening the stage-3 downsampling stride to 1
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, ssd_head, post_process, r34_backbone=False):
        super(SSD, self).__init__()
        self.backbone = backbone
        self.ssd_head = ssd_head
        self.post_process = post_process
        self.r34_backbone = r34_backbone
        if self.r34_backbone:
            from ppdet.modeling.backbones.resnet import ResNet
            assert isinstance(self.backbone, ResNet) and \
                   self.backbone.depth == 34, \
                "If you set r34_backbone=True, please use ResNet-34 as backbone."
            # Keep stage-3 feature resolution: remove the downsampling stride
            # from both the first block's conv and its shortcut.
            self.backbone.res_layers[2].blocks[0].branch2a.conv._stride = [1, 1]
            self.backbone.res_layers[2].blocks[0].short.conv._stride = [1, 1]

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Build the backbone first so its output shape can size the head.
        backbone = create(cfg['backbone'])
        ssd_head = create(cfg['ssd_head'], input_shape=backbone.out_shape)
        return {'backbone': backbone, "ssd_head": ssd_head}

    def _forward(self):
        feats = self.backbone(self.inputs)
        if self.training:
            # With ground truth supplied, the head computes the loss directly.
            return self.ssd_head(feats, self.inputs['image'],
                                 self.inputs['gt_bbox'],
                                 self.inputs['gt_class'])
        preds, anchors = self.ssd_head(feats, self.inputs['image'])
        bbox, bbox_num, nms_keep_idx = self.post_process(
            preds, anchors, self.inputs['im_shape'],
            self.inputs['scale_factor'])
        if not self.use_extra_data:
            return bbox, bbox_num
        # Record pre-NMS outputs (scores/logits and kept indices) for
        # downstream consumers such as model-interpretation tools.
        cls_logits = preds[1]  # list of [1 x NumBBox x NumClass] tensors
        concat_logits = paddle.concat(cls_logits, axis=1)
        extra_data = {
            'scores': F.softmax(concat_logits).transpose([0, 2, 1]),
            'logits': concat_logits.transpose([0, 2, 1]),
            'nms_keep_idx': nms_keep_idx,  # bbox indices surviving NMS
        }
        return bbox, bbox_num, extra_data

    def get_loss(self, ):
        """Training entry point; _forward returns the head loss directly."""
        return {"loss": self._forward()}

    def get_pred(self):
        """Return detections, plus pre-NMS extra data when enabled."""
        outs = self._forward()
        if self.use_extra_data:
            boxes, counts, extra = outs
            return {"bbox": boxes, "bbox_num": counts, "extra_data": extra}
        boxes, counts = outs
        return {"bbox": boxes, "bbox_num": counts}

View File

@@ -0,0 +1,77 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['TOOD']
@register
class TOOD(BaseArch):
    """
    TOOD: Task-aligned One-stage Object Detection, see https://arxiv.org/abs/2108.07755

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): 'FPN' instance
        head (nn.Layer): 'TOODHead' instance
    """
    __category__ = 'architecture'

    def __init__(self, backbone, neck, head):
        super(TOOD, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Instantiate components in order, feeding each stage the output
        # shape of the previous one.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        # backbone -> FPN -> task-aligned head
        feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(feats)
        if self.training:
            # Head computes training losses against the batch targets.
            return self.head.get_loss(head_outs, self.inputs)
        # Decode predictions back to the original image scale.
        return self.head.post_process(head_outs, self.inputs['im_shape'],
                                      self.inputs['scale_factor'])

    def get_loss(self):
        """Training entry point; returns the head's loss dict."""
        return self._forward()

    def get_pred(self):
        """Return final detections as {'bbox': ..., 'bbox_num': ...}."""
        boxes, counts = self._forward()
        return {'bbox': boxes, 'bbox_num': counts}

View File

@@ -0,0 +1,98 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['TTFNet']
@register
class TTFNet(BaseArch):
    """
    TTFNet network, see https://arxiv.org/abs/1909.00700

    Args:
        backbone (object): backbone instance
        neck (object): 'TTFFPN' instance
        ttf_head (object): 'TTFHead' instance
        post_process (object): 'BBoxPostProcess' instance
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='TTFFPN',
                 ttf_head='TTFHead',
                 post_process='BBoxPostProcess'):
        super(TTFNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.ttf_head = ttf_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Build backbone -> neck -> head, chaining output shapes.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        ttf_head = create(cfg['ttf_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "ttf_head": ttf_head}

    def _forward(self):
        # Head emits a center heatmap (hm) and a box-size map (wh).
        feats = self.neck(self.backbone(self.inputs))
        hm, wh = self.ttf_head(feats)
        if self.training:
            return hm, wh
        # Decode the raw maps into boxes at the original image scale.
        return self.post_process(hm, wh, self.inputs['im_shape'],
                                 self.inputs['scale_factor'])

    def get_loss(self, ):
        """Compute head losses against the TTF training targets."""
        hm, wh = self._forward()
        losses = dict(
            self.ttf_head.get_loss(hm, wh, self.inputs['ttf_heatmap'],
                                   self.inputs['ttf_box_target'],
                                   self.inputs['ttf_reg_weight']))
        # Aggregate the individual terms into the total training loss.
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return final detections as {'bbox': ..., 'bbox_num': ...}."""
        boxes, counts = self._forward()
        return {"bbox": boxes, "bbox_num": counts}

View File

@@ -0,0 +1,150 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
from ..post_process import JDEBBoxPostProcess
__all__ = ['YOLOv3']
# YOLOv3,PP-YOLO,PP-YOLOv2,PP-YOLOE,PP-YOLOE+ use the same architecture as YOLOv3
# PP-YOLOE and PP-YOLOE+ are recommended to use PPYOLOE architecture in ppyoloe.py, especially when use distillation or aux head
@register
class YOLOv3(BaseArch):
    """Generic one-stage YOLO architecture, see https://arxiv.org/abs/1804.02767

    Shared by YOLOv3, PP-YOLO, PP-YOLOv2, PP-YOLOE and PP-YOLOE+; for
    PP-YOLOE(+) the PPYOLOE class in ppyoloe.py is recommended instead,
    especially with distillation or an auxiliary head.
    """
    __category__ = 'architecture'
    __shared__ = ['data_format']
    __inject__ = ['post_process']
    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess',
                 data_format='NCHW',
                 for_mot=False):
        """
        YOLOv3 network, see https://arxiv.org/abs/1804.02767

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            post_process (object): `BBoxPostProcess` instance
            data_format (str): data format, NCHW or NHWC
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
        """
        super(YOLOv3, self).__init__(data_format=data_format)
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot
        # A JDE-style post process additionally returns kept box indices.
        self.return_idx = isinstance(post_process, JDEBBoxPostProcess)
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }
    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.for_mot:
            # MOT necks may return both detection and embedding features.
            neck_feats = self.neck(body_feats, self.for_mot)
        else:
            neck_feats = self.neck(body_feats)
        if isinstance(neck_feats, dict):
            # Dict output only happens in MOT mode: split the two streams.
            assert self.for_mot == True
            emb_feats = neck_feats['emb_feats']
            neck_feats = neck_feats['yolo_feats']
        if self.training:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)
            if self.for_mot:
                # NOTE(review): assumes the MOT neck returned a dict above;
                # otherwise `emb_feats` is unbound here — confirm for all necks.
                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
            else:
                return yolo_losses
        else:
            yolo_head_outs = self.yolo_head(neck_feats)
            if self.for_mot:
                # the detection part of JDE MOT model
                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors)
                output = {
                    'bbox': bbox,
                    'bbox_num': bbox_num,
                    'boxes_idx': boxes_idx,
                    'nms_keep_idx': nms_keep_idx,
                    'emb_feats': emb_feats,
                }
            else:
                if self.return_idx:
                    # the detection part of JDE MOT model
                    _, bbox, bbox_num, nms_keep_idx = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors)
                elif self.post_process is not None:
                    # anchor based YOLOs: YOLOv3,PP-YOLO,PP-YOLOv2 use mask_anchors
                    bbox, bbox_num, nms_keep_idx = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors,
                        self.inputs['im_shape'], self.inputs['scale_factor'])
                else:
                    # anchor free YOLOs: PP-YOLOE, PP-YOLOE+
                    bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                        yolo_head_outs, self.inputs['scale_factor'])
                if self.use_extra_data:
                    extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                    """extra_data:{
                        'scores': predict scores,
                        'nms_keep_idx': bbox index before nms,
                        }
                    """
                    extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                    # Todo: get logits output
                    extra_data['nms_keep_idx'] = nms_keep_idx
                    # Todo support for mask_anchors yolo
                    output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
                else:
                    output = {'bbox': bbox, 'bbox_num': bbox_num}
            return output
    def get_loss(self):
        # Training entry point: returns the loss dict from _forward.
        return self._forward()
    def get_pred(self):
        # Inference entry point: returns decoded detections from _forward.
        return self._forward()

View File

@@ -0,0 +1,88 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
__all__ = ['YOLOF']
@register
class YOLOF(BaseArch):
    """
    YOLOF network, see https://arxiv.org/abs/2103.09460

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): DilatedEncoder instance
        head (nn.Layer): YOLOFHead instance
        for_mot (bool): whether return other features for multi-object tracking
            models, default False in pure object detection models.
    """
    __category__ = 'architecture'

    def __init__(self,
                 backbone='ResNet',
                 neck='DilatedEncoder',
                 head='YOLOFHead',
                 for_mot=False):
        super(YOLOF, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Chain output shapes: backbone -> dilated encoder -> head.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        feats = self.neck(self.backbone(self.inputs), self.for_mot)
        if self.training:
            # Head returns the loss dict when given the batch targets.
            return self.head(feats, self.inputs)
        # Decode raw head outputs into boxes at the original image scale.
        raw_outs = self.head(feats)
        boxes, counts = self.head.post_process(raw_outs,
                                               self.inputs['im_shape'],
                                               self.inputs['scale_factor'])
        return {'bbox': boxes, 'bbox_num': counts}

    def get_loss(self):
        """Training entry point; returns the head's loss dict."""
        return self._forward()

    def get_pred(self):
        """Inference entry point; returns decoded detections."""
        return self._forward()

View File

@@ -0,0 +1,138 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ppdet.core.workspace import register, create
from .meta_arch import BaseArch
import random
import paddle
import paddle.nn.functional as F
import paddle.distributed as dist
__all__ = ['YOLOX']
@register
class YOLOX(BaseArch):
    """
    YOLOX network, see https://arxiv.org/abs/2107.08430

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        head (nn.Layer): head instance
        for_mot (bool): whether used for MOT or not
        input_size (list[int]): initial scale, will be reset by self._preprocess()
        size_stride (int): stride of the size range
        size_range (list[int]): multi-scale range for training
        random_interval (int): interval of iter to change self._input_size
    """
    __category__ = 'architecture'
    def __init__(self,
                 backbone='CSPDarkNet',
                 neck='YOLOCSPPAN',
                 head='YOLOXHead',
                 for_mot=False,
                 input_size=[640, 640],
                 size_stride=32,
                 size_range=[15, 25],
                 random_interval=10):
        super(YOLOX, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.for_mot = for_mot
        self.input_size = input_size
        # Current training size as a tensor; mutated by _get_size() during
        # multi-scale training, while self.input_size keeps the base size.
        self._input_size = paddle.to_tensor(input_size)
        self.size_stride = size_stride
        self.size_range = size_range
        self.random_interval = random_interval
        # Iteration counter driving the every-random_interval resize.
        self._step = 0
    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        head = create(cfg['head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "head": head,
        }
    def _forward(self):
        if self.training:
            # Multi-scale training: maybe resize this batch before the net.
            self._preprocess()
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)
        if self.training:
            yolox_losses = self.head(neck_feats, self.inputs)
            # Report the current training size alongside the losses.
            yolox_losses.update({'size': self._input_size[0]})
            return yolox_losses
        else:
            head_outs = self.head(neck_feats)
            bbox, bbox_num = self.head.post_process(
                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
            return {'bbox': bbox, 'bbox_num': bbox_num}
    def get_loss(self):
        # Training entry point: loss dict from _forward.
        return self._forward()
    def get_pred(self):
        # Inference entry point: decoded detections from _forward.
        return self._forward()
    def _preprocess(self):
        # YOLOX multi-scale training, interpolate resize before inputs of the network.
        self._get_size()
        scale_y = self._input_size[0] / self.input_size[0]
        scale_x = self._input_size[1] / self.input_size[1]
        if scale_x != 1 or scale_y != 1:
            # Resize images and rescale gt boxes consistently (x-coords by
            # scale_x, y-coords by scale_y).
            self.inputs['image'] = F.interpolate(
                self.inputs['image'],
                size=self._input_size,
                mode='bilinear',
                align_corners=False)
            gt_bboxes = self.inputs['gt_bbox']
            for i in range(len(gt_bboxes)):
                if len(gt_bboxes[i]) > 0:
                    gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x
                    gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y
            self.inputs['gt_bbox'] = gt_bboxes
    def _get_size(self):
        # random_interval = 10 as default, every 10 iters to change self._input_size
        image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
        if self._step % self.random_interval == 0:
            # Draw a new height factor; width follows the fixed aspect ratio.
            size_factor = random.randint(*self.size_range)
            size = [
                self.size_stride * size_factor,
                self.size_stride * int(size_factor * image_ratio)
            ]
            self._input_size = paddle.to_tensor(size)
        self._step += 1