更换文档检测模型
This commit is contained in:
83
paddle_detection/ppdet/modeling/architectures/__init__.py
Normal file
83
paddle_detection/ppdet/modeling/architectures/__init__.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from . import meta_arch
|
||||
from . import faster_rcnn
|
||||
from . import mask_rcnn
|
||||
from . import yolo
|
||||
from . import ppyoloe
|
||||
from . import cascade_rcnn
|
||||
from . import ssd
|
||||
from . import fcos
|
||||
from . import solov2
|
||||
from . import ttfnet
|
||||
from . import s2anet
|
||||
from . import keypoint_hrhrnet
|
||||
from . import keypoint_hrnet
|
||||
from . import keypoint_vitpose
|
||||
from . import jde
|
||||
from . import deepsort
|
||||
from . import fairmot
|
||||
from . import centernet
|
||||
from . import gfl
|
||||
from . import picodet
|
||||
from . import detr
|
||||
from . import sparse_rcnn
|
||||
from . import tood
|
||||
from . import retinanet
|
||||
from . import bytetrack
|
||||
from . import yolox
|
||||
from . import yolof
|
||||
from . import pose3d_metro
|
||||
from . import centertrack
|
||||
from . import queryinst
|
||||
from . import detr_ssod
|
||||
from . import multi_stream_detector
|
||||
from . import clrnet
|
||||
|
||||
from .meta_arch import *
|
||||
from .faster_rcnn import *
|
||||
from .mask_rcnn import *
|
||||
from .yolo import *
|
||||
from .ppyoloe import *
|
||||
from .cascade_rcnn import *
|
||||
from .ssd import *
|
||||
from .fcos import *
|
||||
from .solov2 import *
|
||||
from .ttfnet import *
|
||||
from .s2anet import *
|
||||
from .keypoint_hrhrnet import *
|
||||
from .keypoint_hrnet import *
|
||||
from .keypoint_vitpose import *
|
||||
from .jde import *
|
||||
from .deepsort import *
|
||||
from .fairmot import *
|
||||
from .centernet import *
|
||||
from .blazeface import *
|
||||
from .gfl import *
|
||||
from .picodet import *
|
||||
from .detr import *
|
||||
from .sparse_rcnn import *
|
||||
from .tood import *
|
||||
from .retinanet import *
|
||||
from .bytetrack import *
|
||||
from .yolox import *
|
||||
from .yolof import *
|
||||
from .pose3d_metro import *
|
||||
from .centertrack import *
|
||||
from .queryinst import *
|
||||
from .keypoint_petr import *
|
||||
from .detr_ssod import *
|
||||
from .multi_stream_detector import *
|
||||
from .clrnet import *
|
||||
117
paddle_detection/ppdet/modeling/architectures/blazeface.py
Normal file
117
paddle_detection/ppdet/modeling/architectures/blazeface.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
__all__ = ['BlazeFace']
|
||||
|
||||
|
||||
@register
class BlazeFace(BaseArch):
    """
    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
    see https://arxiv.org/abs/1907.05047

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        blaze_head (nn.Layer): `blazeHead` instance
        post_process (object): `BBoxPostProcess` instance
    """

    __category__ = 'architecture'
    # post_process is resolved by the workspace from its config name.
    __inject__ = ['post_process']

    def __init__(self, backbone, blaze_head, neck, post_process):
        super(BlazeFace, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.blaze_head = blaze_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config, threading output shapes
        backbone -> neck -> head. `post_process` is injected separately."""
        # backbone
        backbone = create(cfg['backbone'])
        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        # head
        kwargs = {'input_shape': neck.out_shape}
        blaze_head = create(cfg['blaze_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'blaze_head': blaze_head,
        }

    def _forward(self):
        # Backbone
        body_feats = self.backbone(self.inputs)
        # neck
        neck_feats = self.neck(body_feats)
        # blaze Head
        if self.training:
            # Training path: the head computes and returns the loss directly.
            return self.blaze_head(neck_feats, self.inputs['image'],
                                   self.inputs['gt_bbox'],
                                   self.inputs['gt_class'])
        else:
            preds, anchors = self.blaze_head(neck_feats, self.inputs['image'])
            bbox, bbox_num, nms_keep_idx = self.post_process(
                preds, anchors, self.inputs['im_shape'],
                self.inputs['scale_factor'])
            # NOTE(review): self.use_extra_data is never assigned in this
            # class — presumably set by BaseArch; confirm before relying on it.
            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                            }
                """
                preds_logits = preds[1]  # [[1xNumBBoxNumClass]]
                extra_data['scores'] = F.softmax(paddle.concat(
                    preds_logits, axis=1)).transpose([0, 2, 1])
                extra_data['logits'] = paddle.concat(
                    preds_logits, axis=1).transpose([0, 2, 1])
                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
                return bbox, bbox_num, extra_data
            else:
                return bbox, bbox_num

    def get_loss(self, ):
        # Training: _forward() returns the head loss tensor.
        return {"loss": self._forward()}

    def get_pred(self):
        # Inference: _forward() returns 2 or 3 values depending on
        # use_extra_data (see _forward).
        if self.use_extra_data:
            bbox_pred, bbox_num, extra_data = self._forward()
            output = {
                "bbox": bbox_pred,
                "bbox_num": bbox_num,
                "extra_data": extra_data
            }
        else:
            bbox_pred, bbox_num = self._forward()
            output = {
                "bbox": bbox_pred,
                "bbox_num": bbox_num,
            }

        return output
|
||||
83
paddle_detection/ppdet/modeling/architectures/bytetrack.py
Normal file
83
paddle_detection/ppdet/modeling/architectures/bytetrack.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['ByteTrack']
|
||||
|
||||
|
||||
@register
class ByteTrack(BaseArch):
    """
    ByteTrack network, see https://arxiv.org/abs/2110.06864

    Args:
        detector (object): detector model instance
        reid (object): reid model instance, default None
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOX',
                 reid=None,
                 tracker='JDETracker'):
        super(ByteTrack, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Instantiate detector/reid/tracker sub-modules from config."""
        detector = create(cfg['detector'])
        # The optional reid branch is disabled via the *string* 'None'
        # in the config, not the None object.
        reid = create(cfg['reid']) if cfg['reid'] != 'None' else None
        tracker = create(cfg['tracker'])

        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        det_outs = self.detector(self.inputs)

        if self.training:
            # Training only optimizes the detector.
            return det_outs

        # Inference: optionally attach ReID embeddings for association.
        if self.reid is None:
            pred_embs = None
        else:
            assert 'crops' in self.inputs
            pred_embs = self.reid(self.inputs['crops'])
        det_outs['embeddings'] = pred_embs
        return det_outs

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
|
||||
143
paddle_detection/ppdet/modeling/architectures/cascade_rcnn.py
Normal file
143
paddle_detection/ppdet/modeling/architectures/cascade_rcnn.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['CascadeRCNN']
|
||||
|
||||
|
||||
@register
class CascadeRCNN(BaseArch):
    """
    Cascade R-CNN network, see https://arxiv.org/abs/1712.00726

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
        mask_head (object): `MaskHead` instance
        mask_post_process (object): `MaskPostProcess` instance
    """
    __category__ = 'architecture'
    # Post-process objects are resolved by the workspace from config names.
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None,
                 mask_head=None,
                 mask_post_process=None):
        super(CascadeRCNN, self).__init__()
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process
        self.neck = neck
        self.mask_head = mask_head
        self.mask_post_process = mask_post_process
        # Instance segmentation is enabled simply by providing a mask head.
        self.with_mask = mask_head is not None

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build sub-modules from config; the neck and mask head are
        optional (falsy config values skip their creation)."""
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # Heads consume neck features when a neck exists, else backbone's.
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)

        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = cfg['mask_head'] and create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        if self.training:
            # Training returns per-stage loss dicts (rpn, bbox, mask).
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            # Re-fetch rois/targets assigned inside the bbox head for the
            # mask branch.
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            if self.with_mask:
                mask_loss = self.mask_head(body_feats, rois, rois_num,
                                           self.inputs, bbox_targets, bbox_feat)
                return rpn_loss, bbox_loss, mask_loss
            else:
                return rpn_loss, bbox_loss, {}
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, self.inputs)
            # Cascade refinement: use the last stage's refined rois.
            refined_rois = self.bbox_head.get_refined_rois()

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']

            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                preds, (refined_rois, rois_num), im_shape, scale_factor)
            # rescale the prediction back to origin image
            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)
            if not self.with_mask:
                return bbox_pred, bbox_num, None
            mask_out = self.mask_head(body_feats, bbox, bbox_num, self.inputs)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
                                               origin_shape)
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        if self.with_mask:
            loss.update(mask_loss)
        # Total loss is the plain sum of all component losses.
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        output = {
            'bbox': bbox_pred,
            'bbox_num': bbox_num,
        }
        if self.with_mask:
            output.update({'mask': mask_pred})
        return output
|
||||
103
paddle_detection/ppdet/modeling/architectures/centernet.py
Normal file
103
paddle_detection/ppdet/modeling/architectures/centernet.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['CenterNet']
|
||||
|
||||
|
||||
@register
class CenterNet(BaseArch):
    """
    CenterNet network, see http://arxiv.org/abs/1904.07850

    Args:
        backbone (object): backbone instance
        neck (object): FPN instance, default use 'CenterNetDLAFPN'
        head (object): 'CenterNetHead' instance
        post_process (object): 'CenterNetPostProcess' instance
        for_mot (bool): whether return other features used in tracking model
    """
    __category__ = 'architecture'
    __inject__ = ['post_process']
    __shared__ = ['for_mot']

    def __init__(self,
                 backbone,
                 neck='CenterNetDLAFPN',
                 head='CenterNetHead',
                 post_process='CenterNetPostProcess',
                 for_mot=False):
        super(CenterNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.post_process = post_process
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, optional neck, and head from config."""
        backbone = create(cfg['backbone'])

        kwargs = {'input_shape': backbone.out_shape}
        # A falsy neck config skips neck creation entirely.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # Head consumes neck features when present, backbone's otherwise.
        head_in_shape = neck.out_shape if neck else backbone.out_shape
        head = create(cfg['head'], input_shape=head_in_shape)

        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        feats = self.backbone(self.inputs)
        if self.neck is not None:
            feats = self.neck(feats)
        head_out = self.head(feats, self.inputs)
        if self.for_mot:
            # Tracking models consume the neck feature map downstream.
            head_out.update({'neck_feat': feats})
        elif self.training:
            head_out['loss'] = head_out.pop('det_loss')
        return head_out

    def get_pred(self):
        head_out = self._forward()
        bbox, bbox_num, bbox_inds, topk_clses, topk_ys, topk_xs = self.post_process(
            head_out['heatmap'],
            head_out['size'],
            head_out['offset'],
            im_shape=self.inputs['im_shape'],
            scale_factor=self.inputs['scale_factor'])

        if not self.for_mot:
            return {"bbox": bbox, "bbox_num": bbox_num}

        # MOT mode additionally exposes top-k indices and neck features
        # for the tracker.
        return {
            "bbox": bbox,
            "bbox_num": bbox_num,
            "bbox_inds": bbox_inds,
            "topk_clses": topk_clses,
            "topk_ys": topk_ys,
            "topk_xs": topk_xs,
            "neck_feat": head_out['neck_feat']
        }

    def get_loss(self):
        return self._forward()
|
||||
176
paddle_detection/ppdet/modeling/architectures/centertrack.py
Normal file
176
paddle_detection/ppdet/modeling/architectures/centertrack.py
Normal file
@@ -0,0 +1,176 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import copy
|
||||
import math
|
||||
import numpy as np
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
from ..keypoint_utils import affine_transform
|
||||
from ppdet.data.transform.op_helper import gaussian_radius, gaussian2D, draw_umich_gaussian
|
||||
|
||||
__all__ = ['CenterTrack']
|
||||
|
||||
|
||||
@register
class CenterTrack(BaseArch):
    """
    CenterTrack network, see http://arxiv.org/abs/2004.01177

    Args:
        detector (object): 'CenterNet' instance
        plugin_head (object): 'CenterTrackHead' instance
        tracker (object): 'CenterTracker' instance
    """
    __category__ = 'architecture'
    __shared__ = ['mot_metric']

    def __init__(self,
                 detector='CenterNet',
                 plugin_head='CenterTrackHead',
                 tracker='CenterTracker',
                 mot_metric=False):
        super(CenterTrack, self).__init__()
        self.detector = detector
        self.plugin_head = plugin_head
        self.tracker = tracker
        self.mot_metric = mot_metric
        # Previous frame's image, cached between calls while running MOT.
        self.pre_image = None
        # When deploy is True, pre-image/heatmap bookkeeping and
        # post-processing are skipped (handled outside the exported model).
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # The plugin head consumes the detector's neck features, or the
        # backbone features when the detector has no neck.
        detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape

        kwargs = {'input_shape': detector_out_shape}
        plugin_head = create(cfg['plugin_head'], **kwargs)
        tracker = create(cfg['tracker'])

        return {
            'detector': detector,
            'plugin_head': plugin_head,
            'tracker': tracker,
        }

    def _forward(self):
        if self.training:
            det_outs = self.detector(self.inputs)
            neck_feat = det_outs['neck_feat']

            # Collect detector losses (keys containing 'loss').
            losses = {}
            for k, v in det_outs.items():
                if 'loss' not in k: continue
                losses.update({k: v})

            # Collect plugin-head losses the same way.
            plugin_outs = self.plugin_head(neck_feat, self.inputs)
            for k, v in plugin_outs.items():
                if 'loss' not in k: continue
                losses.update({k: v})

            losses['loss'] = det_outs['det_loss'] + plugin_outs['plugin_loss']
            return losses

        else:
            if not self.mot_metric:
                # detection, support bs>=1
                det_outs = self.detector(self.inputs)
                return {
                    'bbox': det_outs['bbox'],
                    'bbox_num': det_outs['bbox_num']
                }

            else:
                # MOT, only support bs=1
                if not self.deploy:
                    if self.pre_image is None:
                        # First frame: use the current image as its own
                        # "previous" image.
                        self.pre_image = self.inputs['image']
                        # initializing tracker for the first frame
                        self.tracker.init_track([])
                    self.inputs['pre_image'] = self.pre_image
                    self.pre_image = self.inputs[
                        'image']  # Note: update for next image

                    # render input heatmap from tracker status
                    pre_hm = self.get_additional_inputs(
                        self.tracker.tracks, self.inputs, with_hm=True)
                    self.inputs['pre_hm'] = paddle.to_tensor(pre_hm)

                # model inference
                det_outs = self.detector(self.inputs)
                neck_feat = det_outs['neck_feat']
                result = self.plugin_head(
                    neck_feat, self.inputs, det_outs['bbox'],
                    det_outs['bbox_inds'], det_outs['topk_clses'],
                    det_outs['topk_ys'], det_outs['topk_xs'])

                if not self.deploy:
                    # convert the cropped and 4x downsampled output coordinate system
                    # back to the input image coordinate system
                    result = self.plugin_head.centertrack_post_process(
                        result, self.inputs, self.tracker.out_thresh)
                return result

    def get_pred(self):
        return self._forward()

    def get_loss(self):
        return self._forward()

    def reset_tracking(self):
        # Drop tracker state and the cached previous frame between sequences.
        self.tracker.reset()
        self.pre_image = None

    def get_additional_inputs(self, dets, meta, with_hm=True):
        # Render input heatmap from previous trackings.
        trans_input = meta['trans_input'][0].numpy()
        inp_width, inp_height = int(meta['inp_width'][0]), int(meta[
            'inp_height'][0])
        input_hm = np.zeros((1, inp_height, inp_width), dtype=np.float32)

        for det in dets:
            # Skip low-confidence tracks when rendering the prior heatmap.
            if det['score'] < self.tracker.pre_thresh:
                continue
            # Map the track's bbox into the network input coordinate system.
            bbox = affine_transform_bbox(det['bbox'], trans_input, inp_width,
                                         inp_height)
            h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
            if (h > 0 and w > 0):
                radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)), min_overlap=0.7)
                radius = max(0, int(radius))
                # Splat a gaussian at the (integer) box center.
                ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                    dtype=np.float32)
                ct_int = ct.astype(np.int32)
                if with_hm:
                    input_hm[0] = draw_umich_gaussian(input_hm[0], ct_int,
                                                      radius)
        if with_hm:
            # Add a batch dimension: (1, 1, H, W).
            input_hm = input_hm[np.newaxis]
        return input_hm
|
||||
|
||||
|
||||
def affine_transform_bbox(bbox, trans, width, height):
    """Map an [x1, y1, x2, y2] box through the affine transform `trans`
    and clip the result to the (width, height) image bounds.

    Args:
        bbox (sequence[float]): box as [x1, y1, x2, y2].
        trans (np.ndarray): 2x3 affine transform matrix, as produced for
            the network input (see `trans_input` in CenterTrack).
        width (int): target image width used for clipping.
        height (int): target image height used for clipping.

    Returns:
        np.ndarray: transformed, clipped box, float32, shape (4,).
    """
    # np.array always copies its input here, so the caller's bbox is never
    # mutated; the former copy.deepcopy() was redundant.
    bbox = np.array(bbox, dtype=np.float32)
    # Transform the two corner points independently.
    bbox[:2] = affine_transform(bbox[:2], trans)
    bbox[2:] = affine_transform(bbox[2:], trans)
    # Clamp x-coords to [0, width-1] and y-coords to [0, height-1].
    bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, width - 1)
    bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, height - 1)
    return bbox
|
||||
67
paddle_detection/ppdet/modeling/architectures/clrnet.py
Normal file
67
paddle_detection/ppdet/modeling/architectures/clrnet.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from .meta_arch import BaseArch
|
||||
from ppdet.core.workspace import register, create
|
||||
from paddle import in_dynamic_mode
|
||||
|
||||
__all__ = ['CLRNet']
|
||||
|
||||
|
||||
@register
class CLRNet(BaseArch):
    """CLRNet lane-detection architecture: backbone + FPN + CLR head."""

    __category__ = 'architecture'

    def __init__(self,
                 backbone="CLRResNet",
                 neck="CLRFPN",
                 clr_head="CLRHead",
                 post_process=None):
        super(CLRNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.heads = clr_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create backbone, neck and head, chaining their output shapes."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        clr_head = create(cfg['clr_head'], input_shape=neck.out_shape)

        return {
            'backbone': backbone,
            'neck': neck,
            'clr_head': clr_head,
        }

    def _forward(self):
        # Backbone consumes the raw image tensor (not the whole inputs dict).
        feats = self.backbone(self.inputs['image'])
        feats = self.neck(feats)

        if self.training:
            # Training: the head computes losses against self.inputs.
            return self.heads(feats, self.inputs)

        output = {'lanes': self.heads(feats)}
        # TODO: hard code fix as_lanes=False problem in clrnet_head.py "get_lanes" function for static mode
        if in_dynamic_mode():
            lanes = self.heads.get_lanes(output['lanes'])
            output = {
                "lanes": lanes,
                "img_path": self.inputs['full_img_path'],
                "img_name": self.inputs['img_name']
            }

        return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
70
paddle_detection/ppdet/modeling/architectures/deepsort.py
Normal file
70
paddle_detection/ppdet/modeling/architectures/deepsort.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box
|
||||
|
||||
__all__ = ['DeepSORT']
|
||||
|
||||
|
||||
@register
class DeepSORT(BaseArch):
    """
    DeepSORT network, see https://arxiv.org/abs/1703.07402

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
    """
    __category__ = 'architecture'

    def __init__(self,
                 detector='YOLOv3',
                 reid='PCBPyramid',
                 tracker='DeepSORTTracker'):
        super(DeepSORT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Create sub-modules; detections may come from offline results,
        signalled by the *string* 'None' as the detector config."""
        detector = None if cfg['detector'] == 'None' else create(cfg['detector'])
        reid = create(cfg['reid'])
        tracker = create(cfg['tracker'])

        return {
            "detector": detector,
            "reid": reid,
            "tracker": tracker,
        }

    def _forward(self):
        # The model only computes ReID embeddings for pre-cropped patches;
        # detection and tracking are driven outside the network.
        return {'embeddings': self.reid(self.inputs['crops'])}

    def get_pred(self):
        return self._forward()
|
||||
118
paddle_detection/ppdet/modeling/architectures/detr.py
Normal file
118
paddle_detection/ppdet/modeling/architectures/detr.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from .meta_arch import BaseArch
|
||||
from ppdet.core.workspace import register, create
|
||||
|
||||
__all__ = ['DETR']
|
||||
# Deformable DETR, DINO use the same architecture as DETR
|
||||
|
||||
|
||||
@register
class DETR(BaseArch):
    """
    DETR detection architecture, see https://arxiv.org/abs/2005.12872.
    Deformable DETR and DINO reuse this same architecture.

    Args:
        backbone (object): backbone instance
        transformer (object): 'DETRTransformer' instance
        detr_head (object): 'DETRHead' instance
        neck (object): optional feature-fusion neck, default None
        post_process (object): 'DETRPostProcess' instance
        post_process_semi (object): post process for semi-supervised
            training, default None
        with_mask (bool): whether the head also outputs instance masks
        exclude_post_process (bool): return raw head outputs instead of
            running post_process (used for deployment/export)
    """
    __category__ = 'architecture'
    __inject__ = ['post_process', 'post_process_semi']
    __shared__ = ['with_mask', 'exclude_post_process']

    def __init__(self,
                 backbone,
                 transformer='DETRTransformer',
                 detr_head='DETRHead',
                 neck=None,
                 post_process='DETRPostProcess',
                 post_process_semi=None,
                 with_mask=False,
                 exclude_post_process=False):
        super(DETR, self).__init__()
        self.backbone = backbone
        self.transformer = transformer
        self.detr_head = detr_head
        self.neck = neck
        self.post_process = post_process
        self.with_mask = with_mask
        self.exclude_post_process = exclude_post_process
        self.post_process_semi = post_process_semi

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # neck
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None

        # transformer
        # Only the input shape depends on the optional neck; the
        # transformer itself must be created unconditionally, otherwise
        # neck-less configs leave `transformer` unbound and the
        # `transformer.hidden_dim` access below raises NameError.
        if neck is not None:
            kwargs = {'input_shape': neck.out_shape}
        transformer = create(cfg['transformer'], **kwargs)
        # head
        kwargs = {
            'hidden_dim': transformer.hidden_dim,
            'nhead': transformer.nhead,
            'input_shape': backbone.out_shape
        }
        detr_head = create(cfg['detr_head'], **kwargs)

        return {
            'backbone': backbone,
            'transformer': transformer,
            "detr_head": detr_head,
            "neck": neck
        }

    def _forward(self):
        # Backbone
        body_feats = self.backbone(self.inputs)

        # Neck
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        # Transformer
        # pad_mask marks valid pixels when images in the batch were padded.
        pad_mask = self.inputs.get('pad_mask', None)
        out_transformer = self.transformer(body_feats, pad_mask, self.inputs)

        # DETR Head
        if self.training:
            detr_losses = self.detr_head(out_transformer, body_feats,
                                         self.inputs)
            # Total loss = sum of all non-logging loss components.
            detr_losses.update({
                'loss': paddle.add_n(
                    [v for k, v in detr_losses.items() if 'log' not in k])
            })
            return detr_losses
        else:
            preds = self.detr_head(out_transformer, body_feats)
            if self.exclude_post_process:
                bbox, bbox_num, mask = preds
            else:
                bbox, bbox_num, mask = self.post_process(
                    preds, self.inputs['im_shape'], self.inputs['scale_factor'],
                    paddle.shape(self.inputs['image'])[2:])

            output = {'bbox': bbox, 'bbox_num': bbox_num}
            if self.with_mask:
                output['mask'] = mask
            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
341
paddle_detection/ppdet/modeling/architectures/detr_ssod.py
Normal file
341
paddle_detection/ppdet/modeling/architectures/detr_ssod.py
Normal file
@@ -0,0 +1,341 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from ppdet.core.workspace import register, create, merge_config
|
||||
import paddle
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, create
|
||||
from ppdet.utils.logger import setup_logger
|
||||
from ppdet.modeling.ssod.utils import filter_invalid
|
||||
from .multi_stream_detector import MultiSteamDetector
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = ['DETR_SSOD']
|
||||
__shared__ = ['num_classes']
|
||||
|
||||
|
||||
@register
class DETR_SSOD(MultiSteamDetector):
    # Semi-supervised DETR with a teacher/student pair. The EMA-updated
    # teacher produces pseudo labels on weakly-augmented unlabeled images;
    # the student trains on labeled data plus the pseudo-labeled,
    # strongly-augmented unlabeled images.
    def __init__(self,
                 teacher,
                 student,
                 train_cfg=None,
                 test_cfg=None,
                 RTDETRTransformer=None,
                 num_classes=80):
        super(DETR_SSOD, self).__init__(
            dict(
                teacher=teacher, student=student),
            train_cfg=train_cfg,
            test_cfg=test_cfg, )
        # Iteration at which the EMA teacher is initialized and the
        # unsupervised branch starts contributing.
        self.ema_start_iters = train_cfg['ema_start_iters']
        # EMA decay for teacher weights.
        self.momentum = 0.9996
        self.cls_thr = None
        self.cls_thr_ig = None
        self.num_classes = num_classes
        if train_cfg is not None:
            # Teacher is never trained directly; it is only updated via EMA.
            self.freeze("teacher")
            self.unsup_weight = self.train_cfg['unsup_weight']
            self.sup_weight = self.train_cfg['sup_weight']
            self._teacher = None
            self._student = None
            self._transformer = None

    @classmethod
    def from_config(cls, cfg):
        # Build teacher, then merge_config before building the student —
        # NOTE(review): presumably this re-applies config overrides so the
        # student is created from a clean config state; confirm.
        teacher = create(cfg['teacher'])
        merge_config(cfg)
        student = create(cfg['student'])
        train_cfg = cfg['train_cfg']
        test_cfg = cfg['test_cfg']
        RTDETRTransformer = cfg['RTDETRTransformer']
        return {
            'teacher': teacher,
            'student': student,
            'train_cfg': train_cfg,
            'test_cfg': test_cfg,
            'RTDETRTransformer': RTDETRTransformer
        }

    def forward_train(self, inputs, **kwargs):
        # iter_id is carried either in the feed dict or as the last element
        # of the input list.
        if isinstance(inputs, dict):
            iter_id = inputs['iter_id']
        elif isinstance(inputs, list):
            iter_id = inputs[-1]
        # Initialize the EMA teacher exactly once (momentum=0 copies the
        # student), then keep it updated every iteration after that.
        if iter_id == self.ema_start_iters:
            self.update_ema_model(momentum=0)
        elif iter_id > self.ema_start_iters:
            self.update_ema_model(momentum=self.momentum)
        if iter_id > self.ema_start_iters:
            # --- semi-supervised phase: supervised + pseudo-label losses ---
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs

            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)

            # Unlabeled batches must not carry GT annotations.
            if 'gt_bbox' in data_unsup_s.keys():
                del data_unsup_s['gt_bbox']
            if 'gt_class' in data_unsup_s.keys():
                del data_unsup_s['gt_class']
            if 'gt_class' in data_unsup_w.keys():
                del data_unsup_w['gt_class']
            if 'gt_bbox' in data_unsup_w.keys():
                del data_unsup_w['gt_bbox']
            # Merge the weak- and strong-augmented supervised batches into a
            # single batch (lists are extended, tensors are concatenated).
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])

            loss = {}
            # Supervised branch: student forward on the merged labeled batch.
            body_feats = self.student.backbone(data_sup_s)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       data_sup_s)
            sup_loss = self.student.detr_head(out_transformer, body_feats,
                                              data_sup_s)
            # Sum all non-logging entries into the branch total.
            sup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in sup_loss.items() if 'log' not in k])
            })
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}

            loss.update(**sup_loss)
            # Unsupervised branch: pseudo labels from the teacher.
            unsup_loss = self.foward_unsup_train(data_unsup_w, data_unsup_s)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            unsup_loss = {"unsup_" + k: v for k, v in unsup_loss.items()}
            # NOTE(review): this second 'loss' recomputation looks redundant —
            # the combined total is overwritten below from 'sup_loss' +
            # 'unsup_loss' anyway; confirm before removing.
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            loss.update(**unsup_loss)
            loss.update({'loss': loss['sup_loss'] + loss['unsup_loss']})
        else:
            # --- warm-up phase: supervised loss only ---
            if iter_id == self.ema_start_iters:
                logger.info("start semi_supervised_traing")
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs

            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])
            loss = {}
            sup_loss = self.student(data_sup_s)
            # Zeroed unsup entries keep the loss-dict schema stable for
            # logging even before the unsupervised branch is active.
            unsup_loss = {
                "unsup_" + k: v * paddle.to_tensor(0)
                for k, v in sup_loss.items()
            }
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}
            loss.update(**sup_loss)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v * 0 for k, v in sup_loss.items() if 'log' not in k])
            })
            unsup_loss = {"unsup_" + k: v * 0 for k, v in unsup_loss.items()}
            loss.update(**unsup_loss)
            loss.update({'loss': loss['sup_loss']})
        return loss

    # (sic: "foward" — kept to avoid breaking callers)
    def foward_unsup_train(self, data_unsup_w, data_unsup_s):
        # Teacher inference on the weakly-augmented batch, gradient-free.
        with paddle.no_grad():
            body_feats = self.teacher.backbone(data_unsup_w)
            if self.teacher.neck is not None:
                body_feats = self.teacher.neck(body_feats, is_teacher=True)
            out_transformer = self.teacher.transformer(
                body_feats, None, data_unsup_w, is_teacher=True)
            preds = self.teacher.detr_head(out_transformer, body_feats)
            bbox, bbox_num = self.teacher.post_process_semi(preds)
        self.place = body_feats[0].place

        # Column layout of `bbox` (as sliced below): [:, :1] class label,
        # [:, 1:num_classes+1] per-class scores, [:, -4:] box coordinates.
        proposal_bbox_list = bbox[:, -4:]
        proposal_bbox_list = proposal_bbox_list.split(
            tuple(np.array(bbox_num)), 0)

        proposal_label_list = paddle.cast(bbox[:, :1], np.float32)
        proposal_label_list = proposal_label_list.split(
            tuple(np.array(bbox_num)), 0)
        proposal_score_list = paddle.cast(bbox[:, 1:self.num_classes + 1],
                                          np.float32)
        proposal_score_list = proposal_score_list.split(
            tuple(np.array(bbox_num)), 0)
        proposal_bbox_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_bbox_list
        ]
        proposal_label_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_label_list
        ]
        # filter invalid box roughly
        if isinstance(self.train_cfg['pseudo_label_initial_score_thr'], float):
            thr = self.train_cfg['pseudo_label_initial_score_thr']
        else:
            # TODO: use dynamic threshold
            raise NotImplementedError(
                "Dynamic Threshold is not implemented yet.")
        # Per-image score/size filtering; zip(*) transposes the per-image
        # (bbox, label, score) tuples back into three parallel lists.
        proposal_bbox_list, proposal_label_list, proposal_score_list = list(
            zip(* [
                filter_invalid(
                    proposal[:, :4],
                    proposal_label,
                    proposal_score,
                    thr=thr,
                    min_size=self.train_cfg['min_pseduo_box_size'], )
                for proposal, proposal_label, proposal_score in
                zip(proposal_bbox_list, proposal_label_list,
                    proposal_score_list)
            ]))

        teacher_bboxes = list(proposal_bbox_list)
        teacher_labels = proposal_label_list
        teacher_info = [teacher_bboxes, teacher_labels]
        student_unsup = data_unsup_s
        return self.compute_pseudo_label_loss(student_unsup, teacher_info,
                                              proposal_score_list)

    def compute_pseudo_label_loss(self, student_unsup, teacher_info,
                                  proposal_score_list):
        # Train the student on the strongly-augmented batch using the
        # teacher's filtered detections as ground truth.
        pseudo_bboxes = list(teacher_info[0])
        pseudo_labels = list(teacher_info[1])
        losses = dict()
        # Round-trip through numpy (detaches from the teacher graph and
        # normalizes empty results to fixed shapes).
        for i in range(len(pseudo_bboxes)):
            if pseudo_labels[i].shape[0] == 0:
                pseudo_bboxes[i] = paddle.zeros([0, 4]).numpy()
                pseudo_labels[i] = paddle.zeros([0, 1]).numpy()
            else:
                pseudo_bboxes[i] = pseudo_bboxes[i][:, :4].numpy()
                pseudo_labels[i] = pseudo_labels[i].numpy()
        for i in range(len(pseudo_bboxes)):
            pseudo_labels[i] = paddle.to_tensor(
                pseudo_labels[i], dtype=paddle.int32, place=self.place)
            pseudo_bboxes[i] = paddle.to_tensor(
                pseudo_bboxes[i], dtype=paddle.float32, place=self.place)
        student_unsup.update({
            'gt_bbox': pseudo_bboxes,
            'gt_class': pseudo_labels
        })
        pseudo_sum = 0
        for i in range(len(pseudo_bboxes)):
            pseudo_sum += pseudo_bboxes[i].sum()
        if pseudo_sum == 0:  #input fake data when there are no pseudo labels
            pseudo_bboxes[0] = paddle.ones([1, 4]) - 0.5
            pseudo_labels[0] = paddle.ones([1, 1]).astype('int32')
            student_unsup.update({
                'gt_bbox': pseudo_bboxes,
                'gt_class': pseudo_labels
            })
            body_feats = self.student.backbone(student_unsup)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       student_unsup)
            losses = self.student.detr_head(out_transformer, body_feats,
                                            student_unsup)
            # Zero every loss so the fake data contributes no gradient signal.
            for n, v in losses.items():
                losses[n] = v * 0
        else:
            # Keep only the images that actually received pseudo labels.
            gt_bbox = []
            gt_class = []
            images = []
            proposal_score = []
            for i in range(len(pseudo_bboxes)):
                if pseudo_labels[i].shape[0] == 0:
                    continue
                else:
                    # Per-box confidence = max class score, kept as a column.
                    proposal_score.append(proposal_score_list[i].max(-1)
                                          .unsqueeze(-1))
                    gt_class.append(pseudo_labels[i])
                    gt_bbox.append(pseudo_bboxes[i])
                    images.append(student_unsup['image'][i])
            images = paddle.stack(images)
            student_unsup.update({
                'image': images,
                'gt_bbox': gt_bbox,
                'gt_class': gt_class
            })
            body_feats = self.student.backbone(student_unsup)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       student_unsup)
            student_unsup.update({'gt_score': proposal_score})
            losses = self.student.detr_head(out_transformer, body_feats,
                                            student_unsup)
        return losses
|
||||
|
||||
|
||||
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) along the last axis."""
    cx, cy, w, h = x.unbind(-1)
    corners = [(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)]
    return paddle.stack(corners, axis=-1)
|
||||
|
||||
|
||||
def box_xyxy_to_cxcywh(x):
    """Convert boxes from (x1, y1, x2, y2) to (cx, cy, w, h) along the last axis."""
    left, top, right, bottom = x.unbind(-1)
    parts = [(left + right) / 2, (top + bottom) / 2, (right - left),
             (bottom - top)]
    return paddle.stack(parts, axis=-1)
|
||||
|
||||
|
||||
def get_size_with_aspect_ratio(image_size, size, max_size=None):
    """Return (ow, oh): *image_size* rescaled so its shorter side equals *size*.

    Aspect ratio is preserved. When *max_size* is given and the rescaled
    longer side would exceed it, *size* is first reduced so the longer side
    lands exactly on *max_size*.
    """
    w, h = image_size
    if max_size is not None:
        shorter = float(min((w, h)))
        longer = float(max((w, h)))
        # Shrink the target if the longer side would overshoot the cap.
        if longer / shorter * size > max_size:
            size = int(round(max_size * shorter / longer))

    # Shorter side already at target: return unchanged.
    if (w <= h and w == size) or (h <= w and h == size):
        return (w, h)

    if w < h:
        return (size, int(size * h / w))
    return (int(size * w / h), size)
|
||||
|
||||
|
||||
def align_weak_strong_shape(data_weak, data_strong):
    """Resize the weak batch's images to match the strong batch's H x W.

    Only ``data_weak['image']`` is modified (bilinear interpolation, no
    corner alignment); the strong batch is returned untouched.
    """
    strong_images = data_strong['image']
    target_size = [strong_images.shape[2], strong_images.shape[3]]

    data_weak['image'] = F.interpolate(
        data_weak['image'],
        size=target_size,
        mode='bilinear',
        align_corners=False)
    return data_weak, data_strong
|
||||
100
paddle_detection/ppdet/modeling/architectures/fairmot.py
Normal file
100
paddle_detection/ppdet/modeling/architectures/fairmot.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['FairMOT']
|
||||
|
||||
|
||||
@register
class FairMOT(BaseArch):
    """
    FairMOT network, see http://arxiv.org/abs/2004.01888

    Joint detection (CenterNet) and re-identification embedding for
    multi-object tracking.

    Args:
        detector (object): 'CenterNet' instance
        reid (object): 'FairMOTEmbeddingHead' instance
        tracker (object): 'JDETracker' instance
        loss (object): 'FairMOTLoss' instance
    """

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 detector='CenterNet',
                 reid='FairMOTEmbeddingHead',
                 tracker='JDETracker',
                 loss='FairMOTLoss'):
        super(FairMOT, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.loss = loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # ReID head consumes the detector's neck features; fall back to the
        # backbone shapes when the detector has no neck.
        detector_out_shape = detector.neck and detector.neck.out_shape or detector.backbone.out_shape

        kwargs = {'input_shape': detector_out_shape}
        reid = create(cfg['reid'], **kwargs)
        loss = create(cfg['loss'])
        tracker = create(cfg['tracker'])

        return {
            'detector': detector,
            'reid': reid,
            'loss': loss,
            'tracker': tracker
        }

    def _forward(self):
        """Training: combined det+reid loss dict. Eval: (pred_dets, pred_embs)."""
        # det_outs keys:
        # train: neck_feat, det_loss, heatmap_loss, size_loss, offset_loss (optional: iou_loss)
        # eval/infer: neck_feat, bbox, bbox_inds
        det_outs = self.detector(self.inputs)
        neck_feat = det_outs['neck_feat']
        if self.training:
            reid_loss = self.reid(neck_feat, self.inputs)

            det_loss = det_outs['det_loss']
            # Fix: removed the dead `loss = dict()` pre-initialization that
            # was unconditionally overwritten by this call.
            loss = self.loss(det_loss, reid_loss)
            # Surface each component loss for logging alongside the total.
            for k, v in det_outs.items():
                if 'loss' not in k:
                    continue
                loss.update({k: v})
            loss.update({'reid_loss': reid_loss})
            return loss
        else:
            pred_dets, pred_embs = self.reid(
                neck_feat, self.inputs, det_outs['bbox'], det_outs['bbox_inds'],
                det_outs['topk_clses'])
            return pred_dets, pred_embs

    def get_pred(self):
        """Eval/infer entry point: returns (pred_dets, pred_embs)."""
        output = self._forward()
        return output

    def get_loss(self):
        """Training entry point: returns the loss dict."""
        loss = self._forward()
        return loss
|
||||
167
paddle_detection/ppdet/modeling/architectures/faster_rcnn.py
Normal file
167
paddle_detection/ppdet/modeling/architectures/faster_rcnn.py
Normal file
@@ -0,0 +1,167 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['FasterRCNN']
|
||||
|
||||
|
||||
@register
class FasterRCNN(BaseArch):
    """
    Faster R-CNN network, see https://arxiv.org/abs/1506.01497

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        neck (object): 'FPN' instance
    """
    __category__ = 'architecture'
    __inject__ = ['bbox_post_process']

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 bbox_post_process,
                 neck=None):
        super(FasterRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.bbox_post_process = bbox_post_process

    def init_cot_head(self, relationship):
        """Forward the co-tuning relationship matrix to the bbox head."""
        self.bbox_head.init_cot_head(relationship)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        # Heads consume neck shapes when present, backbone shapes otherwise.
        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
        }

    def _forward(self):
        """Training: (rpn_loss, bbox_loss). Eval: predictions (+ extra_data)."""
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, _ = self.bbox_head(body_feats, rois, rois_num,
                                          self.inputs)
            return rpn_loss, bbox_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, _ = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                preds, (rois, rois_num), im_shape, scale_factor)

            # rescale the prediction back to origin image
            bboxes, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)

            if self.use_extra_data:
                extra_data = {
                }  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                            }
                """
                extra_data['scores'] = preds[1]  # predict scores (probability)
                # Todo: get logits output
                extra_data[
                    'nms_keep_idx'] = nms_keep_idx  # bbox index before nms
                return bbox_pred, bbox_num, extra_data
            else:
                return bbox_pred, bbox_num

    def get_loss(self, ):
        """Sum RPN and bbox-head losses into a single total under 'loss'."""
        rpn_loss, bbox_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Eval entry: prediction dict, with 'extra_data' when enabled."""
        if self.use_extra_data:
            bbox_pred, bbox_num, extra_data = self._forward()
            output = {
                'bbox': bbox_pred,
                'bbox_num': bbox_num,
                'extra_data': extra_data
            }
        else:
            bbox_pred, bbox_num = self._forward()
            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output

    def target_bbox_forward(self, data):
        """Run the bbox head directly on GT boxes (co-tuning support)."""
        body_feats = self.backbone(data)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        rois = [roi for roi in data['gt_bbox']]
        rois_num = paddle.concat([paddle.shape(roi)[0:1] for roi in rois])

        preds, _ = self.bbox_head(body_feats, rois, rois_num, None, cot=True)
        return preds

    def relationship_learning(self, loader, num_classes_novel):
        """Estimate per-novel-class mean base-class probabilities (co-tuning).

        Returns a 2-D array whose i-th row is the mean base-class probability
        vector over samples labeled with novel class i.
        """
        print('computing relationship')
        train_labels_list = []
        label_list = []

        # Fix: dropped the unused `step_id` (enumerate) and `num_bbox` locals
        # of the original; behavior is unchanged.
        for data in loader:
            _, bbox_prob = self.target_bbox_forward(data)
            batch_size = data['im_id'].shape[0]
            for i in range(batch_size):
                train_labels = data['gt_class'][i]
                train_labels_list.append(train_labels.numpy().squeeze(1))
                base_labels = bbox_prob.detach().numpy()[:, :-1]
                label_list.append(base_labels)

        labels = np.concatenate(train_labels_list, 0)
        probabilities = np.concatenate(label_list, 0)
        N_t = np.max(labels) + 1
        conditional = []
        for i in range(N_t):
            this_class = probabilities[labels == i]
            average = np.mean(this_class, axis=0, keepdims=True)
            conditional.append(average)
        return np.concatenate(conditional)
|
||||
222
paddle_detection/ppdet/modeling/architectures/fcos.py
Normal file
222
paddle_detection/ppdet/modeling/architectures/fcos.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['FCOS', 'ARSL_FCOS']
|
||||
|
||||
|
||||
@register
class FCOS(BaseArch):
    """
    FCOS network, see https://arxiv.org/abs/1904.01355

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead' instance
        ssod_loss (object): 'SSODFCOSLoss' instance, used only for
            semi-supervised detection (DenseTeacher)
    """

    __category__ = 'architecture'
    __inject__ = ['ssod_loss']

    def __init__(self,
                 backbone='ResNet',
                 neck='FPN',
                 fcos_head='FCOSHead',
                 ssod_loss='SSODFCOSLoss'):
        super(FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head

        # Semi-supervised (ssod) state; a teacher pass is requested by the
        # caller through inputs['is_teacher'].
        self.is_teacher = False
        self.ssod_loss = ssod_loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], **{'input_shape': backbone.out_shape})
        fcos_head = create(cfg['fcos_head'], **{'input_shape': neck.out_shape})
        return {
            'backbone': backbone,
            'neck': neck,
            "fcos_head": fcos_head,
        }

    def _forward(self):
        """Training/teacher pass: loss dict. Eval: decoded prediction dict."""
        fpn_feats = self.neck(self.backbone(self.inputs))

        self.is_teacher = self.inputs.get('is_teacher', False)
        if not (self.training or self.is_teacher):
            fcos_head_outs = self.fcos_head(fpn_feats)
            bbox_pred, bbox_num = self.fcos_head.post_process(
                fcos_head_outs, self.inputs['scale_factor'])
            return {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return self.fcos_head(fpn_feats, self.inputs)

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

    def get_loss_keys(self):
        """Names of the component losses produced during training."""
        return ['loss_cls', 'loss_box', 'loss_quality']

    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
        """Semi-supervised distillation loss between student and teacher outputs."""
        return self.ssod_loss(student_head_outs, teacher_head_outs, train_cfg)
|
||||
|
||||
|
||||
@register
class ARSL_FCOS(BaseArch):
    """
    FCOS ARSL network, see https://arxiv.org/abs/

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        fcos_head (object): 'FCOSHead_ARSL' instance
        fcos_cr_loss (object): 'FCOSLossCR' instance, only used for semi-det(ssod) by ARSL
    """

    __category__ = 'architecture'
    __inject__ = ['fcos_cr_loss']

    def __init__(self,
                 backbone,
                 neck,
                 fcos_head='FCOSHead_ARSL',
                 fcos_cr_loss='FCOSLossCR'):
        super(ARSL_FCOS, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.fcos_head = fcos_head
        # Consistency-regularization loss between student outputs and
        # teacher predictions (unsupervised branch).
        self.fcos_cr_loss = fcos_cr_loss

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])

        # Neck consumes backbone feature shapes; head consumes neck shapes.
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        fcos_head = create(cfg['fcos_head'], **kwargs)

        # consistency regularization loss
        fcos_cr_loss = create(cfg['fcos_cr_loss'])

        return {
            'backbone': backbone,
            'neck': neck,
            'fcos_head': fcos_head,
            'fcos_cr_loss': fcos_cr_loss,
        }

    def forward(self, inputs, branch="supervised", teacher_prediction=None):
        # Dispatch on (self.training, branch):
        #   train + supervised      -> labeled-data loss
        #   train + semi_supervised -> pseudo-label loss vs teacher_prediction
        #   eval  + supervised      -> normal detection results
        #   eval  + semi_supervised -> raw outputs used as pseudo labels
        assert branch in ['supervised', 'semi_supervised'], \
            print('In ARSL, type must be supervised or semi_supervised.')

        # NOTE(review): self.data_format is presumably set by BaseArch —
        # confirm; NHWC inputs are transposed from the NCHW feed layout.
        if self.data_format == 'NHWC':
            image = inputs['image']
            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
        self.inputs = inputs

        if self.training:
            if branch == "supervised":
                out = self.get_loss()
            else:
                out = self.get_pseudo_loss(teacher_prediction)
        else:
            # norm test
            if branch == "supervised":
                out = self.get_pred()
            # predict pseudo labels
            else:
                out = self.get_pseudo_pred()
        return out

    # model forward
    def model_forward(self):
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        fcos_head_outs = self.fcos_head(fpn_feats)
        return fcos_head_outs

    # supervised loss for labeled data
    def get_loss(self):
        loss = {}
        # Collect per-FPN-level targets prepared by the data pipeline; each
        # level i provides 'labels{i}', 'reg_target{i}', 'centerness{i}'.
        tag_labels, tag_bboxes, tag_centerness = [], [], []
        for i in range(len(self.fcos_head.fpn_stride)):
            # labels, reg_target, centerness
            k_lbl = 'labels{}'.format(i)
            if k_lbl in self.inputs:
                tag_labels.append(self.inputs[k_lbl])
            k_box = 'reg_target{}'.format(i)
            if k_box in self.inputs:
                tag_bboxes.append(self.inputs[k_box])
            k_ctn = 'centerness{}'.format(i)
            if k_ctn in self.inputs:
                tag_centerness.append(self.inputs[k_ctn])
        fcos_head_outs = self.model_forward()
        loss_fcos = self.fcos_head.get_loss(fcos_head_outs, tag_labels,
                                            tag_bboxes, tag_centerness)
        loss.update(loss_fcos)
        return loss

    # unsupervised loss for unlabeled data
    def get_pseudo_loss(self, teacher_prediction):
        loss = {}
        fcos_head_outs = self.model_forward()
        unsup_loss = self.fcos_cr_loss(fcos_head_outs, teacher_prediction)
        # Suffix keys so supervised and pseudo losses stay distinguishable
        # in the combined log.
        for k in unsup_loss.keys():
            loss[k + '_pseudo'] = unsup_loss[k]
        return loss

    # get detection results for test, decode and rescale the results to original size
    def get_pred(self):
        fcos_head_outs = self.model_forward()
        scale_factor = self.inputs['scale_factor']
        bbox_pred, bbox_num = self.fcos_head.post_process(fcos_head_outs,
                                                          scale_factor)
        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
        return output

    # generate pseudo labels to guide student
    def get_pseudo_pred(self):
        fcos_head_outs = self.model_forward()
        pred_cls, pred_loc, pred_iou = fcos_head_outs[1:]  # 0 is locations
        # Normalize regression outputs by each level's FPN stride so
        # predictions are comparable across levels.
        for lvl, _ in enumerate(pred_loc):
            pred_loc[lvl] = pred_loc[lvl] / self.fcos_head.fpn_stride[lvl]

        return [pred_cls, pred_loc, pred_iou, self.fcos_head.fpn_stride]
|
||||
87
paddle_detection/ppdet/modeling/architectures/gfl.py
Normal file
87
paddle_detection/ppdet/modeling/architectures/gfl.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['GFL']
|
||||
|
||||
|
||||
@register
class GFL(BaseArch):
    """
    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'GFLHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='GFLHead'):
        super(GFL, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Build sub-modules in dependency order, threading each module's
        # output shape into the next one's constructor.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'head': head}

    def _forward(self):
        # backbone -> FPN neck -> GFL head
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        head_outs = self.head(fpn_feats)
        if self.training:
            return head_outs
        # Inference: decode and rescale boxes back to the original image.
        bboxes, bbox_num = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        return bboxes, bbox_num

    def get_loss(self):
        """Return per-component GFL losses plus their sum under key 'loss'."""
        losses = {}
        losses.update(self.head.get_loss(self._forward(), self.inputs))
        # Summed before 'loss' is inserted, so the total is not double-counted.
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        """Return decoded detections as {'bbox': ..., 'bbox_num': ...}."""
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
|
||||
110
paddle_detection/ppdet/modeling/architectures/jde.py
Normal file
110
paddle_detection/ppdet/modeling/architectures/jde.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['JDE']
|
||||
|
||||
|
||||
@register
class JDE(BaseArch):
    """
    JDE network, see https://arxiv.org/abs/1909.12605v1

    Args:
        detector (object): detector model instance
        reid (object): reid model instance
        tracker (object): tracker instance
        metric (str): 'MOTDet' for training and detection evaluation, 'ReID'
            for ReID embedding evaluation, or 'MOT' for multi object tracking
            evaluation.
    """
    __category__ = 'architecture'
    __shared__ = ['metric']

    def __init__(self,
                 detector='YOLOv3',
                 reid='JDEEmbeddingHead',
                 tracker='JDETracker',
                 metric='MOT'):
        super(JDE, self).__init__()
        self.detector = detector
        self.reid = reid
        self.tracker = tracker
        self.metric = metric

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        detector = create(cfg['detector'])
        # The ReID head consumes the detector neck's feature maps.
        reid = create(cfg['reid'], input_shape=detector.neck.out_shape)
        tracker = create(cfg['tracker'])
        return {'detector': detector, 'reid': reid, 'tracker': tracker}

    def _forward(self):
        det_outs = self.detector(self.inputs)

        if self.training:
            # Joint detection + embedding losses.
            return self.reid(
                det_outs['emb_feats'],
                self.inputs,
                loss_confs=det_outs['det_losses']['loss_confs'],
                loss_boxes=det_outs['det_losses']['loss_boxes'])

        if self.metric == 'MOTDet':
            # Detection-only evaluation.
            return {
                'bbox': det_outs['bbox'],
                'bbox_num': det_outs['bbox_num'],
            }

        if self.metric == 'MOT':
            # Tracking: pair kept detections with their ReID embeddings.
            pred_dets, pred_embs = self.reid(
                det_outs['emb_feats'],
                self.inputs,
                bboxes=det_outs['bbox'],
                boxes_idx=det_outs['boxes_idx'],
                nms_keep_idx=det_outs['nms_keep_idx'])
            return pred_dets, pred_embs

        raise ValueError("Unknown metric {} for multi object tracking.".format(
            self.metric))

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
@@ -0,0 +1,287 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
from collections import abc, defaultdict
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register, create, serializable
|
||||
from .meta_arch import BaseArch
|
||||
from .. import layers as L
|
||||
from ..keypoint_utils import transpred
|
||||
|
||||
__all__ = ['HigherHRNet']
|
||||
|
||||
|
||||
@register
class HigherHRNet(BaseArch):
    __category__ = 'architecture'

    def __init__(self,
                 backbone='HRNet',
                 hrhrnet_head='HrHRNetHead',
                 post_process='HrHRNetPostProcess',
                 eval_flip=True,
                 flip_perm=None,
                 max_num_people=30):
        """
        HigherHRNet network, see https://arxiv.org/abs/1908.10357;
        HigherHRNet+swahr, see https://arxiv.org/abs/2012.15175

        Args:
            backbone (nn.Layer): backbone instance
            hrhrnet_head (nn.Layer): keypoint_head instance
            post_process (object): `HrHRNetPostProcess` instance
            eval_flip (bool): whether to average predictions with those of
                the horizontally flipped image at eval time
            flip_perm (list|None): left-right joint index permutation used
                for flip testing; required when flip testing is enabled
            max_num_people (int): max number of people kept by top-k
        """
        super(HigherHRNet, self).__init__()
        self.backbone = backbone
        self.hrhrnet_head = hrhrnet_head
        self.post_process = post_process
        self.flip = eval_flip
        # Bugfix: flip_perm defaults to None and paddle.to_tensor(None)
        # raises; only convert when a permutation is actually supplied.
        self.flip_perm = None if flip_perm is None else paddle.to_tensor(
            flip_perm)
        self.deploy = False
        self.interpolate = L.Upsample(2, mode='bilinear')
        self.pool = L.MaxPool(5, 1, 2)
        self.max_num_people = max_num_people

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        # head consumes the backbone's output shapes
        kwargs = {'input_shape': backbone.out_shape}
        hrhrnet_head = create(cfg['hrhrnet_head'], **kwargs)
        post_process = create(cfg['post_process'])

        return {
            'backbone': backbone,
            "hrhrnet_head": hrhrnet_head,
            "post_process": post_process,
        }

    def _forward(self):
        # Flip testing: run the original and the mirrored image as one
        # batch so both predictions come from a single forward pass.
        if self.flip and not self.training and not self.deploy:
            self.inputs['image'] = paddle.concat(
                (self.inputs['image'], paddle.flip(self.inputs['image'], [3])))
        body_feats = self.backbone(self.inputs)

        if self.training:
            return self.hrhrnet_head(body_feats, self.inputs)
        outputs = self.hrhrnet_head(body_feats)

        if self.flip and not self.deploy:
            # Split original/flipped halves, un-flip the mirrored outputs
            # (swapping left/right joints via flip_perm), then average the
            # heatmaps and keep both tagmaps.
            outputs = [paddle.split(o, 2) for o in outputs]
            output_rflip = [
                paddle.flip(paddle.gather(o[1], self.flip_perm, 1), [3])
                for o in outputs
            ]
            output1 = [o[0] for o in outputs]
            heatmap = (output1[0] + output_rflip[0]) / 2.
            tagmaps = [output1[1], output_rflip[1]]
            outputs = [heatmap] + tagmaps
        outputs = self.get_topk(outputs)

        if self.deploy:
            return outputs

        res_lst = []
        h = self.inputs['im_shape'][0, 0].numpy().item()
        w = self.inputs['im_shape'][0, 1].numpy().item()
        kpts, scores = self.post_process(*outputs, h, w)
        res_lst.append([kpts, scores])
        return res_lst

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        outputs = {}
        res_lst = self._forward()
        outputs['keypoint'] = res_lst
        return outputs

    def get_topk(self, outputs):
        """Upsample model outputs to input resolution and extract the
        per-joint top-k candidate peaks from the heatmap.

        Returns:
            list: [heatmap, tagmap, heat_k, inds_k]
        """
        # resize to image size
        outputs = [self.interpolate(x) for x in outputs]
        if len(outputs) == 3:
            # Two tagmaps (flip testing): stack them along a new last axis.
            tagmap = paddle.concat(
                (outputs[1].unsqueeze(4), outputs[2].unsqueeze(4)), axis=4)
        else:
            tagmap = outputs[1].unsqueeze(4)

        heatmap = outputs[0]
        N, J = 1, self.hrhrnet_head.num_joints
        heatmap_maxpool = self.pool(heatmap)
        # NMS via max-pooling: keep local maxima only, then take top-k.
        maxmap = heatmap * (heatmap == heatmap_maxpool)
        maxmap = maxmap.reshape([N, J, -1])
        heat_k, inds_k = maxmap.topk(self.max_num_people, axis=2)

        outputs = [heatmap, tagmap, heat_k, inds_k]
        return outputs
|
||||
|
||||
|
||||
@register
@serializable
class HrHRNetPostProcess(object):
    '''
    HrHRNet postprocess contain:
        1) get topk keypoints in the output heatmap
        2) sample the tagmap's value corresponding to each of the topk coordinate
        3) match different joints to combine to some people with Hungary algorithm
        4) adjust the coordinate by +-0.25 to decrease error std
        5) salvage missing joints by check positivity of heatmap - tagdiff_norm
    Args:
        max_num_people (int): max number of people support in postprocess
        heat_thresh (float): value of topk below this threshhold will be ignored
        tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init

        inputs(list[heatmap]): the output list of model, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk
        original_height, original_width (float): the original image size
    '''

    def __init__(self, max_num_people=30, heat_thresh=0.1, tag_thresh=1.):
        self.max_num_people = max_num_people
        self.heat_thresh = heat_thresh
        self.tag_thresh = tag_thresh

    def lerp(self, j, y, x, heatmap):
        # Quarter-pixel refinement: shift each (y, x) by +/-0.25 toward the
        # larger neighboring heat value, plus 0.5 for the pixel center.
        # j, y, x are parallel index arrays into heatmap (J, H, W).
        H, W = heatmap.shape[-2:]
        left = np.clip(x - 1, 0, W - 1)
        right = np.clip(x + 1, 0, W - 1)
        up = np.clip(y - 1, 0, H - 1)
        down = np.clip(y + 1, 0, H - 1)
        offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25,
                            -0.25)
        offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25,
                            -0.25)
        return offset_y + 0.5, offset_x + 0.5

    def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height,
                 original_width):
        # heatmap: (1, J, H, W); tagmap: (1, J, H, W, tag_dim);
        # heat_k / inds_k: per-joint top-k scores and flat indices.
        N, J, H, W = heatmap.shape
        assert N == 1, "only support batch size 1"
        heatmap = heatmap[0].cpu().detach().numpy()
        tagmap = tagmap[0].cpu().detach().numpy()
        heats = heat_k[0].cpu().detach().numpy()
        inds_np = inds_k[0].cpu().detach().numpy()
        # Flat top-k indices -> (y, x) grid coordinates.
        y = inds_np // W
        x = inds_np % W
        # Sample the tag vector at every top-k candidate: (J, k, tag_dim).
        tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people),
                      y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1])
        coords = np.stack((y, x), axis=2)
        # threshold
        mask = heats > self.heat_thresh
        # cluster: one entry per person, keyed by the first tag component of
        # the candidate that seeded it.
        cluster = defaultdict(lambda: {
            'coords': np.zeros((J, 2), dtype=np.float32),
            'scores': np.zeros(J, dtype=np.float32),
            'tags': []
        })
        for jid, m in enumerate(mask):
            num_valid = m.sum()
            if num_valid == 0:
                continue
            valid_inds = np.where(m)[0]
            valid_tags = tags[jid, m, :]
            if len(cluster) == 0:  # initialize
                for i in valid_inds:
                    tag = tags[jid, i]
                    key = tag[0]
                    cluster[key]['tags'].append(tag)
                    cluster[key]['scores'][jid] = heats[jid, i]
                    cluster[key]['coords'][jid] = coords[jid, i]
                continue
            candidates = list(cluster.keys())[:self.max_num_people]
            # Per-cluster mean tag embedding (centroid).
            centroids = [
                np.mean(
                    cluster[k]['tags'], axis=0) for k in candidates
            ]
            num_clusters = len(centroids)
            # shape is (num_valid, num_clusters, tag_dim)
            dist = valid_tags[:, None, :] - np.array(centroids)[None, ...]
            l2_dist = np.linalg.norm(dist, ord=2, axis=2)
            # modulate dist with heat value, see `use_detection_val`
            cost = np.round(l2_dist) * 100 - heats[jid, m, None]
            # pad the cost matrix, otherwise new pose are ignored
            if num_valid > num_clusters:
                cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)),
                              'constant',
                              constant_values=((0, 0), (0, 1e-10)))
            # Hungarian algorithm: assign each candidate to a cluster
            # (or a padded column, which starts a new cluster below).
            rows, cols = linear_sum_assignment(cost)
            for y, x in zip(rows, cols):
                tag = tags[jid, y]
                if y < num_valid and x < num_clusters and \
                        l2_dist[y, x] < self.tag_thresh:
                    key = candidates[x]  # merge to cluster
                else:
                    key = tag[0]  # initialize new cluster
                cluster[key]['tags'].append(tag)
                cluster[key]['scores'][jid] = heats[jid, y]
                cluster[key]['coords'][jid] = coords[jid, y]

        # shape is [k, J, 2] and [k, J]
        pose_tags = np.array([cluster[k]['tags'] for k in cluster])
        pose_coords = np.array([cluster[k]['coords'] for k in cluster])
        pose_scores = np.array([cluster[k]['scores'] for k in cluster])
        valid = pose_scores > 0

        pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32)
        if valid.sum() == 0:
            # No joint above threshold: return empty keypoints twice
            # (keeps the (kpts, scores) tuple shape for callers).
            return pose_kpts, pose_kpts

        # refine coords
        valid_coords = pose_coords[valid].astype(np.int32)
        y = valid_coords[..., 0].flatten()
        x = valid_coords[..., 1].flatten()
        _, j = np.nonzero(valid)
        offsets = self.lerp(j, y, x, heatmap)
        pose_coords[valid, 0] += offsets[0]
        pose_coords[valid, 1] += offsets[1]

        # mean score before salvage
        mean_score = pose_scores.mean(axis=1)
        pose_kpts[valid, 2] = pose_scores[valid]

        # salvage missing joints
        if True:
            for pid, coords in enumerate(pose_coords):
                # Distance of every pixel's tag to this person's mean tag;
                # a joint is salvaged where heat - tag distance stays positive.
                tag_mean = np.array(pose_tags[pid]).mean(axis=0)
                norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5
                score = heatmap - np.round(norm)  # (J, H, W)
                flat_score = score.reshape(J, -1)
                max_inds = np.argmax(flat_score, axis=1)
                max_scores = np.max(flat_score, axis=1)
                salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0)
                if salvage_joints.sum() == 0:
                    continue
                y = max_inds[salvage_joints] // W
                x = max_inds[salvage_joints] % W
                offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap)
                y = y.astype(np.float32) + offsets[0]
                x = x.astype(np.float32) + offsets[1]
                pose_coords[pid][salvage_joints, 0] = y
                pose_coords[pid][salvage_joints, 1] = x
                pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints]
        # Map (x, y) heatmap coordinates back to the original image frame.
        pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1],
                                       original_height, original_width,
                                       min(H, W))
        return pose_kpts, mean_score
|
||||
468
paddle_detection/ppdet/modeling/architectures/keypoint_hrnet.py
Normal file
468
paddle_detection/ppdet/modeling/architectures/keypoint_hrnet.py
Normal file
@@ -0,0 +1,468 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import numpy as np
|
||||
import math
|
||||
import cv2
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from ..keypoint_utils import transform_preds
|
||||
from .. import layers as L
|
||||
from paddle.nn import functional as F
|
||||
|
||||
__all__ = ['TopDownHRNet', 'TinyPose3DHRNet', 'TinyPose3DHRHeatmapNet']
|
||||
|
||||
|
||||
@register
class TopDownHRNet(BaseArch):
    """Top-down HRNet keypoint network, see https://arxiv.org/abs/1902.09212.

    Args:
        width (int): channel width of the backbone's output branch
        num_joints (int): number of predicted keypoints
        backbone (nn.Layer): backbone instance
        loss (object): keypoint loss instance
        post_process (object): `HRNetPostProcess` instance
        flip_perm (list): the left-right joints exchange order list
        flip (bool): average predictions with the flipped image at eval time
        shift_heatmap (bool): shift the flipped heatmap one pixel right
        use_dark (bool): whether to use DARK in post processing
    """
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 backbone='HRNet',
                 loss='KeyPointMSELoss',
                 post_process='HRNetPostProcess',
                 flip_perm=None,
                 flip=True,
                 shift_heatmap=True,
                 use_dark=True):
        super(TopDownHRNet, self).__init__()
        self.backbone = backbone
        # NOTE(review): the `post_process` argument is ignored and a fresh
        # HRNetPostProcess is always constructed — presumably intentional,
        # but confirm against the configs that set `post_process`.
        self.post_process = HRNetPostProcess(use_dark)
        self.loss = loss
        self.flip_perm = flip_perm
        self.flip = flip
        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)
        self.shift_heatmap = shift_heatmap
        self.deploy = False

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Only the backbone is built from config; the loss is injected.
        return {'backbone': create(cfg['backbone']), }

    def _forward(self):
        feats = self.backbone(self.inputs)
        hrnet_outputs = self.final_conv(feats[0])

        if self.training:
            return self.loss(hrnet_outputs, self.inputs)

        if self.deploy:
            # Export path: heatmaps plus the flat argmax index per joint.
            shp = hrnet_outputs.shape
            flat = hrnet_outputs.reshape((shp[0], shp[1], shp[2] * shp[3]))
            return hrnet_outputs, paddle.argmax(flat, axis=-1)

        if self.flip:
            # Flip testing: predict on the mirrored image, un-flip the
            # result and average it with the original prediction.
            self.inputs['image'] = self.inputs['image'].flip([3])
            feats = self.backbone(self.inputs)
            output_flipped = self.final_conv(feats[0])
            output_flipped = self.flip_back(output_flipped.numpy(),
                                            self.flip_perm)
            output_flipped = paddle.to_tensor(output_flipped.copy())
            if self.shift_heatmap:
                output_flipped[:, :, :, 1:] = output_flipped.clone(
                )[:, :, :, 0:-1]
            hrnet_outputs = (hrnet_outputs + output_flipped) * 0.5

        # Fall back to image-shape-derived center/scale when not provided.
        imshape = (self.inputs['im_shape'].numpy()
                   )[:, ::-1] if 'im_shape' in self.inputs else None
        center = self.inputs['center'].numpy(
        ) if 'center' in self.inputs else np.round(imshape / 2.)
        scale = self.inputs['scale'].numpy(
        ) if 'scale' in self.inputs else imshape / 200.
        return self.post_process(hrnet_outputs, center, scale)

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return {'keypoint': self._forward()}

    def flip_back(self, output_flipped, matched_parts):
        """Mirror heatmaps horizontally and swap left/right joint channels."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        mirrored = output_flipped[:, :, :, ::-1]
        for left, right in matched_parts:
            tmp = mirrored[:, left, :, :].copy()
            mirrored[:, left, :, :] = mirrored[:, right, :, :]
            mirrored[:, right, :, :] = tmp
        return mirrored
|
||||
|
||||
|
||||
class HRNetPostProcess(object):
    """Decode HRNet heatmaps into keypoint coordinates in the original image.

    Args:
        use_dark (bool): use DARK (distribution-aware) coordinate decoding.
    """

    def __init__(self, use_dark=True):
        self.use_dark = use_dark

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch, joints = heatmaps.shape[0], heatmaps.shape[1]
        width = heatmaps.shape[3]
        flat = heatmaps.reshape((batch, joints, -1))
        idx = np.argmax(flat, 2).reshape((batch, joints, 1))
        maxvals = np.amax(flat, 2).reshape((batch, joints, 1))

        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
        # Flat index -> (x, y) grid position.
        preds[:, :, 0] = preds[:, :, 0] % width
        preds[:, :, 1] = np.floor(preds[:, :, 1] / width)

        # Zero out predictions whose peak score is not positive.
        visible = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        preds *= visible.astype(np.float32)

        return preds, maxvals

    def gaussian_blur(self, heatmap, kernel):
        """Blur each joint heatmap in place, preserving its peak value."""
        border = (kernel - 1) // 2
        batch, joints = heatmap.shape[0], heatmap.shape[1]
        height, width = heatmap.shape[2], heatmap.shape[3]
        for b in range(batch):
            for j in range(joints):
                origin_max = np.max(heatmap[b, j])
                # Pad before blurring so the kernel behaves at the borders.
                padded = np.zeros((height + 2 * border, width + 2 * border))
                padded[border:-border, border:-border] = heatmap[b, j].copy()
                padded = cv2.GaussianBlur(padded, (kernel, kernel), 0)
                heatmap[b, j] = padded[border:-border, border:-border].copy()
                # Rescale so the blurred map keeps the original peak height.
                heatmap[b, j] *= origin_max / np.max(heatmap[b, j])
        return heatmap

    def dark_parse(self, hm, coord):
        """Refine one coordinate via a second-order Taylor expansion (DARK)."""
        heatmap_height = hm.shape[0]
        heatmap_width = hm.shape[1]
        px = int(coord[0])
        py = int(coord[1])
        if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
            # First derivatives (central differences).
            dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
            dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
            # Second derivatives (Hessian entries).
            dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
            dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \
                + hm[py-1][px-1])
            dyy = 0.25 * (
                hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px])
            derivative = np.matrix([[dx], [dy]])
            hessian = np.matrix([[dxx, dxy], [dxy, dyy]])
            # Newton step toward the true mode, only if invertible.
            if dxx * dyy - dxy**2 != 0:
                hessianinv = hessian.I
                offset = -hessianinv * derivative
                offset = np.squeeze(np.array(offset.T), axis=0)
                coord += offset
        return coord

    def dark_postprocess(self, hm, coords, kernelsize):
        '''DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
        Representation for Human Pose Estimation (CVPR 2020).
        '''
        hm = self.gaussian_blur(hm, kernelsize)
        hm = np.maximum(hm, 1e-10)
        hm = np.log(hm)  # the Taylor expansion is applied in log-space
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
        return coords

    def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor
            kernelsize (int): Gaussian kernel size used by DARK smoothing

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)

        heatmap_height = heatmaps.shape[2]
        heatmap_width = heatmaps.shape[3]

        if self.use_dark:
            coords = self.dark_postprocess(heatmaps, coords, kernelsize)
        else:
            # Quarter-pixel shift toward the stronger neighboring response.
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
        preds = coords.copy()

        # Transform back into the original image frame.
        for i in range(coords.shape[0]):
            preds[i] = transform_preds(coords[i], center[i], scale[i],
                                       [heatmap_width, heatmap_height])

        return preds, maxvals

    def __call__(self, output, center, scale):
        preds, maxvals = self.get_final_preds(output.numpy(), center, scale)
        # [[keypoints with scores appended, per-instance mean score]]
        return [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
|
||||
|
||||
|
||||
class TinyPose3DPostProcess(object):
    """Rescale predicted 3D keypoints back to the original image size."""

    def __init__(self):
        pass

    def __call__(self, output, center, scale):
        """
        Args:
            output (Tensor): numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
            center: unused, kept for interface compatibility
            scale (numpy.ndarray): The scale factor
        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 3]), keypoints coords
        """
        preds = output.numpy().copy()

        # Transform back: only x/y are rescaled; depth (z) stays untouched.
        for idx in range(output.shape[0]):  # batch dimension
            preds[idx][:, 0] = preds[idx][:, 0] * scale[idx][0]
            preds[idx][:, 1] = preds[idx][:, 1] * scale[idx][1]

        return preds
|
||||
|
||||
|
||||
def soft_argmax(heatmaps, joint_num):
    # Differentiable argmax over a volumetric heatmap.
    # Input: [N, joint_num * depth_dim, H, W]; output: [N, joint_num, 3]
    # holding per-joint (x, y, z) expectation coordinates.
    dims = heatmaps.shape
    depth_dim = (int)(dims[1] / joint_num)
    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim * dims[2] * dims[3]))
    # Softmax over the whole volume turns each joint map into a distribution.
    heatmaps = F.softmax(heatmaps, 2)
    heatmaps = heatmaps.reshape((-1, joint_num, depth_dim, dims[2], dims[3]))

    # Marginal distributions along each coordinate axis.
    accu_x = heatmaps.sum(axis=(2, 3))
    accu_y = heatmaps.sum(axis=(2, 4))
    accu_z = heatmaps.sum(axis=(3, 4))

    # NOTE(review): arange(1, 33) hard-codes 32 bins per axis — this assumes
    # depth_dim == H == W == 32; confirm against the head configuration.
    accu_x = accu_x * paddle.arange(1, 33)
    accu_y = accu_y * paddle.arange(1, 33)
    accu_z = accu_z * paddle.arange(1, 33)

    # Expectation under 1-based weights, shifted back to 0-based coordinates.
    accu_x = accu_x.sum(axis=2, keepdim=True) - 1
    accu_y = accu_y.sum(axis=2, keepdim=True) - 1
    accu_z = accu_z.sum(axis=2, keepdim=True) - 1

    coord_out = paddle.concat(
        (accu_x, accu_y, accu_z), axis=2)  # [batch_size, joint_num, 3]

    return coord_out
|
||||
|
||||
|
||||
@register
class TinyPose3DHRHeatmapNet(BaseArch):
    """TinyPose3D with a volumetric-heatmap head: 3D joint coordinates are
    obtained via soft-argmax over per-joint heat volumes.

    Args:
        width (int): number of channels output by the backbone
        num_joints (int): number of predicted joints
        backbone (nn.Layer): backbone instance
        loss (object): keypoint regression loss instance
        post_process (object): post process instance
    """
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(
            self,
            width,  # channel count of the backbone output
            num_joints,
            backbone='HRNet',
            loss='KeyPointRegressionMSELoss',
            post_process=TinyPose3DPostProcess):
        super(TinyPose3DHRHeatmapNet, self).__init__()

        self.backbone = backbone
        self.post_process = TinyPose3DPostProcess()
        self.loss = loss
        self.deploy = False
        self.num_joints = num_joints

        # 32 depth bins per joint in the predicted volume.
        self.final_conv = L.Conv2d(width, num_joints * 32, 1, 1, 0, bias=True)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        return {'backbone': create(cfg['backbone']), }

    def _forward(self):
        feats = self.backbone(self.inputs)

        volumes = self.final_conv(feats[0])
        # Differentiable argmax turns each joint volume into (x, y, z).
        return soft_argmax(volumes, self.num_joints)

    def get_loss(self):
        pose3d = self._forward()
        return {'loss': self.loss(pose3d, None, self.inputs)}

    def get_pred(self):
        return {'pose3d': self._forward()}

    def flip_back(self, output_flipped, matched_parts):
        """Mirror heatmaps horizontally and swap left/right joint channels."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        mirrored = output_flipped[:, :, :, ::-1]
        for left, right in matched_parts:
            tmp = mirrored[:, left, :, :].copy()
            mirrored[:, left, :, :] = mirrored[:, right, :, :]
            mirrored[:, right, :, :] = tmp
        return mirrored
|
||||
|
||||
|
||||
@register
class TinyPose3DHRNet(BaseArch):
    """TinyPose3D with a regression head: per-joint heatmaps are flattened
    and regressed to (x, y, z) through a small MLP.

    Args:
        width (int): number of channels output by the backbone
        num_joints (int): number of predicted joints
        fc_channel (int): flattened heatmap size feeding the first FC layer
        backbone (nn.Layer): backbone instance
        loss (object): keypoint regression loss instance
        post_process (object): post process instance
    """
    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self,
                 width,
                 num_joints,
                 fc_channel=768,
                 backbone='HRNet',
                 loss='KeyPointRegressionMSELoss',
                 post_process=TinyPose3DPostProcess):
        super(TinyPose3DHRNet, self).__init__()
        self.backbone = backbone
        self.post_process = TinyPose3DPostProcess()
        self.loss = loss
        self.deploy = False
        self.num_joints = num_joints

        self.final_conv = L.Conv2d(width, num_joints, 1, 1, 0, bias=True)

        # Per-joint MLP regressing (x, y, z) from the flattened heatmap.
        self.flatten = paddle.nn.Flatten(start_axis=2, stop_axis=3)
        self.fc1 = paddle.nn.Linear(fc_channel, 256)
        self.act1 = paddle.nn.ReLU()
        self.fc2 = paddle.nn.Linear(256, 64)
        self.act2 = paddle.nn.ReLU()
        self.fc3 = paddle.nn.Linear(64, 3)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        return {'backbone': create(cfg['backbone']), }

    def _forward(self):
        '''
        self.inputs is a dict
        '''
        feats = self.backbone(self.inputs)

        heatmaps = self.final_conv(feats[0])  # [N, num_joints, H, W]

        hidden = self.flatten(heatmaps)  # [N, num_joints, H*W]
        hidden = self.act1(self.fc1(hidden))
        hidden = self.act2(self.fc2(hidden))
        coords = self.fc3(hidden)  # [N, num_joints, 3]

        if self.training:
            return self.loss(coords, self.inputs)
        # export / inference path
        return coords

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return {'pose3d': self._forward()}

    def flip_back(self, output_flipped, matched_parts):
        """Mirror heatmaps horizontally and swap left/right joint channels."""
        assert output_flipped.ndim == 4,\
            'output_flipped should be [batch_size, num_joints, height, width]'

        mirrored = output_flipped[:, :, :, ::-1]
        for left, right in matched_parts:
            tmp = mirrored[:, left, :, :].copy()
            mirrored[:, left, :, :] = mirrored[:, right, :, :]
            mirrored[:, right, :, :] = tmp
        return mirrored
|
||||
217
paddle_detection/ppdet/modeling/architectures/keypoint_petr.py
Normal file
217
paddle_detection/ppdet/modeling/architectures/keypoint_petr.py
Normal file
@@ -0,0 +1,217 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
this code is base on https://github.com/hikvision-research/opera/blob/main/opera/models/detectors/petr.py
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register
|
||||
from .meta_arch import BaseArch
|
||||
from .. import layers as L
|
||||
|
||||
__all__ = ['PETR']
|
||||
|
||||
|
||||
@register
class PETR(BaseArch):
    """PETR: end-to-end multi-person pose estimation with transformers.

    See https://openaccess.thecvf.com/content/CVPR2022/papers/Shi_End-to-End_Multi-Person_Pose_Estimation_With_Transformers_CVPR_2022_paper.pdf

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck between backbone and head (may be None)
        bbox_head (nn.Layer): model output and loss
    """

    __category__ = 'architecture'
    __inject__ = ['backbone', 'neck', 'bbox_head']

    def __init__(self,
                 backbone='ResNet',
                 neck='ChannelMapper',
                 bbox_head='PETRHead'):
        super(PETR, self).__init__()
        self.backbone = backbone
        # Fix: always define ``with_neck``. Previously it was only assigned
        # when a neck was supplied, so extract_feat() raised AttributeError
        # for neck-less configurations.
        self.with_neck = neck is not None
        self.neck = neck
        self.bbox_head = bbox_head
        self.deploy = False

    def extract_feat(self, img):
        """Directly extract features from the backbone+neck."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        return x

    def get_inputs(self):
        """Unpack the padded training batch in ``self.inputs`` into
        per-image lists, dropping padding entries via ``pad_gt_mask``.

        Returns:
            tuple: (img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas)
        """
        img_metas = []
        gt_bboxes = []
        gt_labels = []
        gt_keypoints = []
        gt_areas = []
        pad_gt_mask = self.inputs['pad_gt_mask'].astype("bool").squeeze(-1)
        for idx, im_shape in enumerate(self.inputs['im_shape']):
            img_meta = {
                'img_shape': im_shape.astype("int32").tolist() + [1, ],
                'batch_input_shape': self.inputs['image'].shape[-2:],
                'image_name': self.inputs['image_file'][idx]
            }
            img_metas.append(img_meta)
            # Image with no valid (non-padding) GT: keep one dummy entry so
            # downstream code still receives non-empty tensors.
            if (not pad_gt_mask[idx].any()):
                gt_keypoints.append(self.inputs['gt_joints'][idx][:1])
                gt_labels.append(self.inputs['gt_class'][idx][:1])
                gt_bboxes.append(self.inputs['gt_bbox'][idx][:1])
                gt_areas.append(self.inputs['gt_areas'][idx][:1])
                continue

            gt_keypoints.append(self.inputs['gt_joints'][idx][pad_gt_mask[idx]])
            gt_labels.append(self.inputs['gt_class'][idx][pad_gt_mask[idx]])
            gt_bboxes.append(self.inputs['gt_bbox'][idx][pad_gt_mask[idx]])
            gt_areas.append(self.inputs['gt_areas'][idx][pad_gt_mask[idx]])

        return img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas

    def get_loss(self):
        """
        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): A List of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            gt_bboxes (list[Tensor]): Each item are the truth boxes for each
                image in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box.
            gt_keypoints (list[Tensor]): Each item are the truth keypoints for
                each image in [p^{1}_x, p^{1}_y, p^{1}_v, ..., p^{K}_x,
                p^{K}_y, p^{K}_v] format.
            gt_areas (list[Tensor]): mask areas corresponding to each box.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components, plus a summed
            'loss' entry.
        """

        img_metas, gt_bboxes, gt_labels, gt_keypoints, gt_areas = self.get_inputs(
        )
        # Fix: ``self.inputs`` is a dict, so getattr() always returned None
        # here; use a dict lookup so a supplied 'gt_bboxes_ignore' is honored.
        gt_bboxes_ignore = self.inputs.get('gt_bboxes_ignore', None)

        x = self.extract_feat(self.inputs)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, gt_keypoints, gt_areas,
                                              gt_bboxes_ignore)
        # Sum every component into a single scalar under 'loss'.
        loss = 0
        for k, v in losses.items():
            loss += v
        losses['loss'] = loss

        return losses

    def get_pred_numpy(self):
        """Used for computing network flops.
        """

        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        dummy_img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=(1., 1., 1., 1.)) for _ in range(batch_size)
        ]
        x = self.extract_feat(img)
        outs = self.bbox_head(x, img_metas=dummy_img_metas)
        bbox_list = self.bbox_head.get_bboxes(
            *outs, dummy_img_metas, rescale=True)
        return bbox_list

    def get_pred(self):
        """Inference entry: run single-image test and repack keypoints with
        per-instance scores under the 'keypoint' key."""
        img = self.inputs['image']
        batch_size, _, height, width = img.shape
        img_metas = [
            dict(
                batch_input_shape=(height, width),
                img_shape=(height, width, 3),
                scale_factor=self.inputs['scale_factor'][i])
            for i in range(batch_size)
        ]
        kptpred = self.simple_test(
            self.inputs, img_metas=img_metas, rescale=True)
        keypoints = kptpred[0][1][0]
        bboxs = kptpred[0][0][0]
        # Broadcast each instance's detection score into the keypoint
        # visibility column.
        keypoints[..., 2] = bboxs[:, None, 4]
        res_lst = [[keypoints, bboxs[:, 4]]]
        outputs = {'keypoint': res_lst}
        return outputs

    def simple_test(self, inputs, img_metas, rescale=False):
        """Test function without test time augmentation.

        Args:
            inputs (list[paddle.Tensor]): List of multiple images.
            img_metas (list[dict]): List of image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[list[np.ndarray]]: BBox and keypoint results of each image
                and classes. The outer list corresponds to each image.
                The inner list corresponds to each class.
        """
        batch_size = len(img_metas)
        assert batch_size == 1, 'Currently only batch_size 1 for inference ' \
            f'mode is supported. Found batch_size {batch_size}.'
        feat = self.extract_feat(inputs)
        results_list = self.bbox_head.simple_test(
            feat, img_metas, rescale=rescale)

        bbox_kpt_results = [
            self.bbox_kpt2result(det_bboxes, det_labels, det_kpts,
                                 self.bbox_head.num_classes)
            for det_bboxes, det_labels, det_kpts in results_list
        ]
        return bbox_kpt_results

    def bbox_kpt2result(self, bboxes, labels, kpts, num_classes):
        """Convert detection results to a list of numpy arrays.

        Args:
            bboxes (paddle.Tensor | np.ndarray): shape (n, 5).
            labels (paddle.Tensor | np.ndarray): shape (n, ).
            kpts (paddle.Tensor | np.ndarray): shape (n, K, 3).
            num_classes (int): class number, including background class.

        Returns:
            list(ndarray): bbox and keypoint results of each class.
        """
        # Local import fix: this module does not import numpy at the top
        # level, so the empty-result branch used to raise NameError on `np`.
        import numpy as np
        if bboxes.shape[0] == 0:
            # Fix: paddle tensors have no callable ``size``; use shape[1],
            # which works for both paddle.Tensor and np.ndarray.
            return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)], \
                [np.zeros((0, kpts.shape[1], 3), dtype=np.float32)
                 for i in range(num_classes)]
        else:
            if isinstance(bboxes, paddle.Tensor):
                bboxes = bboxes.numpy()
                labels = labels.numpy()
                kpts = kpts.numpy()
            return [bboxes[labels == i, :] for i in range(num_classes)], \
                [kpts[labels == i, :, :] for i in range(num_classes)]
|
||||
@@ -0,0 +1,317 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import numpy as np
|
||||
import math
|
||||
import cv2
|
||||
from ppdet.core.workspace import register, create, serializable
|
||||
from .meta_arch import BaseArch
|
||||
from ..keypoint_utils import transform_preds
|
||||
from .. import layers as L
|
||||
|
||||
__all__ = ['VitPose_TopDown', 'VitPosePostProcess']
|
||||
|
||||
|
||||
@register
class VitPose_TopDown(BaseArch):
    """VitPose top-down keypoint network, see
    https://arxiv.org/pdf/2204.12484v2.pdf

    Args:
        backbone (nn.Layer): backbone instance
        head (nn.Layer): keypoint head producing heatmaps
        loss (object): training loss (injected)
        post_process (object): heatmap decoding post-process instance
        flip_test (bool): average with flipped-image heatmaps at test time
    """

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(self, backbone, head, loss, post_process, flip_test):
        super(VitPose_TopDown, self).__init__()
        self.backbone = backbone
        self.head = head
        self.loss = loss
        self.post_process = post_process
        self.flip_test = flip_test

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Build configurable sub-modules; ``loss`` arrives via injection.
        return {
            'backbone': create(cfg['backbone']),
            'head': create(cfg['head']),
            'post_process': create(cfg['post_process'])
        }

    def _forward_train(self):
        # Backbone features -> head heatmaps -> supervised loss.
        body_feats = self.backbone.forward_features(self.inputs['image'])
        heatmaps = self.head(body_feats)
        return self.loss(heatmaps, self.inputs)

    def _forward_test(self):
        # Plain forward pass on the (already preprocessed) input image.
        body_feats = self.backbone.forward_features(self.inputs['image'])
        heatmaps = self.head(body_feats)

        if self.flip_test:
            # Average with the prediction on the horizontally flipped image.
            flipped_img = self.inputs['image'].flip(3)
            flipped_feats = self.backbone.forward_features(flipped_img)
            flipped_heatmaps = self.head.inference_model(flipped_feats,
                                                         self.flip_test)
            heatmaps = (heatmaps + flipped_heatmaps) * 0.5

        # Box center/scale for decoding; fall back to the image shape when
        # the sampler did not provide them.
        if 'im_shape' in self.inputs:
            imshape = (self.inputs['im_shape'].numpy())[:, ::-1]
        else:
            imshape = None
        if 'center' in self.inputs:
            center = self.inputs['center'].numpy()
        else:
            center = np.round(imshape / 2.)
        if 'scale' in self.inputs:
            scale = self.inputs['scale'].numpy()
        else:
            scale = imshape / 200.

        return self.post_process(heatmaps.cpu().numpy(), center, scale)

    def get_loss(self):
        return self._forward_train()

    def get_pred(self):
        return {'keypoint': self._forward_test()}
|
||||
|
||||
|
||||
@register
@serializable
class VitPosePostProcess(object):
    """Decode predicted heatmaps into image-space keypoint coordinates.

    With ``use_dark=True`` the DARK/UDP refinement (Gaussian modulation +
    Taylor expansion) is applied; otherwise a simple quarter-offset shift
    toward the second-highest neighbor is used.
    """

    def __init__(self, use_dark=False):
        # use_dark: enable the DARK/UDP decoding branch in get_final_preds.
        self.use_dark = use_dark

    def get_max_preds(self, heatmaps):
        '''get predictions from score maps

        Args:
            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints
        '''
        assert isinstance(heatmaps,
                          np.ndarray), 'heatmaps should be numpy.ndarray'
        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

        batch_size = heatmaps.shape[0]
        num_joints = heatmaps.shape[1]
        width = heatmaps.shape[3]
        # Flatten H x W so argmax/amax give the peak per joint in one shot.
        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
        idx = np.argmax(heatmaps_reshaped, 2)
        maxvals = np.amax(heatmaps_reshaped, 2)

        maxvals = maxvals.reshape((batch_size, num_joints, 1))
        idx = idx.reshape((batch_size, num_joints, 1))

        # Duplicate the flat index into an (x, y) pair, then convert:
        # x = idx % width, y = idx // width.
        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

        preds[:, :, 0] = (preds[:, :, 0]) % width
        preds[:, :, 1] = np.floor((preds[:, :, 1]) // width)

        # Zero out keypoints whose peak confidence is non-positive.
        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
        pred_mask = pred_mask.astype(np.float32)

        preds *= pred_mask

        return preds, maxvals

    def post_datk_udp(self, coords, batch_heatmaps, kernel=3):
        """DARK post-processing. Implemented by udp. Paper ref: Huang et al. The
        Devil is in the Details: Delving into Unbiased Data Processing for Human
        Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
        Representation for Human Pose Estimation (CVPR 2020).

        Note:
            - batch size: B
            - num keypoints: K
            - num persons: N
            - height of heatmaps: H
            - width of heatmaps: W

        B=1 for bottom_up paradigm where all persons share the same heatmap.
        B=N for top_down paradigm where each person has its own heatmaps.

        Args:
            coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
            batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
            kernel (int): Gaussian kernel size (K) for modulation.

        Returns:
            np.ndarray([N, K, 2]): Refined coordinates.
        """
        if not isinstance(batch_heatmaps, np.ndarray):
            batch_heatmaps = batch_heatmaps.cpu().numpy()
        B, K, H, W = batch_heatmaps.shape
        N = coords.shape[0]
        assert (B == 1 or B == N)
        # Smooth each heatmap in place, then move to log space (clipped to
        # keep log() finite) so the Taylor expansion below is well-behaved.
        for heatmaps in batch_heatmaps:
            for heatmap in heatmaps:
                cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
        np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
        np.log(batch_heatmaps, batch_heatmaps)

        # Pad one pixel on every border so the finite differences at the
        # peak never index out of range, then flatten for fancy indexing.
        batch_heatmaps_pad = np.pad(batch_heatmaps, ((0, 0), (0, 0), (1, 1),
                                                     (1, 1)),
                                    mode='edge').flatten()

        # Flat index of each peak inside the padded (H+2, W+2) maps.
        index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
        index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
        index = index.astype(int).reshape(-1, 1)
        # Samples around the peak: center, x+-1, y+-1 and diagonals.
        i_ = batch_heatmaps_pad[index]
        ix1 = batch_heatmaps_pad[index + 1]
        iy1 = batch_heatmaps_pad[index + W + 2]
        ix1y1 = batch_heatmaps_pad[index + W + 3]
        ix1_y1_ = batch_heatmaps_pad[index - W - 3]
        ix1_ = batch_heatmaps_pad[index - 1]
        iy1_ = batch_heatmaps_pad[index - 2 - W]

        # First derivative (gradient) and second derivative (Hessian) of the
        # log-likelihood at the discrete peak.
        dx = 0.5 * (ix1 - ix1_)
        dy = 0.5 * (iy1 - iy1_)
        derivative = np.concatenate([dx, dy], axis=1)
        derivative = derivative.reshape(N, K, 2, 1)
        dxx = ix1 - 2 * i_ + ix1_
        dyy = iy1 - 2 * i_ + iy1_
        dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
        hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
        hessian = hessian.reshape(N, K, 2, 2)
        # eps * I keeps the inversion stable when the Hessian is singular.
        hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
        # Newton step: subtract H^-1 * gradient from the discrete peak.
        coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
        return coords

    def transform_preds_udp(self,
                            coords,
                            center,
                            scale,
                            output_size,
                            use_udp=True):
        """Get final keypoint predictions from heatmaps and apply scaling and
        translation to map them back to the image.

        Note:
            num_keypoints: K

        Args:
            coords (np.ndarray[K, ndims]):

                * If ndims=2, corrds are predicted keypoint location.
                * If ndims=4, corrds are composed of (x, y, scores, tags)
                * If ndims=5, corrds are composed of (x, y, scores, tags,
                  flipped_tags)

            center (np.ndarray[2, ]): Center of the bounding box (x, y).
            scale (np.ndarray[2, ]): Scale of the bounding box
                wrt [width, height].
            output_size (np.ndarray[2, ] | list(2,)): Size of the
                destination heatmaps.
            use_udp (bool): Use unbiased data processing

        Returns:
            np.ndarray: Predicted coordinates in the images.
        """

        assert coords.shape[1] in (2, 4, 5)
        assert len(center) == 2
        assert len(scale) == 2
        assert len(output_size) == 2

        # Recover the scale which is normalized by a factor of 200.
        scale = scale * 200.0

        # UDP maps pixel centers with (size - 1) spacing; the biased variant
        # uses the raw size.
        if use_udp:
            scale_x = scale[0] / (output_size[0] - 1.0)
            scale_y = scale[1] / (output_size[1] - 1.0)
        else:
            scale_x = scale[0] / output_size[0]
            scale_y = scale[1] / output_size[1]

        # Affine: heatmap coords -> box coords centered on ``center``.
        target_coords = np.ones_like(coords)
        target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[
            0] * 0.5
        target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[
            1] * 0.5

        return target_coords

    def get_final_preds(self, heatmaps, center, scale, kernelsize=11):
        """the highest heatvalue location with a quarter offset in the
        direction from the highest response to the second highest response.

        Args:
            heatmaps (numpy.ndarray): The predicted heatmaps
            center (numpy.ndarray): The boxes center
            scale (numpy.ndarray): The scale factor

        Returns:
            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
        """
        coords, maxvals = self.get_max_preds(heatmaps)

        N, K, H, W = heatmaps.shape

        if self.use_dark:
            # DARK/UDP: sub-pixel refinement, then the UDP inverse transform.
            coords = self.post_datk_udp(coords, heatmaps, kernelsize)
            preds = coords.copy()
            # Transform back to the image
            for i in range(N):
                preds[i] = self.transform_preds_udp(preds[i], center[i],
                                                    scale[i], [W, H])
        else:
            # Classic quarter-pixel shift toward the larger neighbor.
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
            preds = coords.copy()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale[i],
                                           [W, H])

        return preds, maxvals

    def __call__(self, output, center, scale):
        # Returns [[keypoints (N, K, 3), instance scores (N,)]]: coords with
        # confidence appended, plus the mean confidence per instance.
        preds, maxvals = self.get_final_preds(output, center, scale)
        outputs = [[
            np.concatenate(
                (preds, maxvals), axis=-1), np.mean(
                    maxvals, axis=1)
        ]]
        return outputs
|
||||
152
paddle_detection/ppdet/modeling/architectures/mask_rcnn.py
Normal file
152
paddle_detection/ppdet/modeling/architectures/mask_rcnn.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['MaskRCNN']
|
||||
|
||||
|
||||
@register
class MaskRCNN(BaseArch):
    """
    Mask R-CNN network, see https://arxiv.org/abs/1703.06870

    Args:
        backbone (object): backbone instance
        rpn_head (object): `RPNHead` instance
        bbox_head (object): `BBoxHead` instance
        mask_head (object): `MaskHead` instance
        bbox_post_process (object): `BBoxPostProcess` instance
        mask_post_process (object): `MaskPostProcess` instance
        neck (object): 'FPN' instance
    """

    __category__ = 'architecture'
    __inject__ = [
        'bbox_post_process',
        'mask_post_process',
    ]

    def __init__(self,
                 backbone,
                 rpn_head,
                 bbox_head,
                 mask_head,
                 bbox_post_process,
                 mask_post_process,
                 neck=None):
        super(MaskRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.mask_head = mask_head

        self.bbox_post_process = bbox_post_process
        self.mask_post_process = mask_post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # The neck is optional; when absent, heads consume backbone shapes.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        bbox_head = create(cfg['bbox_head'], **kwargs)

        # Without FPN, the mask head works on the bbox head's feature shape.
        out_shape = neck and out_shape or bbox_head.get_head().out_shape
        kwargs = {'input_shape': out_shape}
        mask_head = create(cfg['mask_head'], **kwargs)
        return {
            'backbone': backbone,
            'neck': neck,
            "rpn_head": rpn_head,
            "bbox_head": bbox_head,
            "mask_head": mask_head,
        }

    def _forward(self):
        """Shared forward: returns (rpn_loss, bbox_loss, mask_loss) when
        training, else (bbox_pred, bbox_num, mask_pred[, extra_data])."""
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)

        if self.training:
            rois, rois_num, rpn_loss = self.rpn_head(body_feats, self.inputs)
            bbox_loss, bbox_feat = self.bbox_head(body_feats, rois, rois_num,
                                                  self.inputs)
            rois, rois_num = self.bbox_head.get_assigned_rois()
            bbox_targets = self.bbox_head.get_assigned_targets()
            # Mask Head needs bbox_feat in Mask RCNN
            mask_loss = self.mask_head(body_feats, rois, rois_num, self.inputs,
                                       bbox_targets, bbox_feat)
            return rpn_loss, bbox_loss, mask_loss
        else:
            rois, rois_num, _ = self.rpn_head(body_feats, self.inputs)
            preds, feat_func = self.bbox_head(body_feats, rois, rois_num, None)

            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']

            bbox, bbox_num, nms_keep_idx = self.bbox_post_process(
                preds, (rois, rois_num), im_shape, scale_factor)
            mask_out = self.mask_head(
                body_feats, bbox, bbox_num, self.inputs, feat_func=feat_func)

            # rescale the prediction back to origin image
            bbox, bbox_pred, bbox_num = self.bbox_post_process.get_pred(
                bbox, bbox_num, im_shape, scale_factor)
            origin_shape = self.bbox_post_process.get_origin_shape()
            mask_pred = self.mask_post_process(mask_out, bbox_pred, bbox_num,
                                               origin_shape)

            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                           }
                """
                extra_data['scores'] = preds[1]  # predict scores (probability)
                # Todo: get logits output
                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
                return bbox_pred, bbox_num, mask_pred, extra_data
            else:
                return bbox_pred, bbox_num, mask_pred

    def get_loss(self, ):
        # Fix: _forward() returns (rpn_loss, bbox_loss, mask_loss); the old
        # unpack order swapped the names. Harmless for the summed total, but
        # misleading when inspecting individual loss entries.
        rpn_loss, bbox_loss, mask_loss = self._forward()
        loss = {}
        loss.update(rpn_loss)
        loss.update(bbox_loss)
        loss.update(mask_loss)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        if self.use_extra_data:
            bbox_pred, bbox_num, mask_pred, extra_data = self._forward()
            output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred, 'extra_data': extra_data}
        else:
            bbox_pred, bbox_num, mask_pred = self._forward()
            output = {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
        return output
|
||||
132
paddle_detection/ppdet/modeling/architectures/meta_arch.py
Normal file
132
paddle_detection/ppdet/modeling/architectures/meta_arch.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import typing
|
||||
|
||||
from ppdet.core.workspace import register
|
||||
from ppdet.modeling.post_process import nms
|
||||
|
||||
__all__ = ['BaseArch']
|
||||
|
||||
|
||||
@register
class BaseArch(nn.Layer):
    """Common base for all detection architectures.

    Owns the optional fused input normalization, the train/eval dispatch in
    forward(), and the multi-scale test merge.
    """

    def __init__(self, data_format='NCHW', use_extra_data=False):
        super(BaseArch, self).__init__()
        # Image layout; 'NHWC' triggers a transpose in forward().
        self.data_format = data_format
        self.inputs = {}
        # When True, forward() applies (image * scale + bias) itself instead
        # of relying on a NormalizeImage transform in the data pipeline.
        self.fuse_norm = False
        # When True, get_pred() implementations may also return raw pre-NMS
        # data (scores, kept indices).
        self.use_extra_data = use_extra_data

    def load_meanstd(self, cfg_transform):
        """Cache mean/std (and optional 1/255 scaling) from a NormalizeImage
        transform config as broadcastable tensors for fused normalization."""
        scale = 1.
        # ImageNet defaults, used when no NormalizeImage transform is found.
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        for item in cfg_transform:
            if 'NormalizeImage' in item:
                mean = np.array(
                    item['NormalizeImage']['mean'], dtype=np.float32)
                std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
                if item['NormalizeImage'].get('is_scale', True):
                    scale = 1. / 255.
                break
        # Shape the constants so they broadcast over the chosen data layout.
        if self.data_format == 'NHWC':
            self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
            self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
        else:
            self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
            self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))

    def forward(self, inputs):
        """Dispatch to get_loss() in training or get_pred() in eval.

        In eval mode ``inputs`` may be a sequence of dicts (one per test
        scale); their predictions are merged by
        merge_multi_scale_predictions().
        """
        if self.data_format == 'NHWC':
            image = inputs['image']
            inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])

        if self.fuse_norm:
            # Normalize here rather than in the data pipeline.
            image = inputs['image']
            self.inputs['image'] = image * self.scale + self.bias
            self.inputs['im_shape'] = inputs['im_shape']
            self.inputs['scale_factor'] = inputs['scale_factor']
        else:
            self.inputs = inputs

        # Hook for subclasses to tweak state before the main pass.
        self.model_arch()

        if self.training:
            out = self.get_loss()
        else:
            inputs_list = []
            # multi-scale input
            if not isinstance(inputs, typing.Sequence):
                inputs_list.append(inputs)
            else:
                inputs_list.extend(inputs)
            outs = []
            for inp in inputs_list:
                if self.fuse_norm:
                    self.inputs['image'] = inp['image'] * self.scale + self.bias
                    self.inputs['im_shape'] = inp['im_shape']
                    self.inputs['scale_factor'] = inp['scale_factor']
                else:
                    self.inputs = inp
                outs.append(self.get_pred())

            # multi-scale test
            if len(outs) > 1:
                out = self.merge_multi_scale_predictions(outs)
            else:
                out = outs[0]
        return out

    def merge_multi_scale_predictions(self, outs):
        """Merge per-scale predictions with class-wise NMS and keep the
        top-scoring boxes. Only the listed two-stage architectures expose the
        NMS settings needed here."""
        # default values for architectures not included in following list
        num_classes = 80
        nms_threshold = 0.5
        keep_top_k = 100

        if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
            num_classes = self.bbox_head.num_classes
            keep_top_k = self.bbox_post_process.nms.keep_top_k
            nms_threshold = self.bbox_post_process.nms.nms_threshold
        else:
            raise Exception(
                "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
            )

        final_boxes = []
        all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
        for c in range(num_classes):
            # Column 0 holds the class id; NMS runs per class.
            idxs = all_scale_outs[:, 0] == c
            if np.count_nonzero(idxs) == 0:
                continue
            r = nms(all_scale_outs[idxs, 1:], nms_threshold)
            final_boxes.append(
                np.concatenate([np.full((r.shape[0], 1), c), r], 1))
        out = np.concatenate(final_boxes)
        # Sort by score (column 1) and keep the top keep_top_k boxes.
        out = np.concatenate(sorted(
            out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
        out = {
            'bbox': paddle.to_tensor(out),
            'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
        }

        return out

    def build_inputs(self, data, input_def):
        """Zip positional batch tensors with their declared field names."""
        inputs = {}
        for i, k in enumerate(input_def):
            inputs[k] = data[i]
        return inputs

    def model_arch(self, ):
        # Optional pre-forward hook; subclasses may override.
        pass

    def get_loss(self, ):
        raise NotImplementedError("Should implement get_loss method!")

    def get_pred(self, ):
        raise NotImplementedError("Should implement get_pred method!")
|
||||
@@ -0,0 +1,69 @@
|
||||
from typing import Dict
|
||||
from collections import OrderedDict
|
||||
from ppdet.modeling.architectures.meta_arch import BaseArch
|
||||
|
||||
|
||||
class MultiSteamDetector(BaseArch):
    """Container architecture holding several named sub-detectors, e.g. a
    teacher/student pair for semi-supervised detection.

    Each value of *model* is attached as an attribute under its dict key,
    and ``test_cfg['inference_on']`` picks which submodule answers
    inference requests (defaulting to the first key).

    NOTE: the historical spelling "Steam" is part of the public class name
    and is kept as-is.
    """

    def __init__(self,
                 model: Dict[str, BaseArch],
                 train_cfg=None,
                 test_cfg=None):
        super(MultiSteamDetector, self).__init__()
        # Preserve insertion order; the first submodule is the default
        # inference branch.
        self.submodules = list(model.keys())
        for k, v in model.items():
            setattr(self, k, v)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # NOTE(review): assumes test_cfg is dict-like (supports .get) —
        # confirm all callers pass a mapping, not None.
        self.inference_on = self.test_cfg.get("inference_on",
                                              self.submodules[0])
        self.first_load = True

    def forward(self, inputs, return_loss=True, **kwargs):
        """Calls either :func:`forward_train` or :func:`forward_test` depending
        on whether ``return_loss`` is ``True``.

        Note this setting will change the expected inputs. When
        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor
        and List[dict]), and when ``return_loss=False``, img and img_meta
        should be double nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test time augmentations.
        """
        if return_loss:
            return self.forward_train(inputs, **kwargs)
        else:
            return self.forward_test(inputs, **kwargs)

    def get_loss(self, **kwargs):
        # losses = self(**data)

        # NOTE(review): `self` is passed explicitly as the first positional
        # argument, so forward_train receives the detector itself as its
        # `inputs` parameter — unlike the call in forward() above. Looks
        # suspicious; verify against subclasses' forward_train signatures
        # before changing.
        return self.forward_train(self, **kwargs)

    def model(self, **kwargs) -> BaseArch:
        """Return the submodule named by kwarg ``submodule``, or the one
        configured via ``inference_on`` when no name is given."""
        if "submodule" in kwargs:
            assert (kwargs["submodule"] in self.submodules
                    ), "Detector does not contain submodule {}".format(kwargs[
                        "submodule"])
            model: BaseArch = getattr(self, kwargs["submodule"])
        else:
            model: BaseArch = getattr(self, self.inference_on)
        return model

    def freeze(self, model_ref: str):
        """Put the named submodule in eval mode and stop gradients on all
        of its parameters (used to freeze the teacher branch)."""
        assert model_ref in self.submodules
        model = getattr(self, model_ref)
        model.eval()
        for param in model.parameters():
            param.stop_gradient = True

    def update_ema_model(self, momentum=0.9996):
        """Exponential-moving-average update of the teacher from the student:
        teacher = momentum * teacher + (1 - momentum) * student, applied
        key-by-key over the state dicts.

        Raises:
            Exception: if the teacher holds a parameter key missing from
                the student.
        """
        # print(momentum)
        model_dict = self.student.state_dict()
        new_dict = OrderedDict()
        for key, value in self.teacher.state_dict().items():
            if key in model_dict.keys():
                new_dict[key] = (model_dict[key] *
                                 (1 - momentum) + value * momentum)
            else:
                raise Exception("{} is not found in student model".format(key))
        self.teacher.set_dict(new_dict)
|
||||
99
paddle_detection/ppdet/modeling/architectures/picodet.py
Normal file
99
paddle_detection/ppdet/modeling/architectures/picodet.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['PicoDet']
|
||||
|
||||
|
||||
@register
class PicoDet(BaseArch):
    """
    PicoDet detector built on Generalized Focal Loss,
    see https://arxiv.org/abs/2006.04388

    Args:
        backbone (object): backbone instance
        neck (object): 'FPN' instance
        head (object): 'PicoHead' instance
        nms_cpu (bool): whether the head should run NMS on CPU
            (forwarded to ``head.post_process``)
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head='PicoHead', nms_cpu=False):
        super(PicoDet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        # Export-time switches: when export_post_process is False the raw
        # head outputs are returned; when export_nms is False the head
        # returns boxes + per-level scores without NMS (see get_pred).
        self.export_post_process = True
        self.export_nms = True
        self.nms_cpu = nms_cpu

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Instantiate submodules from config, chaining each stage's
        out_shape into the next stage's input_shape."""
        backbone = create(cfg['backbone'])

        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        head = create(cfg['head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "head": head,
        }

    def _forward(self):
        # backbone -> neck -> head; always returns a 2-tuple so callers can
        # unpack uniformly.
        body_feats = self.backbone(self.inputs)
        fpn_feats = self.neck(body_feats)
        head_outs = self.head(fpn_feats, self.export_post_process)
        if self.training or not self.export_post_process:
            # Raw head outputs; second slot is a placeholder.
            return head_outs, None
        else:
            scale_factor = self.inputs['scale_factor']
            bboxes, bbox_num = self.head.post_process(
                head_outs,
                scale_factor,
                export_nms=self.export_nms,
                nms_cpu=self.nms_cpu)
            return bboxes, bbox_num

    def get_loss(self, ):
        """Run the training forward pass and return the loss dict, with a
        'loss' entry holding the sum of all components."""
        loss = {}

        head_outs, _ = self._forward()
        loss_gfl = self.head.get_loss(head_outs, self.inputs)
        loss.update(loss_gfl)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Return inference outputs; the dict layout depends on the export
        switches (raw outputs / NMS-ed boxes / boxes + scores)."""
        if not self.export_post_process:
            return {'picodet': self._forward()[0]}
        elif self.export_nms:
            bbox_pred, bbox_num = self._forward()
            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
            return output
        else:
            bboxes, mlvl_scores = self._forward()
            output = {'bbox': bboxes, 'scores': mlvl_scores}
            return output
|
||||
114
paddle_detection/ppdet/modeling/architectures/pose3d_metro.py
Normal file
114
paddle_detection/ppdet/modeling/architectures/pose3d_metro.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from .. import layers as L
|
||||
|
||||
__all__ = ['METRO_Body']
|
||||
|
||||
|
||||
def orthographic_projection(X, camera):
    """Perform orthographic projection of 3D points X using the camera parameters
    Args:
        X: size = [B, N, 3]
        camera: size = [B, 3] -- per-sample [scale, tx, ty]
          (channel 0 is used as scale, channels 1: as 2D translation below)
    Returns:
        Projected 2D points -- size = [B, N, 2]
    """
    # Broadcast camera over the N points: [B, 3] -> [B, 1, 3].
    camera = camera.reshape((-1, 1, 3))
    # Translate the xy coordinates, then scale; z is dropped entirely
    # (orthographic, no perspective divide).
    X_trans = X[:, :, :2] + camera[:, :, 1:]
    shape = paddle.shape(X_trans)
    # Flatten so the [B, 1] scale broadcasts across all points, then restore.
    X_2d = (camera[:, :, 0] * X_trans.reshape((shape[0], -1))).reshape(shape)
    return X_2d
|
||||
|
||||
|
||||
@register
class METRO_Body(BaseArch):
    """End-to-end 3D human pose network modified from METRO,
    see https://arxiv.org/abs/2012.09760."""

    __category__ = 'architecture'
    __inject__ = ['loss']

    def __init__(
            self,
            num_joints,
            backbone='HRNet',
            trans_encoder='',
            loss='Pose3DLoss', ):
        """
        Modified from METRO network, see https://arxiv.org/abs/2012.09760

        Args:
            num_joints (int): number of 3D joints predicted by the
                transformer encoder
            backbone (nn.Layer): backbone instance
            trans_encoder (nn.Layer): transformer encoder producing joint
                and camera tokens
            loss (object): Pose3DLoss instance (injected)
        """
        super(METRO_Body, self).__init__()
        self.num_joints = num_joints
        self.backbone = backbone
        self.loss = loss
        self.deploy = False

        self.trans_encoder = trans_encoder
        # Maps the 49 (7x7) flattened spatial positions to
        # num_joints + 10 tokens (joints plus extra camera tokens).
        self.conv_learn_tokens = paddle.nn.Conv1D(49, num_joints + 10, 1)
        self.cam_param_fc = paddle.nn.Linear(3, 2)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])
        trans_encoder = create(cfg['trans_encoder'])

        return {'backbone': backbone, 'trans_encoder': trans_encoder}

    def _forward(self):
        batch_size = self.inputs['image'].shape[0]

        # NOTE(review): assumes the backbone emits a 2048-channel 7x7
        # feature map (reshape to (B, 2048, 49)) — confirm for configured
        # backbones.
        image_feat = self.backbone(self.inputs)
        image_feat_flatten = image_feat.reshape((batch_size, 2048, 49))
        image_feat_flatten = image_feat_flatten.transpose(perm=(0, 2, 1))
        # and apply a conv layer to learn image token for each 3d joint/vertex position
        features = self.conv_learn_tokens(image_feat_flatten)  # (B, J, C)

        if self.training:
            # apply mask vertex/joint modeling
            # meta_masks is a tensor of all the masks, randomly generated in dataloader
            # we pre-define a [MASK] token, which is a floating-value vector with 0.01s
            meta_masks = self.inputs['mjm_mask'].expand((-1, -1, 2048))
            constant_tensor = paddle.ones_like(features) * 0.01
            features = features * meta_masks + constant_tensor * (1 - meta_masks
                                                                  )
        pred_out = self.trans_encoder(features)

        # First num_joints tokens are 3D joints; the remainder feed the
        # camera/2D branch.
        pred_3d_joints = pred_out[:, :self.num_joints, :]
        cam_features = pred_out[:, self.num_joints:, :]

        # learn camera parameters
        # NOTE(review): despite the name, this is a 3->2 linear layer on the
        # camera tokens, not an explicit projection of pred_3d_joints —
        # confirm the loss expects these as 2D joint predictions.
        pred_2d_joints = self.cam_param_fc(cam_features)
        return pred_3d_joints, pred_2d_joints

    def get_loss(self):
        """Forward pass plus Pose3DLoss; returns {'loss': total}."""
        preds_3d, preds_2d = self._forward()
        loss = self.loss(preds_3d, preds_2d, self.inputs)
        output = {'loss': loss}
        return output

    def get_pred(self):
        """Return 3D and 2D joint predictions for inference."""
        preds_3d, preds_2d = self._forward()
        outputs = {'pose3d': preds_3d, 'pose2d': preds_2d}
        return outputs
|
||||
260
paddle_detection/ppdet/modeling/architectures/ppyoloe.py
Normal file
260
paddle_detection/ppdet/modeling/architectures/ppyoloe.py
Normal file
@@ -0,0 +1,260 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import copy
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['PPYOLOE', 'PPYOLOEWithAuxHead']
|
||||
# PP-YOLOE and PP-YOLOE+ are recommended to use this architecture, especially when use distillation or aux head
|
||||
# PP-YOLOE and PP-YOLOE+ can also use the same architecture of YOLOv3 in yolo.py when not use distillation or aux head
|
||||
|
||||
|
||||
@register
class PPYOLOE(BaseArch):
    """
    PPYOLOE network, see https://arxiv.org/abs/2203.16250

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        yolo_head (nn.Layer): anchor_head instance
        post_process (object): `BBoxPostProcess` instance
        ssod_loss (object): 'SSODPPYOLOELoss' instance, only used for semi-det(ssod)
        for_distill (bool): whether for distillation
        feat_distill_place (str): distill which feature for distillation
        for_mot (bool): whether return other features for multi-object tracking
            models, default False in pure object detection models.
    """

    __category__ = 'architecture'
    __shared__ = ['for_distill']
    __inject__ = ['post_process', 'ssod_loss']

    def __init__(self,
                 backbone='CSPResNet',
                 neck='CustomCSPPAN',
                 yolo_head='PPYOLOEHead',
                 post_process='BBoxPostProcess',
                 ssod_loss='SSODPPYOLOELoss',
                 for_distill=False,
                 feat_distill_place='neck_feats',
                 for_mot=False):
        super(PPYOLOE, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot

        # for ssod, semi-det
        # is_teacher is toggled per-batch via inputs['is_teacher'] in _forward.
        self.is_teacher = False
        self.ssod_loss = ssod_loss

        # distill
        self.for_distill = for_distill
        self.feat_distill_place = feat_distill_place
        if for_distill:
            assert feat_distill_place in ['backbone_feats', 'neck_feats']

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Instantiate submodules from config, chaining out_shape ->
        input_shape between stages."""
        backbone = create(cfg['backbone'])

        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }

    def _forward(self):
        """Shared forward pass: returns the loss dict in training (or when
        acting as ssod teacher) and the prediction dict otherwise."""
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        self.is_teacher = self.inputs.get('is_teacher', False)  # for semi-det
        if self.training or self.is_teacher:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)

            if self.for_distill:
                # Stash the chosen features on the head for the distill loss
                # to read later (side effect).
                if self.feat_distill_place == 'backbone_feats':
                    self.yolo_head.distill_pairs['backbone_feats'] = body_feats
                elif self.feat_distill_place == 'neck_feats':
                    self.yolo_head.distill_pairs['neck_feats'] = neck_feats
                else:
                    raise ValueError
            return yolo_losses
        else:

            yolo_head_outs = self.yolo_head(neck_feats)

            if self.post_process is not None:
                # NOTE(review): passes self.yolo_head.mask_anchors — confirm
                # PPYOLOEHead exposes this attribute when an external
                # post_process is configured.
                bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])

            else:
                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                    yolo_head_outs, self.inputs['scale_factor'])

            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                           }
                """
                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                extra_data['nms_keep_idx'] = nms_keep_idx
                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
            else:
                output = {'bbox': bbox, 'bbox_num': bbox_num}

        return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

    def get_loss_keys(self):
        """Names of the loss components produced by the head, used by the
        ssod trainer to align student/teacher losses."""
        return ['loss_cls', 'loss_iou', 'loss_dfl', 'loss_contrast']

    def get_ssod_loss(self, student_head_outs, teacher_head_outs, train_cfg):
        """Semi-supervised (ssod) loss between student and teacher outputs."""
        ssod_losses = self.ssod_loss(student_head_outs, teacher_head_outs,
                                     train_cfg)
        return ssod_losses
|
||||
|
||||
|
||||
@register
class PPYOLOEWithAuxHead(BaseArch):
    """PPYOLOE variant that trains with an auxiliary neck + head whose
    features are concatenated with the main neck's (dual-path training)."""

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='CSPResNet',
                 neck='CustomCSPPAN',
                 yolo_head='PPYOLOEHead',
                 aux_head='SimpleConvHead',
                 post_process='BBoxPostProcess',
                 for_mot=False,
                 detach_epoch=5):
        """
        PPYOLOE network, see https://arxiv.org/abs/2203.16250

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            aux_head (nn.Layer): auxiliary head used only during training
            post_process (object): `BBoxPostProcess` instance
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
            detach_epoch (int): epoch from which backbone/neck features are
                detached before feeding the aux branch
        """
        super(PPYOLOEWithAuxHead, self).__init__()
        self.backbone = backbone
        self.neck = neck
        # Independent copy of the neck for the auxiliary branch.
        self.aux_neck = copy.deepcopy(self.neck)

        self.yolo_head = yolo_head
        self.aux_head = aux_head
        self.post_process = post_process
        self.for_mot = for_mot
        self.detach_epoch = detach_epoch

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)
        aux_neck = copy.deepcopy(neck)

        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)
        aux_head = create(cfg['aux_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
            'aux_head': aux_head,
        }

    def _forward(self):
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        if self.training:
            if self.inputs['epoch_id'] >= self.detach_epoch:
                # After detach_epoch, stop gradients from the aux branch
                # flowing into the shared backbone/neck.
                aux_neck_feats = self.aux_neck([f.detach() for f in body_feats])
                # NOTE: generator expression — single-pass; consumed once by
                # aux_head below.
                dual_neck_feats = (paddle.concat(
                    [f.detach(), aux_f], axis=1) for f, aux_f in
                                   zip(neck_feats, aux_neck_feats))
            else:
                aux_neck_feats = self.aux_neck(body_feats)
                dual_neck_feats = (paddle.concat(
                    [f, aux_f], axis=1) for f, aux_f in
                                   zip(neck_feats, aux_neck_feats))
            aux_cls_scores, aux_bbox_preds = self.aux_head(dual_neck_feats)
            loss = self.yolo_head(
                neck_feats,
                self.inputs,
                aux_pred=[aux_cls_scores, aux_bbox_preds])
            return loss
        else:
            yolo_head_outs = self.yolo_head(neck_feats)

            if self.post_process is not None:
                # NOTE(review): passes self.yolo_head.mask_anchors — confirm
                # PPYOLOEHead exposes this attribute when an external
                # post_process is configured.
                bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors,
                    self.inputs['im_shape'], self.inputs['scale_factor'])
            else:
                bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                    yolo_head_outs, self.inputs['scale_factor'])

            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                           }
                """
                extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                # Todo: get logits output
                extra_data['nms_keep_idx'] = nms_keep_idx
                output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
            else:
                output = {'bbox': bbox, 'bbox_num': bbox_num}

            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
104
paddle_detection/ppdet/modeling/architectures/queryinst.py
Normal file
104
paddle_detection/ppdet/modeling/architectures/queryinst.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['QueryInst']
|
||||
|
||||
|
||||
@register
class QueryInst(BaseArch):
    """QueryInst instance-segmentation architecture: query-based RPN plus a
    dynamic-interaction ROI head with mask prediction."""

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 roi_head,
                 post_process='SparsePostProcess'):
        super(QueryInst, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.rpn_head = rpn_head
        self.roi_head = roi_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        rpn_head = create(cfg['rpn_head'], **kwargs)
        roi_head = create(cfg['roi_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'rpn_head': rpn_head,
            "roi_head": roi_head
        }

    def _forward(self, targets=None):
        """Run backbone/neck/heads; returns the ROI head's loss dict when
        training (targets given), else post-processed (bbox, bbox_num, mask)."""
        features = self.backbone(self.inputs)
        features = self.neck(features)

        # Query-based proposals: the rpn head only needs per-image
        # [w, h, w, h] to scale its learned boxes.
        proposal_bboxes, proposal_features = self.rpn_head(self.inputs[
            'img_whwh'])
        outputs = self.roi_head(features, proposal_bboxes, proposal_features,
                                targets)

        if self.training:
            return outputs
        else:
            bbox_pred, bbox_num, mask_pred = self.post_process(
                outputs['class_logits'], outputs['bbox_pred'],
                self.inputs['scale_factor_whwh'], self.inputs['ori_shape'],
                outputs['mask_logits'])
            return bbox_pred, bbox_num, mask_pred

    def get_loss(self):
        """Assemble per-image target dicts from the batched inputs, run the
        training forward pass, and add a summed 'loss' entry."""
        targets = []
        for i in range(len(self.inputs['img_whwh'])):
            boxes = self.inputs['gt_bbox'][i]
            labels = self.inputs['gt_class'][i].squeeze(-1)
            img_whwh = self.inputs['img_whwh'][i]
            if boxes.shape[0] != 0:
                # One [w, h, w, h] row per ground-truth box.
                img_whwh_tgt = img_whwh.unsqueeze(0).tile([boxes.shape[0], 1])
            else:
                # No GT boxes: keep a correctly-shaped empty tensor.
                img_whwh_tgt = paddle.zeros_like(boxes)
            gt_segm = self.inputs['gt_segm'][i].astype('float32')
            targets.append({
                'boxes': boxes,
                'labels': labels,
                'img_whwh': img_whwh,
                'img_whwh_tgt': img_whwh_tgt,
                'gt_segm': gt_segm
            })
        losses = self._forward(targets)
        losses.update({'loss': sum(losses.values())})
        return losses

    def get_pred(self):
        bbox_pred, bbox_num, mask_pred = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num, 'mask': mask_pred}
|
||||
84
paddle_detection/ppdet/modeling/architectures/retinanet.py
Normal file
84
paddle_detection/ppdet/modeling/architectures/retinanet.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
__all__ = ['RetinaNet']
|
||||
|
||||
|
||||
@register
class RetinaNet(BaseArch):
    """RetinaNet single-stage detector: backbone -> neck -> head, with the
    head handling loss computation in training and NMS post-processing at
    inference time."""

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head):
        super(RetinaNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        """Build submodules from config, feeding each stage's out_shape to
        the next stage as input_shape."""
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {
            'backbone': backbone,
            'neck': neck,
            'head': head,
        }

    def _forward(self):
        """Shared forward pass: loss dict in training, prediction dict at
        inference."""
        feats = self.neck(self.backbone(self.inputs))

        if self.training:
            return self.head(feats, self.inputs)

        head_outs = self.head(feats)
        bbox, bbox_num, nms_keep_idx = self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
        result = {'bbox': bbox, 'bbox_num': bbox_num}

        if not self.use_extra_data:
            return result

        # Also expose pre-NMS information for downstream consumers:
        # raw classification logits, their sigmoid scores, and the indices
        # of boxes kept by NMS.
        logits = self.head.decode_cls_logits(head_outs[0])
        result["extra_data"] = {
            'logits': logits,
            'scores': F.sigmoid(logits),
            'nms_keep_idx': nms_keep_idx,  # bbox index before nms
        }
        return result

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
83
paddle_detection/ppdet/modeling/architectures/s2anet.py
Normal file
83
paddle_detection/ppdet/modeling/architectures/s2anet.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['S2ANet']
|
||||
|
||||
|
||||
@register
class S2ANet(BaseArch):
    __category__ = 'architecture'
    __inject__ = ['head']

    def __init__(self, backbone, neck, head):
        """
        S2ANet, see https://arxiv.org/pdf/2008.09397.pdf

        Args:
            backbone (object): backbone instance
            neck (object): `FPN` instance
            head (object): `Head` instance
        """
        super(S2ANet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.s2anet_head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])
        kwargs = {'input_shape': backbone.out_shape}
        # neck is optional: cfg['neck'] may be falsy, in which case the
        # head consumes backbone features directly.
        neck = cfg['neck'] and create(cfg['neck'], **kwargs)

        out_shape = neck and neck.out_shape or backbone.out_shape
        kwargs = {'input_shape': out_shape}
        head = create(cfg['head'], **kwargs)

        return {'backbone': backbone, 'neck': neck, "head": head}

    def _forward(self):
        """Shared forward pass: head loss dict in training, rotated-box
        prediction dict at inference."""
        body_feats = self.backbone(self.inputs)
        if self.neck is not None:
            body_feats = self.neck(body_feats)
        if self.training:
            loss = self.s2anet_head(body_feats, self.inputs)
            return loss
        else:
            head_outs = self.s2anet_head(body_feats)
            # post_process
            bboxes, bbox_num = self.s2anet_head.get_bboxes(head_outs)
            # rescale the prediction back to origin image
            im_shape = self.inputs['im_shape']
            scale_factor = self.inputs['scale_factor']
            bboxes = self.s2anet_head.get_pred(bboxes, bbox_num, im_shape,
                                               scale_factor)
            # output
            output = {'bbox': bboxes, 'bbox_num': bbox_num}
            return output

    def get_loss(self, ):
        loss = self._forward()
        return loss

    def get_pred(self):
        output = self._forward()
        return output
|
||||
110
paddle_detection/ppdet/modeling/architectures/solov2.py
Normal file
110
paddle_detection/ppdet/modeling/architectures/solov2.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['SOLOv2']
|
||||
|
||||
|
||||
@register
class SOLOv2(BaseArch):
    """
    SOLOv2 network, see https://arxiv.org/abs/2003.10152

    Args:
        backbone (object): an backbone instance
        solov2_head (object): an `SOLOv2Head` instance
        mask_head (object): an `SOLOv2MaskHead` instance
        neck (object): neck of network, such as feature pyramid network instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, solov2_head, mask_head, neck=None):
        super(SOLOv2, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.solov2_head = solov2_head
        self.mask_head = mask_head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        backbone = create(cfg['backbone'])

        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        kwargs = {'input_shape': neck.out_shape}
        solov2_head = create(cfg['solov2_head'], **kwargs)
        mask_head = create(cfg['mask_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            'solov2_head': solov2_head,
            'mask_head': mask_head,
        }

    def model_arch(self):
        """Run backbone/neck/heads once and cache the raw predictions on
        self; get_loss/get_pred below consume these cached tensors."""
        body_feats = self.backbone(self.inputs)

        body_feats = self.neck(body_feats)

        # Unified mask feature map shared by all grid levels.
        self.seg_pred = self.mask_head(body_feats)

        # Per-level category scores and dynamic-conv kernels.
        self.cate_pred_list, self.kernel_pred_list = self.solov2_head(
            body_feats)

    def get_loss(self, ):
        """Collect per-level GT tensors from self.inputs (keys like
        'ins_label{i}'), compute the SOLOv2 head loss, and add a summed
        'loss' entry."""
        loss = {}
        # get gt_ins_labels, gt_cate_labels, etc.
        gt_ins_labels, gt_cate_labels, gt_grid_orders = [], [], []
        fg_num = self.inputs['fg_num']
        for i in range(len(self.solov2_head.seg_num_grids)):
            ins_label = 'ins_label{}'.format(i)
            if ins_label in self.inputs:
                gt_ins_labels.append(self.inputs[ins_label])
            cate_label = 'cate_label{}'.format(i)
            if cate_label in self.inputs:
                gt_cate_labels.append(self.inputs[cate_label])
            grid_order = 'grid_order{}'.format(i)
            if grid_order in self.inputs:
                gt_grid_orders.append(self.inputs[grid_order])

        loss_solov2 = self.solov2_head.get_loss(
            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
            gt_ins_labels, gt_cate_labels, gt_grid_orders, fg_num)
        loss.update(loss_solov2)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})
        return loss

    def get_pred(self):
        """Decode the cached head outputs into final segmentation masks,
        labels and scores."""
        seg_masks, cate_labels, cate_scores, bbox_num = self.solov2_head.get_prediction(
            self.cate_pred_list, self.kernel_pred_list, self.seg_pred,
            self.inputs['im_shape'], self.inputs['scale_factor'])
        outs = {
            "segm": seg_masks,
            "bbox_num": bbox_num,
            'cate_label': cate_labels,
            'cate_score': cate_scores
        }
        return outs
|
||||
99
paddle_detection/ppdet/modeling/architectures/sparse_rcnn.py
Normal file
99
paddle_detection/ppdet/modeling/architectures/sparse_rcnn.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ["SparseRCNN"]
|
||||
|
||||
|
||||
@register
class SparseRCNN(BaseArch):
    """Sparse R-CNN, see https://arxiv.org/abs/2011.12450

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): feature pyramid instance
        head (nn.Layer): `SparsercnnHead` instance
        postprocess (object): `SparsePostProcess` instance
    """
    __category__ = 'architecture'
    __inject__ = ["postprocess"]

    def __init__(self,
                 backbone,
                 neck,
                 head="SparsercnnHead",
                 postprocess="SparsePostProcess"):
        super(SparseRCNN, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.postprocess = postprocess

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Each sub-module is built with the output shape of its producer.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], roi_input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'head': head}

    def _forward(self):
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(fpn_feats, self.inputs["img_whwh"])

        if self.training:
            return head_outs
        # Inference: decode logits/boxes into final detections.
        bbox_pred, bbox_num = self.postprocess(
            head_outs["pred_logits"], head_outs["pred_boxes"],
            self.inputs["scale_factor_whwh"], self.inputs["ori_shape"])
        return bbox_pred, bbox_num

    def get_loss(self):
        # Repack per-image ground truths into the target dicts the head
        # expects; img_whwh_tgt repeats [w, h, w, h] once per gt box.
        targets = []
        for gt_cls, gt_box, whwh in zip(self.inputs["gt_class"],
                                        self.inputs["gt_bbox"],
                                        self.inputs["img_whwh"]):
            targets.append({
                "boxes": gt_box,
                "labels": gt_cls.squeeze(-1),
                "img_whwh": whwh,
                "img_whwh_tgt": whwh.unsqueeze(0).tile(
                    [int(gt_box.shape[0]), 1]),
            })

        loss_dict = self.head.get_loss(self._forward(), targets)
        # 'acc' is a metric, not a loss term: keep it out of the total.
        acc = loss_dict.pop("acc")
        loss_dict.update({"loss": sum(loss_dict.values()), "acc": acc})
        return loss_dict

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
|
||||
118
paddle_detection/ppdet/modeling/architectures/ssd.py
Normal file
118
paddle_detection/ppdet/modeling/architectures/ssd.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
|
||||
__all__ = ['SSD']
|
||||
|
||||
|
||||
@register
class SSD(BaseArch):
    """
    Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325

    Args:
        backbone (nn.Layer): backbone instance
        ssd_head (nn.Layer): `SSDHead` instance
        post_process (object): `BBoxPostProcess` instance
        r34_backbone (bool): whether the backbone is ResNet-34; if True the
            stride of stage 3 is forced to 1 (see __init__).
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self, backbone, ssd_head, post_process, r34_backbone=False):
        super(SSD, self).__init__()
        self.backbone = backbone
        self.ssd_head = ssd_head
        self.post_process = post_process
        self.r34_backbone = r34_backbone
        if self.r34_backbone:
            # SSD-on-ResNet-34 variant: force stride 1 in the first block of
            # stage 3 (both main and shortcut convs) so the feature map used
            # for detection stays larger.
            from ppdet.modeling.backbones.resnet import ResNet
            assert isinstance(self.backbone, ResNet) and \
                   self.backbone.depth == 34, \
                "If you set r34_backbone=True, please use ResNet-34 as backbone."
            self.backbone.res_layers[2].blocks[0].branch2a.conv._stride = [1, 1]
            self.backbone.res_layers[2].blocks[0].short.conv._stride = [1, 1]

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        # head (built on the backbone's output shape)
        kwargs = {'input_shape': backbone.out_shape}
        ssd_head = create(cfg['ssd_head'], **kwargs)

        return {
            'backbone': backbone,
            "ssd_head": ssd_head,
        }

    def _forward(self):
        # Backbone
        body_feats = self.backbone(self.inputs)

        # SSD Head
        if self.training:
            # Training: the head computes and returns the loss directly.
            return self.ssd_head(body_feats, self.inputs['image'],
                                 self.inputs['gt_bbox'],
                                 self.inputs['gt_class'])
        else:
            preds, anchors = self.ssd_head(body_feats, self.inputs['image'])
            bbox, bbox_num, nms_keep_idx = self.post_process(
                preds, anchors, self.inputs['im_shape'],
                self.inputs['scale_factor'])

            # NOTE(review): `use_extra_data` is not set in this __init__ —
            # presumably inherited from BaseArch; confirm.
            if self.use_extra_data:
                extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                """extra_data:{
                            'scores': predict scores,
                            'nms_keep_idx': bbox index before nms,
                           }
                """
                preds_logits = preds[1]  # [[1xNumBBoxNumClass]]
                # Per-class scores and raw logits, transposed so the class
                # axis comes before the box axis.
                extra_data['scores'] = F.softmax(paddle.concat(
                    preds_logits, axis=1)).transpose([0, 2, 1])
                extra_data['logits'] = paddle.concat(
                    preds_logits, axis=1).transpose([0, 2, 1])
                extra_data['nms_keep_idx'] = nms_keep_idx  # bbox index before nms
                return bbox, bbox_num, extra_data
            else:
                return bbox, bbox_num

    def get_loss(self, ):
        """Training entry point: the head already returns the loss tensor."""
        return {"loss": self._forward()}

    def get_pred(self):
        """Inference entry point: package detections (and, optionally, the
        pre-NMS extra data) into the output dict."""
        if self.use_extra_data:
            bbox_pred, bbox_num, extra_data = self._forward()
            output = {
                "bbox": bbox_pred,
                "bbox_num": bbox_num,
                "extra_data": extra_data
            }
        else:
            bbox_pred, bbox_num = self._forward()
            output = {
                "bbox": bbox_pred,
                "bbox_num": bbox_num,
            }
        return output
|
||||
77
paddle_detection/ppdet/modeling/architectures/tood.py
Normal file
77
paddle_detection/ppdet/modeling/architectures/tood.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['TOOD']
|
||||
|
||||
|
||||
@register
class TOOD(BaseArch):
    """Task-aligned One-stage Object Detection (TOOD),
    see https://arxiv.org/abs/2108.07755

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): 'FPN' instance
        head (nn.Layer): 'TOODHead' instance
    """

    __category__ = 'architecture'

    def __init__(self, backbone, neck, head):
        super(TOOD, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Each sub-module is built with the output shape of its producer.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'head': head}

    def _forward(self):
        fpn_feats = self.neck(self.backbone(self.inputs))
        head_outs = self.head(fpn_feats)
        if self.training:
            # Training: delegate loss computation to the head.
            return self.head.get_loss(head_outs, self.inputs)
        # Inference: decode head outputs back to original image scale.
        return self.head.post_process(
            head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {'bbox': bbox_pred, 'bbox_num': bbox_num}
|
||||
98
paddle_detection/ppdet/modeling/architectures/ttfnet.py
Normal file
98
paddle_detection/ppdet/modeling/architectures/ttfnet.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import paddle
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['TTFNet']
|
||||
|
||||
|
||||
@register
class TTFNet(BaseArch):
    """TTFNet network, see https://arxiv.org/abs/1909.00700

    Args:
        backbone (object): backbone instance
        neck (object): 'TTFFPN' instance
        ttf_head (object): 'TTFHead' instance
        post_process (object): 'BBoxPostProcess' instance
    """

    __category__ = 'architecture'
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='TTFFPN',
                 ttf_head='TTFHead',
                 post_process='BBoxPostProcess'):
        super(TTFNet, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.ttf_head = ttf_head
        self.post_process = post_process

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Each sub-module is built with the output shape of its producer.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        ttf_head = create(cfg['ttf_head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'ttf_head': ttf_head}

    def _forward(self):
        # hm: center heatmap prediction, wh: box regression prediction.
        feats = self.neck(self.backbone(self.inputs))
        hm, wh = self.ttf_head(feats)
        if self.training:
            return hm, wh
        return self.post_process(hm, wh, self.inputs['im_shape'],
                                 self.inputs['scale_factor'])

    def get_loss(self, ):
        hm, wh = self._forward()
        losses = dict(
            self.ttf_head.get_loss(hm, wh, self.inputs['ttf_heatmap'],
                                   self.inputs['ttf_box_target'],
                                   self.inputs['ttf_reg_weight']))
        # Total is the sum of all individual terms (computed before the
        # 'loss' key is inserted).
        losses['loss'] = paddle.add_n(list(losses.values()))
        return losses

    def get_pred(self):
        bbox_pred, bbox_num = self._forward()
        return {
            "bbox": bbox_pred,
            "bbox_num": bbox_num,
        }
|
||||
150
paddle_detection/ppdet/modeling/architectures/yolo.py
Normal file
150
paddle_detection/ppdet/modeling/architectures/yolo.py
Normal file
@@ -0,0 +1,150 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
from ..post_process import JDEBBoxPostProcess
|
||||
|
||||
__all__ = ['YOLOv3']
|
||||
# YOLOv3,PP-YOLO,PP-YOLOv2,PP-YOLOE,PP-YOLOE+ use the same architecture as YOLOv3
|
||||
# PP-YOLOE and PP-YOLOE+ are recommended to use PPYOLOE architecture in ppyoloe.py, especially when use distillation or aux head
|
||||
|
||||
|
||||
@register
class YOLOv3(BaseArch):
    __category__ = 'architecture'
    __shared__ = ['data_format']
    __inject__ = ['post_process']

    def __init__(self,
                 backbone='DarkNet',
                 neck='YOLOv3FPN',
                 yolo_head='YOLOv3Head',
                 post_process='BBoxPostProcess',
                 data_format='NCHW',
                 for_mot=False):
        """
        YOLOv3 network, see https://arxiv.org/abs/1804.02767

        Args:
            backbone (nn.Layer): backbone instance
            neck (nn.Layer): neck instance
            yolo_head (nn.Layer): anchor_head instance
            bbox_post_process (object): `BBoxPostProcess` instance
            data_format (str): data format, NCHW or NHWC
            for_mot (bool): whether return other features for multi-object tracking
                models, default False in pure object detection models.
        """
        super(YOLOv3, self).__init__(data_format=data_format)
        self.backbone = backbone
        self.neck = neck
        self.yolo_head = yolo_head
        self.post_process = post_process
        self.for_mot = for_mot
        # JDE post process also returns the indices of kept boxes, needed by
        # the MOT embedding branch; remember whether that variant is in use.
        self.return_idx = isinstance(post_process, JDEBBoxPostProcess)

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        # head
        kwargs = {'input_shape': neck.out_shape}
        yolo_head = create(cfg['yolo_head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "yolo_head": yolo_head,
        }

    def _forward(self):
        """Shared forward pass for training and inference.

        In MOT mode the neck may return a dict carrying both detection
        features ('yolo_feats') and embedding features ('emb_feats'); the
        latter are threaded through to the output for the tracker.
        """
        body_feats = self.backbone(self.inputs)
        if self.for_mot:
            neck_feats = self.neck(body_feats, self.for_mot)
        else:
            neck_feats = self.neck(body_feats)

        if isinstance(neck_feats, dict):
            # Only MOT necks return a dict; split it into its two streams.
            # NOTE: emb_feats is only bound here, so the MOT branches below
            # rely on the neck actually returning a dict when for_mot=True.
            assert self.for_mot == True
            emb_feats = neck_feats['emb_feats']
            neck_feats = neck_feats['yolo_feats']

        if self.training:
            yolo_losses = self.yolo_head(neck_feats, self.inputs)

            if self.for_mot:
                return {'det_losses': yolo_losses, 'emb_feats': emb_feats}
            else:
                return yolo_losses

        else:
            yolo_head_outs = self.yolo_head(neck_feats)

            if self.for_mot:
                # the detection part of JDE MOT model
                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors)
                output = {
                    'bbox': bbox,
                    'bbox_num': bbox_num,
                    'boxes_idx': boxes_idx,
                    'nms_keep_idx': nms_keep_idx,
                    'emb_feats': emb_feats,
                }
            else:
                # Three decoding paths: JDE-style (returns indices),
                # anchor-based with external post process, or anchor-free
                # where the head decodes its own outputs.
                if self.return_idx:
                    # the detection part of JDE MOT model
                    _, bbox, bbox_num, nms_keep_idx = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors)
                elif self.post_process is not None:
                    # anchor based YOLOs: YOLOv3,PP-YOLO,PP-YOLOv2 use mask_anchors
                    bbox, bbox_num, nms_keep_idx = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors,
                        self.inputs['im_shape'], self.inputs['scale_factor'])
                else:
                    # anchor free YOLOs: PP-YOLOE, PP-YOLOE+
                    bbox, bbox_num, nms_keep_idx = self.yolo_head.post_process(
                        yolo_head_outs, self.inputs['scale_factor'])

                # NOTE(review): `use_extra_data` is not set in this __init__ —
                # presumably inherited from BaseArch; confirm.
                if self.use_extra_data:
                    extra_data = {}  # record the bbox output before nms, such like scores and nms_keep_idx
                    """extra_data:{
                                'scores': predict scores,
                                'nms_keep_idx': bbox index before nms,
                               }
                    """
                    extra_data['scores'] = yolo_head_outs[0]  # predict scores (probability)
                    # Todo: get logits output
                    extra_data['nms_keep_idx'] = nms_keep_idx
                    # Todo support for mask_anchors yolo
                    output = {'bbox': bbox, 'bbox_num': bbox_num, 'extra_data': extra_data}
                else:
                    output = {'bbox': bbox, 'bbox_num': bbox_num}

            return output

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
88
paddle_detection/ppdet/modeling/architectures/yolof.py
Normal file
88
paddle_detection/ppdet/modeling/architectures/yolof.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
__all__ = ['YOLOF']
|
||||
|
||||
|
||||
@register
class YOLOF(BaseArch):
    """YOLOF network (You Only Look One-level Feature),
    see https://arxiv.org/abs/2103.09460

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): DilatedEncoder instance
        head (nn.Layer): YOLOFHead instance
        for_mot (bool): whether to return extra features for multi-object
            tracking models; default False for pure detection.
    """
    __category__ = 'architecture'

    def __init__(self,
                 backbone='ResNet',
                 neck='DilatedEncoder',
                 head='YOLOFHead',
                 for_mot=False):
        super(YOLOF, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.for_mot = for_mot

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # Each sub-module is built with the output shape of its producer.
        backbone = create(cfg['backbone'])
        neck = create(cfg['neck'], input_shape=backbone.out_shape)
        head = create(cfg['head'], input_shape=neck.out_shape)
        return {'backbone': backbone, 'neck': neck, 'head': head}

    def _forward(self):
        feats = self.neck(self.backbone(self.inputs), self.for_mot)
        if self.training:
            # Training: the head computes and returns the losses.
            return self.head(feats, self.inputs)
        # Inference: decode head outputs back to original image scale.
        bbox, bbox_num = self.head.post_process(self.head(feats),
                                                self.inputs['im_shape'],
                                                self.inputs['scale_factor'])
        return {'bbox': bbox, 'bbox_num': bbox_num}

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()
|
||||
138
paddle_detection/ppdet/modeling/architectures/yolox.py
Normal file
138
paddle_detection/ppdet/modeling/architectures/yolox.py
Normal file
@@ -0,0 +1,138 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from ppdet.core.workspace import register, create
|
||||
from .meta_arch import BaseArch
|
||||
|
||||
import random
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
import paddle.distributed as dist
|
||||
|
||||
__all__ = ['YOLOX']
|
||||
|
||||
|
||||
@register
class YOLOX(BaseArch):
    """
    YOLOX network, see https://arxiv.org/abs/2107.08430

    Args:
        backbone (nn.Layer): backbone instance
        neck (nn.Layer): neck instance
        head (nn.Layer): head instance
        for_mot (bool): whether used for MOT or not
        input_size (list[int]): initial scale, will be reset by self._preprocess()
        size_stride (int): stride of the size range
        size_range (list[int]): multi-scale range for training
        random_interval (int): interval of iter to change self._input_size
    """
    __category__ = 'architecture'

    def __init__(self,
                 backbone='CSPDarkNet',
                 neck='YOLOCSPPAN',
                 head='YOLOXHead',
                 for_mot=False,
                 input_size=[640, 640],
                 size_stride=32,
                 size_range=[15, 25],
                 random_interval=10):
        super(YOLOX, self).__init__()
        self.backbone = backbone
        self.neck = neck
        self.head = head
        self.for_mot = for_mot

        # Multi-scale training state: `input_size` is the base scale,
        # `_input_size` the currently active scale, `_step` the iter counter.
        self.input_size = input_size
        self._input_size = paddle.to_tensor(input_size)
        self.size_stride = size_stride
        self.size_range = size_range
        self.random_interval = random_interval
        self._step = 0

    @classmethod
    def from_config(cls, cfg, *args, **kwargs):
        # backbone
        backbone = create(cfg['backbone'])

        # fpn
        kwargs = {'input_shape': backbone.out_shape}
        neck = create(cfg['neck'], **kwargs)

        # head
        kwargs = {'input_shape': neck.out_shape}
        head = create(cfg['head'], **kwargs)

        return {
            'backbone': backbone,
            'neck': neck,
            "head": head,
        }

    def _forward(self):
        if self.training:
            # Multi-scale training: possibly resize images and gt boxes
            # in-place before the forward pass.
            self._preprocess()
        body_feats = self.backbone(self.inputs)
        neck_feats = self.neck(body_feats, self.for_mot)

        if self.training:
            yolox_losses = self.head(neck_feats, self.inputs)
            # Report the current training scale alongside the losses.
            yolox_losses.update({'size': self._input_size[0]})
            return yolox_losses
        else:
            head_outs = self.head(neck_feats)
            bbox, bbox_num = self.head.post_process(
                head_outs, self.inputs['im_shape'], self.inputs['scale_factor'])
            return {'bbox': bbox, 'bbox_num': bbox_num}

    def get_loss(self):
        return self._forward()

    def get_pred(self):
        return self._forward()

    def _preprocess(self):
        # YOLOX multi-scale training, interpolate resize before inputs of the network.
        self._get_size()
        # _input_size is [h, w]; derive per-axis scales vs. the base size.
        scale_y = self._input_size[0] / self.input_size[0]
        scale_x = self._input_size[1] / self.input_size[1]
        if scale_x != 1 or scale_y != 1:
            self.inputs['image'] = F.interpolate(
                self.inputs['image'],
                size=self._input_size,
                mode='bilinear',
                align_corners=False)
            # Rescale gt boxes to match: even indices are x coords,
            # odd indices are y coords.
            gt_bboxes = self.inputs['gt_bbox']
            for i in range(len(gt_bboxes)):
                if len(gt_bboxes[i]) > 0:
                    gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x
                    gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y
            self.inputs['gt_bbox'] = gt_bboxes

    def _get_size(self):
        # random_interval = 10 as default, every 10 iters to change self._input_size
        # Keep the base aspect ratio while picking a random multiple of
        # size_stride within size_range.
        image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
        if self._step % self.random_interval == 0:
            size_factor = random.randint(*self.size_range)
            size = [
                self.size_stride * size_factor,
                self.size_stride * int(size_factor * image_ratio)
            ]
            self._input_size = paddle.to_tensor(size)
        self._step += 1
||||
Reference in New Issue
Block a user