Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,27 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_embedding_head
from . import fairmot_embedding_head
from . import resnet
from . import pyramidal_embedding
from . import pplcnet_embedding
from . import resnet_embedding
from .fairmot_embedding_head import *
from .jde_embedding_head import *
from .resnet import *
from .pyramidal_embedding import *
from .pplcnet_embedding import *
from .resnet_embedding import *

View File

@@ -0,0 +1,224 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import KaimingUniform, Uniform
from ppdet.core.workspace import register
from ppdet.modeling.heads.centernet_head import ConvLayer
__all__ = ['FairMOTEmbeddingHead']
@register
class FairMOTEmbeddingHead(nn.Layer):
__shared__ = ['num_classes']
"""
Args:
in_channels (int): the channel number of input to FairMOTEmbeddingHead.
ch_head (int): the channel number of features before being fed into the embedding, 256 by default.
ch_emb (int): the channel number of the embedding feature, 128 by default.
num_identities_dict (dict): the number of identities of each category,
supports single-class and multi-class, {0: 14455} by default.
"""
def __init__(self,
in_channels,
ch_head=256,
ch_emb=128,
num_classes=1,
num_identities_dict={0: 14455}):
super(FairMOTEmbeddingHead, self).__init__()
assert num_classes >= 1
self.num_classes = num_classes
self.ch_emb = ch_emb
self.num_identities_dict = num_identities_dict
self.reid = nn.Sequential(
ConvLayer(
in_channels, ch_head, kernel_size=3, padding=1, bias=True),
nn.ReLU(),
ConvLayer(
ch_head, ch_emb, kernel_size=1, stride=1, padding=0, bias=True))
param_attr = paddle.ParamAttr(initializer=KaimingUniform())
bound = 1 / math.sqrt(ch_emb)
bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
self.reid_loss = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum')
if num_classes == 1:
nID = self.num_identities_dict[0] # single class
self.classifier = nn.Linear(
ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
# When num_identities(nID) is 1, emb_scale is set as 1
self.emb_scale = math.sqrt(2) * math.log(nID - 1) if nID > 1 else 1
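# e.g. with the default nID = 14455: emb_scale = sqrt(2) * ln(14454) ~= 13.55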
else:
self.classifiers = dict()
self.emb_scale_dict = dict()
for cls_id, nID in self.num_identities_dict.items():
self.classifiers[str(cls_id)] = nn.Linear(
ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
# When num_identities(nID) is 1, emb_scale is set as 1
self.emb_scale_dict[str(cls_id)] = math.sqrt(2) * math.log(
nID - 1) if nID > 1 else 1
@classmethod
def from_config(cls, cfg, input_shape):
if isinstance(input_shape, (list, tuple)):
input_shape = input_shape[0]
return {'in_channels': input_shape.channels}
def process_by_class(self, bboxes, embedding, bbox_inds, topk_clses):
pred_dets, pred_embs = [], []
for cls_id in range(self.num_classes):
inds_masks = topk_clses == cls_id
inds_masks = paddle.cast(inds_masks, 'float32')
pos_num = inds_masks.sum().numpy()
if pos_num == 0:
continue
cls_inds_mask = inds_masks > 0
bbox_mask = paddle.nonzero(cls_inds_mask)
cls_bboxes = paddle.gather_nd(bboxes, bbox_mask)
pred_dets.append(cls_bboxes)
cls_inds = paddle.masked_select(bbox_inds, cls_inds_mask)
cls_inds = cls_inds.unsqueeze(-1)
cls_embedding = paddle.gather_nd(embedding, cls_inds)
pred_embs.append(cls_embedding)
return paddle.concat(pred_dets), paddle.concat(pred_embs)
def forward(self,
neck_feat,
inputs,
bboxes=None,
bbox_inds=None,
topk_clses=None):
reid_feat = self.reid(neck_feat)
if self.training:
if self.num_classes == 1:
loss = self.get_loss(reid_feat, inputs)
else:
loss = self.get_mc_loss(reid_feat, inputs)
return loss
else:
assert bboxes is not None and bbox_inds is not None
reid_feat = F.normalize(reid_feat)
embedding = paddle.transpose(reid_feat, [0, 2, 3, 1])
embedding = paddle.reshape(embedding, [-1, self.ch_emb])
# embedding shape: [bs * h * w, ch_emb]
if self.num_classes == 1:
pred_dets = bboxes
pred_embs = paddle.gather(embedding, bbox_inds)
else:
pred_dets, pred_embs = self.process_by_class(
bboxes, embedding, bbox_inds, topk_clses)
return pred_dets, pred_embs
def get_loss(self, feat, inputs):
index = inputs['index']
mask = inputs['index_mask']
target = inputs['reid']
target = paddle.masked_select(target, mask > 0)
target = paddle.unsqueeze(target, 1)
feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
feat_n, feat_h, feat_w, feat_c = feat.shape
feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])
index = paddle.unsqueeze(index, 2)
batch_inds = list()
for i in range(feat_n):
batch_ind = paddle.full(
shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
batch_inds.append(batch_ind)
batch_inds = paddle.concat(batch_inds, axis=0)
index = paddle.concat(x=[batch_inds, index], axis=2)
feat = paddle.gather_nd(feat, index=index)
mask = paddle.unsqueeze(mask, axis=2)
mask = paddle.expand_as(mask, feat)
mask.stop_gradient = True
feat = paddle.masked_select(feat, mask > 0)
feat = paddle.reshape(feat, shape=[-1, feat_c])
feat = F.normalize(feat)
feat = self.emb_scale * feat
logit = self.classifier(feat)
target.stop_gradient = True
loss = self.reid_loss(logit, target)
valid = (target != self.reid_loss.ignore_index)
valid.stop_gradient = True
count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
count.stop_gradient = True
if count > 0:
loss = loss / count
return loss
def get_mc_loss(self, feat, inputs):
# feat.shape = [bs, ch_emb, h, w]
assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
index = inputs['index']
mask = inputs['index_mask']
cls_id_map = inputs['cls_id_map'] # [bs, h, w]
cls_tr_ids = inputs['cls_tr_ids'] # [bs, num_classes, h, w]
feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
feat_n, feat_h, feat_w, feat_c = feat.shape
feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])
index = paddle.unsqueeze(index, 2)
batch_inds = list()
for i in range(feat_n):
batch_ind = paddle.full(
shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
batch_inds.append(batch_ind)
batch_inds = paddle.concat(batch_inds, axis=0)
index = paddle.concat(x=[batch_inds, index], axis=2)
feat = paddle.gather_nd(feat, index=index)
mask = paddle.unsqueeze(mask, axis=2)
mask = paddle.expand_as(mask, feat)
mask.stop_gradient = True
feat = paddle.masked_select(feat, mask > 0)
feat = paddle.reshape(feat, shape=[-1, feat_c])
reid_losses = 0
for cls_id, id_num in self.num_identities_dict.items():
# target
cur_cls_tr_ids = paddle.reshape(
cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1]) # [bs, h*w]
cls_id_target = paddle.gather_nd(cur_cls_tr_ids, index=index)
mask = inputs['index_mask']
cls_id_target = paddle.masked_select(cls_id_target, mask > 0)
cls_id_target.stop_gradient = True
# feat
cls_id_feat = self.emb_scale_dict[str(cls_id)] * F.normalize(feat)
cls_id_pred = self.classifiers[str(cls_id)](cls_id_feat)
loss = self.reid_loss(cls_id_pred, cls_id_target)
valid = (cls_id_target != self.reid_loss.ignore_index)
valid.stop_gradient = True
count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
count.stop_gradient = True
if count > 0:
loss = loss / count
reid_losses += loss
return reid_losses
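
A minimal inference-time sketch of the head above, assuming the file lands under ppdet/modeling/reid/ (as the package __init__ suggests) and using made-up feature and box shapes:

import paddle
from ppdet.modeling.reid.fairmot_embedding_head import FairMOTEmbeddingHead  # assumed module path

head = FairMOTEmbeddingHead(in_channels=64)   # docstring defaults: ch_head=256, ch_emb=128
head.eval()
neck_feat = paddle.rand([1, 64, 152, 272])    # hypothetical neck output
bboxes = paddle.rand([5, 6])                  # [cls_id, score, x1, y1, x2, y2] per detection
bbox_inds = paddle.randint(0, 152 * 272, shape=[5])   # flattened h*w indices of the kept centers
pred_dets, pred_embs = head(neck_feat, inputs=None, bboxes=bboxes, bbox_inds=bbox_inds)
print(pred_embs.shape)                        # [5, 128], channel-normalized embeddings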

View File

@@ -0,0 +1,211 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from paddle.nn.initializer import Normal, Constant
__all__ = ['JDEEmbeddingHead']
class LossParam(nn.Layer):
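# Learnable task-uncertainty weight s: forward() returns 0.5 * (exp(-s) * loss + s),
# which lets the detection and embedding losses be balanced automatically.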
def __init__(self, init_value=0., use_uncertainty=True):
super(LossParam, self).__init__()
self.loss_param = self.create_parameter(
shape=[1],
attr=ParamAttr(initializer=Constant(value=init_value)),
dtype="float32")
def forward(self, inputs):
out = paddle.exp(-self.loss_param) * inputs + self.loss_param
return out * 0.5
@register
class JDEEmbeddingHead(nn.Layer):
__shared__ = ['num_classes']
__inject__ = ['emb_loss', 'jde_loss']
"""
JDEEmbeddingHead
Args:
num_classes(int): Number of classes. Only single-class tracking is supported.
num_identities(int): Number of identities.
anchor_levels(int): Number of anchor levels, same as FPN levels.
anchor_scales(int): Number of anchor scales on each FPN level.
embedding_dim(int): Embedding dimension. Default: 512.
emb_loss(object): Instance of 'JDEEmbeddingLoss'
jde_loss(object): Instance of 'JDELoss'
"""
def __init__(
self,
num_classes=1,
num_identities=14455, # dataset.num_identities_dict[0]
anchor_levels=3,
anchor_scales=4,
embedding_dim=512,
emb_loss='JDEEmbeddingLoss',
jde_loss='JDELoss'):
super(JDEEmbeddingHead, self).__init__()
self.num_classes = num_classes
self.num_identities = num_identities
self.anchor_levels = anchor_levels
self.anchor_scales = anchor_scales
self.embedding_dim = embedding_dim
self.emb_loss = emb_loss
self.jde_loss = jde_loss
self.emb_scale = math.sqrt(2) * math.log(
self.num_identities - 1) if self.num_identities > 1 else 1
self.identify_outputs = []
self.loss_params_cls = []
self.loss_params_reg = []
self.loss_params_ide = []
for i in range(self.anchor_levels):
name = 'identify_output.{}'.format(i)
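# FPN level i carries 64 * 2**anchor_levels // 2**i channels (512, 256, 128 for the default 3 levels).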
identify_output = self.add_sublayer(
name,
nn.Conv2D(
in_channels=64 * (2**self.anchor_levels) // (2**i),
out_channels=self.embedding_dim,
kernel_size=3,
stride=1,
padding=1,
bias_attr=ParamAttr(regularizer=L2Decay(0.))))
self.identify_outputs.append(identify_output)
loss_p_cls = self.add_sublayer('cls.{}'.format(i), LossParam(-4.15))
self.loss_params_cls.append(loss_p_cls)
loss_p_reg = self.add_sublayer('reg.{}'.format(i), LossParam(-4.85))
self.loss_params_reg.append(loss_p_reg)
loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
self.loss_params_ide.append(loss_p_ide)
self.classifier = self.add_sublayer(
'classifier',
nn.Linear(
self.embedding_dim,
self.num_identities,
weight_attr=ParamAttr(
learning_rate=1., initializer=Normal(
mean=0.0, std=0.01)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
def forward(self,
identify_feats,
targets,
loss_confs=None,
loss_boxes=None,
bboxes=None,
boxes_idx=None,
nms_keep_idx=None):
assert self.num_classes == 1, 'JDE only supports single-class MOT.'
assert len(identify_feats) == self.anchor_levels
ide_outs = []
for feat, ide_head in zip(identify_feats, self.identify_outputs):
ide_outs.append(ide_head(feat))
if self.training:
assert len(loss_confs) == len(loss_boxes) == self.anchor_levels
loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale,
self.classifier)
jde_losses = self.jde_loss(
loss_confs, loss_boxes, loss_ides, self.loss_params_cls,
self.loss_params_reg, self.loss_params_ide, targets)
return jde_losses
else:
assert bboxes is not None
assert boxes_idx is not None
assert nms_keep_idx is not None
emb_outs = self.get_emb_outs(ide_outs)
emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)
input_shape = targets['image'].shape[2:]
# input_shape: [h, w], before data transforms, set in model config
im_shape = targets['im_shape'][0].numpy()
# im_shape: [new_h, new_w], after data transforms
scale_factor = targets['scale_factor'][0].numpy()
bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape,
im_shape, scale_factor)
# cls_ids, scores, tlwhs
pred_dets = bboxes
return pred_dets, pred_embs
def scale_coords(self, coords, input_shape, im_shape, scale_factor):
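# Map boxes from the letterboxed network input back to the original image:
# remove the symmetric padding, then undo the resize ratio.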
ratio = scale_factor[0]
pad_w = (input_shape[1] - int(im_shape[1])) / 2
pad_h = (input_shape[0] - int(im_shape[0])) / 2
coords = paddle.cast(coords, 'float32')
coords[:, 0::2] -= pad_w
coords[:, 1::2] -= pad_h
coords[:, 0:4] /= ratio
coords[:, :4] = paddle.clip(
coords[:, :4], min=0, max=coords[:, :4].max())
return coords.round()
def get_emb_and_gt_outs(self, ide_outs, targets):
emb_and_gts = []
for i, p_ide in enumerate(ide_outs):
t_conf = targets['tconf{}'.format(i)]
t_ide = targets['tide{}'.format(i)]
p_ide = p_ide.transpose((0, 2, 3, 1))
p_ide_flatten = paddle.reshape(p_ide, [-1, self.embedding_dim])
mask = t_conf > 0
mask = paddle.cast(mask, dtype="int64")
emb_mask = mask.max(1).flatten()
emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
if len(emb_mask_inds) > 0:
t_ide_flatten = paddle.reshape(t_ide.max(1), [-1, 1])
tids = paddle.gather(t_ide_flatten, emb_mask_inds)
embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
embedding = self.emb_scale * F.normalize(embedding)
emb_and_gt = paddle.concat([embedding, tids], axis=1)
emb_and_gts.append(emb_and_gt)
if len(emb_and_gts) > 0:
return paddle.concat(emb_and_gts, axis=0)
else:
return paddle.zeros((1, self.embedding_dim + 1))
def get_emb_outs(self, ide_outs):
emb_outs = []
for i, p_ide in enumerate(ide_outs):
p_ide = p_ide.transpose((0, 2, 3, 1))
p_ide_repeat = paddle.tile(p_ide, [self.anchor_scales, 1, 1, 1])
embedding = F.normalize(p_ide_repeat, axis=-1)
emb = paddle.reshape(embedding, [-1, self.embedding_dim])
emb_outs.append(emb)
if len(emb_outs) > 0:
return paddle.concat(emb_outs, axis=0)
else:
return paddle.zeros((1, self.embedding_dim))

View File

@@ -0,0 +1,281 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal, XavierNormal
from ppdet.core.workspace import register
__all__ = ['PPLCNetEmbedding']
# Each element (list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block
NET_CONFIG = {
"blocks2":
#k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
def make_divisible(v, divisor=8, min_value=None):
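# Round v to the nearest multiple of divisor (never below min_value), bumping up
# by one divisor if the result drops under 90% of v; e.g. make_divisible(16 * 2.5) -> 40.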
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
num_groups=1):
super().__init__()
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = BatchNorm2D(
num_filters,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.hardswish = nn.Hardswish()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.hardswish(x)
return x
class DepthwiseSeparable(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
dw_size=3,
use_se=False):
super().__init__()
self.use_se = use_se
self.dw_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_channels,
filter_size=dw_size,
stride=stride,
num_groups=num_channels)
if use_se:
self.se = SEModule(num_channels)
self.pw_conv = ConvBNLayer(
num_channels=num_channels,
filter_size=1,
num_filters=num_filters,
stride=1)
def forward(self, x):
x = self.dw_conv(x)
if self.use_se:
x = self.se(x)
x = self.pw_conv(x)
return x
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = nn.Hardsigmoid()
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = paddle.multiply(x=identity, y=x)
return x
class PPLCNet(nn.Layer):
"""
PP-LCNet, see https://arxiv.org/abs/2109.15099.
This code is different from PPLCNet in ppdet/modeling/backbones/lcnet.py
or in PaddleClas, because the output is the flattened feature of last_conv.
Args:
scale (float): Scale ratio of channels.
class_expand (int): Number of channels of conv feature.
"""
def __init__(self, scale=1.0, class_expand=1280):
super(PPLCNet, self).__init__()
self.scale = scale
self.class_expand = class_expand
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
num_filters=make_divisible(16 * scale),
stride=2)
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
])
self.blocks4 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
])
self.blocks5 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
])
self.blocks6 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
])
self.avg_pool = AdaptiveAvgPool2D(1)
self.last_conv = Conv2D(
in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
out_channels=self.class_expand,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.hardswish = nn.Hardswish()
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
def forward(self, x):
x = self.conv1(x)
x = self.blocks2(x)
x = self.blocks3(x)
x = self.blocks4(x)
x = self.blocks5(x)
x = self.blocks6(x)
x = self.avg_pool(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.flatten(x)
return x
class FC(nn.Layer):
def __init__(self, input_ch, output_ch):
super(FC, self).__init__()
weight_attr = ParamAttr(initializer=XavierNormal())
self.fc = paddle.nn.Linear(input_ch, output_ch, weight_attr=weight_attr)
def forward(self, x):
out = self.fc(x)
return out
@register
class PPLCNetEmbedding(nn.Layer):
"""
PPLCNet Embedding
Args:
input_ch (int): Number of channels of input conv feature.
output_ch (int): Number of channels of output conv feature.
"""
def __init__(self, scale=2.5, input_ch=1280, output_ch=512):
super(PPLCNetEmbedding, self).__init__()
self.backbone = PPLCNet(scale=scale)
self.neck = FC(input_ch, output_ch)
def forward(self, x):
feat = self.backbone(x)
feat_out = self.neck(feat)
return feat_out
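
A quick shape check for the embedding model above (the module path and the crop size are assumptions):

import paddle
from ppdet.modeling.reid.pplcnet_embedding import PPLCNetEmbedding  # assumed module path

model = PPLCNetEmbedding(scale=2.5, input_ch=1280, output_ch=512)
model.eval()
x = paddle.rand([1, 3, 192, 64])   # a person-ReID style crop (assumption)
feat = model(x)
print(feat.shape)                  # [1, 512]: PPLCNet -> 1280-d flattened feature -> FC to 512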

View File

@@ -0,0 +1,146 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register
__all__ = ['PCBPyramid']
@register
class PCBPyramid(nn.Layer):
"""
PCB (Part-based Convolutional Baseline), see https://arxiv.org/abs/1711.09349,
Pyramidal Person Re-IDentification, see https://arxiv.org/abs/1810.12193
Args:
input_ch (int): Number of channels of the input feature.
num_stripes (int): Number of sub-parts.
used_levels (tuple): Whether each level is used; 1 means used.
num_classes (int): Number of classes for identities, default 751 in
Market-1501 dataset.
last_conv_stride (int): Stride of the last conv.
last_conv_dilation (int): Dilation of the last conv.
num_conv_out_channels (int): Number of channels of conv feature.
"""
def __init__(self,
input_ch=2048,
model_name='ResNet101',
num_stripes=6,
used_levels=(1, 1, 1, 1, 1, 1),
num_classes=751,
last_conv_stride=1,
last_conv_dilation=1,
num_conv_out_channels=128):
super(PCBPyramid, self).__init__()
self.num_stripes = num_stripes
self.used_levels = used_levels
self.num_classes = num_classes
self.num_in_each_level = [i for i in range(self.num_stripes, 0, -1)]
self.num_branches = sum(self.num_in_each_level)
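# With the default num_stripes=6 this gives 6 + 5 + 4 + 3 + 2 + 1 = 21 branches.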
assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
self.base = eval(model_name)(
lr_mult=0.1,
last_conv_stride=last_conv_stride,
last_conv_dilation=last_conv_dilation)
self.dropout_layer = nn.Dropout(p=0.2)
self.pyramid_conv_list0, self.pyramid_fc_list0 = self.basic_branch(
num_conv_out_channels, input_ch)
def basic_branch(self, num_conv_out_channels, input_ch):
# The level indexes run from fine to coarse; each level's branches contain
# one more part than those of the previous level, and the sliding step is 1.
pyramid_conv_list = nn.LayerList()
pyramid_fc_list = nn.LayerList()
idx_levels = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
pyramid_conv_list.append(
nn.Sequential(
nn.Conv2D(input_ch, num_conv_out_channels, 1),
nn.BatchNorm2D(num_conv_out_channels), nn.ReLU()))
idx_levels = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
fc = nn.Linear(
in_features=num_conv_out_channels,
out_features=self.num_classes,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.001)),
bias_attr=ParamAttr(initializer=Constant(value=0.)))
pyramid_fc_list.append(fc)
return pyramid_conv_list, pyramid_fc_list
def pyramid_forward(self, feat):
each_stripe_size = int(feat.shape[2] / self.num_stripes)
feat_list, logits_list = [], []
idx_levels = 0
used_branches = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
idx_in_each_level = idx_branches - sum(self.num_in_each_level[
0:idx_levels])
stripe_size_in_each_level = each_stripe_size * (idx_levels + 1)
start = idx_in_each_level * each_stripe_size
end = start + stripe_size_in_each_level
k = feat.shape[-1]
local_feat_avgpool = F.avg_pool2d(
feat[:, :, start:end, :],
kernel_size=(stripe_size_in_each_level, k))
local_feat_maxpool = F.max_pool2d(
feat[:, :, start:end, :],
kernel_size=(stripe_size_in_each_level, k))
local_feat = local_feat_avgpool + local_feat_maxpool
local_feat = self.pyramid_conv_list0[used_branches](local_feat)
local_feat = paddle.reshape(
local_feat, shape=[local_feat.shape[0], -1])
feat_list.append(local_feat)
local_logits = self.pyramid_fc_list0[used_branches](
self.dropout_layer(local_feat))
logits_list.append(local_logits)
used_branches += 1
return feat_list, logits_list
def forward(self, x):
feat = self.base(x)
assert feat.shape[2] % self.num_stripes == 0
feat_list, logits_list = self.pyramid_forward(feat)
feat_out = paddle.concat(feat_list, axis=-1)
return feat_out
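
A sketch of the concatenated part feature produced above; ResNet50 and the input size are assumptions chosen so that the feature height (24) stays divisible by num_stripes:

import paddle
from ppdet.modeling.reid.pyramidal_embedding import PCBPyramid  # assumed module path

model = PCBPyramid(model_name='ResNet50', num_classes=751)
model.eval()
x = paddle.rand([1, 3, 384, 128])
feat_out = model(x)
print(feat_out.shape)   # [1, 2688]: 21 branches x 128 channels each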

View File

@@ -0,0 +1,312 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal
__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
dilation=1,
groups=1,
act=None,
lr_mult=1.0,
name=None,
data_format="NCHW"):
super(ConvBNLayer, self).__init__()
conv_stdv = filter_size * filter_size * num_filters
self._conv = nn.Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
dilation=dilation,
groups=groups,
weight_attr=ParamAttr(
learning_rate=lr_mult,
initializer=Normal(0, math.sqrt(2. / conv_stdv))),
bias_attr=False,
data_format=data_format)
self._batch_norm = nn.BatchNorm2D(num_filters)
self.act = act
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act:
y = getattr(F, self.act)(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None,
lr_mult=1.0,
dilation=1,
data_format="NCHW"):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
dilation=dilation,
act="relu",
lr_mult=lr_mult,
name=name + "_branch2a",
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
dilation=dilation,
stride=stride,
act="relu",
lr_mult=lr_mult,
name=name + "_branch2b",
data_format=data_format)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
dilation=dilation,
act=None,
lr_mult=lr_mult,
name=name + "_branch2c",
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
dilation=dilation,
stride=stride,
lr_mult=lr_mult,
name=name + "_branch1",
data_format=data_format)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None,
data_format="NCHW"):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_branch2a",
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b",
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=stride,
name=name + "_branch1",
data_format=data_format)
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv1)
y = F.relu(y)
return y
class ResNet(nn.Layer):
def __init__(self,
layers=50,
lr_mult=1.0,
last_conv_stride=2,
last_conv_dilation=1):
super(ResNet, self).__init__()
self.layers = layers
self.data_format = "NCHW"
self.input_image_channel = 3
supported_layers = [18, 34, 50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=self.input_image_channel,
num_filters=64,
filter_size=7,
stride=2,
act="relu",
lr_mult=lr_mult,
name="conv1",
data_format=self.data_format)
self.pool2d_max = nn.MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
self.block_list = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
if i != 0 or block == 0:
stride = 1
elif block == len(depth) - 1:
stride = last_conv_stride
else:
stride = 2
bottleneck_block = self.add_sublayer(
conv_name,
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=stride,
shortcut=shortcut,
name=conv_name,
lr_mult=lr_mult,
dilation=last_conv_dilation
if block == len(depth) - 1 else 1,
data_format=self.data_format))
self.block_list.append(bottleneck_block)
shortcut = True
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = self.add_sublayer(
conv_name,
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name,
data_format=self.data_format))
self.block_list.append(basic_block)
shortcut = True
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
return y
def ResNet18(**args):
model = ResNet(layers=18, **args)
return model
def ResNet34(**args):
model = ResNet(layers=34, **args)
return model
def ResNet50(pretrained=None, **args):
model = ResNet(layers=50, **args)
if pretrained is not None:
if not (os.path.isdir(pretrained) or
os.path.exists(pretrained + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(pretrained))
param_state_dict = paddle.load(pretrained + '.pdparams')
model.set_dict(param_state_dict)
return model
def ResNet101(pretrained=None, **args):
model = ResNet(layers=101, **args)
if pretrained is not None:
if not (os.path.isdir(pretrained) or
os.path.exists(pretrained + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(pretrained))
param_state_dict = paddle.load(pretrained + '.pdparams')
model.set_dict(param_state_dict)
return model
def ResNet152(**args):
model = ResNet(layers=152, **args)
return model
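
The last_conv_stride and last_conv_dilation arguments let the ReID models above keep a finer feature map than the usual stride-32 ResNet. A small shape check (the input size is an assumption):

import paddle
from ppdet.modeling.reid.resnet import ResNet50  # assumed module path

model = ResNet50(last_conv_stride=1)
model.eval()
x = paddle.rand([1, 3, 256, 128])
y = model(x)
print(y.shape)   # [1, 2048, 16, 8]: overall stride 16 instead of 32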

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import paddle.nn.functional as F
from paddle import nn
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register
__all__ = ['ResNetEmbedding']
@register
class ResNetEmbedding(nn.Layer):
in_planes = 2048
def __init__(self, model_name='ResNet50', last_stride=1):
super(ResNetEmbedding, self).__init__()
assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
self.base = eval(model_name)(last_conv_stride=last_stride)
self.gap = nn.AdaptiveAvgPool2D(output_size=1)
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.bn = nn.BatchNorm1D(self.in_planes, bias_attr=False)
def forward(self, x):
base_out = self.base(x)
global_feat = self.gap(base_out)
global_feat = self.flatten(global_feat)
global_feat = self.bn(global_feat)
return global_feat
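
A matching sketch for the embedding wrapper above (module path and crop size are assumptions):

import paddle
from ppdet.modeling.reid.resnet_embedding import ResNetEmbedding  # assumed module path

model = ResNetEmbedding(model_name='ResNet50', last_stride=1)
model.eval()
x = paddle.rand([1, 3, 256, 128])
feat = model(x)
print(feat.shape)   # [1, 2048]: global average pooled, flattened, then BatchNorm1D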