Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,27 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_embedding_head
from . import fairmot_embedding_head
from . import resnet
from . import pyramidal_embedding
from . import pplcnet_embedding
from . import resnet_embedding
from .fairmot_embedding_head import *
from .jde_embedding_head import *
from .resnet import *
from .pyramidal_embedding import *
from .pplcnet_embedding import *
from .resnet_embedding import *

View File

@@ -0,0 +1,224 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import KaimingUniform, Uniform
from ppdet.core.workspace import register
from ppdet.modeling.heads.centernet_head import ConvLayer
__all__ = ['FairMOTEmbeddingHead']
@register
class FairMOTEmbeddingHead(nn.Layer):
__shared__ = ['num_classes']
"""
Args:
in_channels (int): the channel number of input to FairMOTEmbeddingHead.
ch_head (int): the channel number of features before being fed into the embedding, 256 by default.
ch_emb (int): the channel number of the embedding feature, 128 by default.
num_identities_dict (dict): the number of identities of each category,
supports single-class and multi-class, {0: 14455} by default.
"""
def __init__(self,
in_channels,
ch_head=256,
ch_emb=128,
num_classes=1,
num_identities_dict={0: 14455}):
super(FairMOTEmbeddingHead, self).__init__()
assert num_classes >= 1
self.num_classes = num_classes
self.ch_emb = ch_emb
self.num_identities_dict = num_identities_dict
self.reid = nn.Sequential(
ConvLayer(
in_channels, ch_head, kernel_size=3, padding=1, bias=True),
nn.ReLU(),
ConvLayer(
ch_head, ch_emb, kernel_size=1, stride=1, padding=0, bias=True))
param_attr = paddle.ParamAttr(initializer=KaimingUniform())
bound = 1 / math.sqrt(ch_emb)
bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
self.reid_loss = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum')
if num_classes == 1:
nID = self.num_identities_dict[0] # single class
self.classifier = nn.Linear(
ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
# When num_identities(nID) is 1, emb_scale is set as 1
self.emb_scale = math.sqrt(2) * math.log(nID - 1) if nID > 1 else 1
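# e.g. with the default nID = 14455: emb_scale = sqrt(2) * ln(14454) ~= 13.55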
else:
self.classifiers = dict()
self.emb_scale_dict = dict()
for cls_id, nID in self.num_identities_dict.items():
self.classifiers[str(cls_id)] = nn.Linear(
ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
# When num_identities(nID) is 1, emb_scale is set as 1
self.emb_scale_dict[str(cls_id)] = math.sqrt(2) * math.log(
nID - 1) if nID > 1 else 1
@classmethod
def from_config(cls, cfg, input_shape):
if isinstance(input_shape, (list, tuple)):
input_shape = input_shape[0]
return {'in_channels': input_shape.channels}
def process_by_class(self, bboxes, embedding, bbox_inds, topk_clses):
pred_dets, pred_embs = [], []
for cls_id in range(self.num_classes):
inds_masks = topk_clses == cls_id
inds_masks = paddle.cast(inds_masks, 'float32')
pos_num = inds_masks.sum().numpy()
if pos_num == 0:
continue
cls_inds_mask = inds_masks > 0
bbox_mask = paddle.nonzero(cls_inds_mask)
cls_bboxes = paddle.gather_nd(bboxes, bbox_mask)
pred_dets.append(cls_bboxes)
cls_inds = paddle.masked_select(bbox_inds, cls_inds_mask)
cls_inds = cls_inds.unsqueeze(-1)
cls_embedding = paddle.gather_nd(embedding, cls_inds)
pred_embs.append(cls_embedding)
return paddle.concat(pred_dets), paddle.concat(pred_embs)
def forward(self,
neck_feat,
inputs,
bboxes=None,
bbox_inds=None,
topk_clses=None):
reid_feat = self.reid(neck_feat)
if self.training:
if self.num_classes == 1:
loss = self.get_loss(reid_feat, inputs)
else:
loss = self.get_mc_loss(reid_feat, inputs)
return loss
else:
assert bboxes is not None and bbox_inds is not None
reid_feat = F.normalize(reid_feat)
embedding = paddle.transpose(reid_feat, [0, 2, 3, 1])
embedding = paddle.reshape(embedding, [-1, self.ch_emb])
# embedding shape: [bs * h * w, ch_emb]
if self.num_classes == 1:
pred_dets = bboxes
pred_embs = paddle.gather(embedding, bbox_inds)
else:
pred_dets, pred_embs = self.process_by_class(
bboxes, embedding, bbox_inds, topk_clses)
return pred_dets, pred_embs
def get_loss(self, feat, inputs):
index = inputs['index']
mask = inputs['index_mask']
target = inputs['reid']
target = paddle.masked_select(target, mask > 0)
target = paddle.unsqueeze(target, 1)
feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
feat_n, feat_h, feat_w, feat_c = feat.shape
feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])
index = paddle.unsqueeze(index, 2)
batch_inds = list()
for i in range(feat_n):
batch_ind = paddle.full(
shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
batch_inds.append(batch_ind)
batch_inds = paddle.concat(batch_inds, axis=0)
index = paddle.concat(x=[batch_inds, index], axis=2)
feat = paddle.gather_nd(feat, index=index)
mask = paddle.unsqueeze(mask, axis=2)
mask = paddle.expand_as(mask, feat)
mask.stop_gradient = True
feat = paddle.masked_select(feat, mask > 0)
feat = paddle.reshape(feat, shape=[-1, feat_c])
feat = F.normalize(feat)
feat = self.emb_scale * feat
logit = self.classifier(feat)
target.stop_gradient = True
loss = self.reid_loss(logit, target)
valid = (target != self.reid_loss.ignore_index)
valid.stop_gradient = True
count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
count.stop_gradient = True
if count > 0:
loss = loss / count
return loss
def get_mc_loss(self, feat, inputs):
# feat.shape = [bs, ch_emb, h, w]
assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
index = inputs['index']
mask = inputs['index_mask']
cls_id_map = inputs['cls_id_map'] # [bs, h, w]
cls_tr_ids = inputs['cls_tr_ids'] # [bs, num_classes, h, w]
feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
feat_n, feat_h, feat_w, feat_c = feat.shape
feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])
index = paddle.unsqueeze(index, 2)
batch_inds = list()
for i in range(feat_n):
batch_ind = paddle.full(
shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
batch_inds.append(batch_ind)
batch_inds = paddle.concat(batch_inds, axis=0)
index = paddle.concat(x=[batch_inds, index], axis=2)
feat = paddle.gather_nd(feat, index=index)
mask = paddle.unsqueeze(mask, axis=2)
mask = paddle.expand_as(mask, feat)
mask.stop_gradient = True
feat = paddle.masked_select(feat, mask > 0)
feat = paddle.reshape(feat, shape=[-1, feat_c])
reid_losses = 0
for cls_id, id_num in self.num_identities_dict.items():
# target
cur_cls_tr_ids = paddle.reshape(
cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1]) # [bs, h*w]
cls_id_target = paddle.gather_nd(cur_cls_tr_ids, index=index)
mask = inputs['index_mask']
cls_id_target = paddle.masked_select(cls_id_target, mask > 0)
cls_id_target.stop_gradient = True
# feat
cls_id_feat = self.emb_scale_dict[str(cls_id)] * F.normalize(feat)
cls_id_pred = self.classifiers[str(cls_id)](cls_id_feat)
loss = self.reid_loss(cls_id_pred, cls_id_target)
valid = (cls_id_target != self.reid_loss.ignore_index)
valid.stop_gradient = True
count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
count.stop_gradient = True
if count > 0:
loss = loss / count
reid_losses += loss
return reid_losses
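
A minimal inference-time sketch of the head above, assuming the file lands under ppdet/modeling/reid/ (as the package __init__ suggests) and using made-up feature and box shapes:

import paddle
from ppdet.modeling.reid.fairmot_embedding_head import FairMOTEmbeddingHead  # assumed module path

head = FairMOTEmbeddingHead(in_channels=64)   # docstring defaults: ch_head=256, ch_emb=128
head.eval()
neck_feat = paddle.rand([1, 64, 152, 272])    # hypothetical neck output
bboxes = paddle.rand([5, 6])                  # [cls_id, score, x1, y1, x2, y2] per detection
bbox_inds = paddle.randint(0, 152 * 272, shape=[5])   # flattened h*w indices of the kept centers
pred_dets, pred_embs = head(neck_feat, inputs=None, bboxes=bboxes, bbox_inds=bbox_inds)
print(pred_embs.shape)                        # [5, 128], channel-normalized embeddings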

View File

@@ -0,0 +1,211 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from paddle.nn.initializer import Normal, Constant
__all__ = ['JDEEmbeddingHead']
class LossParam(nn.Layer):
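# Learnable task-uncertainty weight s: forward() returns 0.5 * (exp(-s) * loss + s),
# which lets the detection and embedding losses be balanced automatically.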
def __init__(self, init_value=0., use_uncertainty=True):
super(LossParam, self).__init__()
self.loss_param = self.create_parameter(
shape=[1],
attr=ParamAttr(initializer=Constant(value=init_value)),
dtype="float32")
def forward(self, inputs):
out = paddle.exp(-self.loss_param) * inputs + self.loss_param
return out * 0.5
@register
class JDEEmbeddingHead(nn.Layer):
__shared__ = ['num_classes']
__inject__ = ['emb_loss', 'jde_loss']
"""
JDEEmbeddingHead
Args:
num_classes(int): Number of classes. Only single-class tracking is supported.
num_identities(int): Number of identities.
anchor_levels(int): Number of anchor levels, same as FPN levels.
anchor_scales(int): Number of anchor scales on each FPN level.
embedding_dim(int): Embedding dimension. Default: 512.
emb_loss(object): Instance of 'JDEEmbeddingLoss'
jde_loss(object): Instance of 'JDELoss'
"""
def __init__(
self,
num_classes=1,
num_identities=14455, # dataset.num_identities_dict[0]
anchor_levels=3,
anchor_scales=4,
embedding_dim=512,
emb_loss='JDEEmbeddingLoss',
jde_loss='JDELoss'):
super(JDEEmbeddingHead, self).__init__()
self.num_classes = num_classes
self.num_identities = num_identities
self.anchor_levels = anchor_levels
self.anchor_scales = anchor_scales
self.embedding_dim = embedding_dim
self.emb_loss = emb_loss
self.jde_loss = jde_loss
self.emb_scale = math.sqrt(2) * math.log(
self.num_identities - 1) if self.num_identities > 1 else 1
self.identify_outputs = []
self.loss_params_cls = []
self.loss_params_reg = []
self.loss_params_ide = []
for i in range(self.anchor_levels):
name = 'identify_output.{}'.format(i)
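# FPN level i carries 64 * 2**anchor_levels // 2**i channels (512, 256, 128 for the default 3 levels).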
identify_output = self.add_sublayer(
name,
nn.Conv2D(
in_channels=64 * (2**self.anchor_levels) // (2**i),
out_channels=self.embedding_dim,
kernel_size=3,
stride=1,
padding=1,
bias_attr=ParamAttr(regularizer=L2Decay(0.))))
self.identify_outputs.append(identify_output)
loss_p_cls = self.add_sublayer('cls.{}'.format(i), LossParam(-4.15))
self.loss_params_cls.append(loss_p_cls)
loss_p_reg = self.add_sublayer('reg.{}'.format(i), LossParam(-4.85))
self.loss_params_reg.append(loss_p_reg)
loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
self.loss_params_ide.append(loss_p_ide)
self.classifier = self.add_sublayer(
'classifier',
nn.Linear(
self.embedding_dim,
self.num_identities,
weight_attr=ParamAttr(
learning_rate=1., initializer=Normal(
mean=0.0, std=0.01)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
def forward(self,
identify_feats,
targets,
loss_confs=None,
loss_boxes=None,
bboxes=None,
boxes_idx=None,
nms_keep_idx=None):
assert self.num_classes == 1, 'JDE only supports single-class MOT.'
assert len(identify_feats) == self.anchor_levels
ide_outs = []
for feat, ide_head in zip(identify_feats, self.identify_outputs):
ide_outs.append(ide_head(feat))
if self.training:
assert len(loss_confs) == len(loss_boxes) == self.anchor_levels
loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale,
self.classifier)
jde_losses = self.jde_loss(
loss_confs, loss_boxes, loss_ides, self.loss_params_cls,
self.loss_params_reg, self.loss_params_ide, targets)
return jde_losses
else:
assert bboxes is not None
assert boxes_idx is not None
assert nms_keep_idx is not None
emb_outs = self.get_emb_outs(ide_outs)
emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)
input_shape = targets['image'].shape[2:]
# input_shape: [h, w], before data transforms, set in model config
im_shape = targets['im_shape'][0].numpy()
# im_shape: [new_h, new_w], after data transforms
scale_factor = targets['scale_factor'][0].numpy()
bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape,
im_shape, scale_factor)
# cls_ids, scores, tlwhs
pred_dets = bboxes
return pred_dets, pred_embs
def scale_coords(self, coords, input_shape, im_shape, scale_factor):
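# Map boxes from the letterboxed network input back to the original image:
# remove the symmetric padding, then undo the resize ratio.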
ratio = scale_factor[0]
pad_w = (input_shape[1] - int(im_shape[1])) / 2
pad_h = (input_shape[0] - int(im_shape[0])) / 2
coords = paddle.cast(coords, 'float32')
coords[:, 0::2] -= pad_w
coords[:, 1::2] -= pad_h
coords[:, 0:4] /= ratio
coords[:, :4] = paddle.clip(
coords[:, :4], min=0, max=coords[:, :4].max())
return coords.round()
def get_emb_and_gt_outs(self, ide_outs, targets):
emb_and_gts = []
for i, p_ide in enumerate(ide_outs):
t_conf = targets['tconf{}'.format(i)]
t_ide = targets['tide{}'.format(i)]
p_ide = p_ide.transpose((0, 2, 3, 1))
p_ide_flatten = paddle.reshape(p_ide, [-1, self.embedding_dim])
mask = t_conf > 0
mask = paddle.cast(mask, dtype="int64")
emb_mask = mask.max(1).flatten()
emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
if len(emb_mask_inds) > 0:
t_ide_flatten = paddle.reshape(t_ide.max(1), [-1, 1])
tids = paddle.gather(t_ide_flatten, emb_mask_inds)
embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
embedding = self.emb_scale * F.normalize(embedding)
emb_and_gt = paddle.concat([embedding, tids], axis=1)
emb_and_gts.append(emb_and_gt)
if len(emb_and_gts) > 0:
return paddle.concat(emb_and_gts, axis=0)
else:
return paddle.zeros((1, self.embedding_dim + 1))
def get_emb_outs(self, ide_outs):
emb_outs = []
for i, p_ide in enumerate(ide_outs):
p_ide = p_ide.transpose((0, 2, 3, 1))
p_ide_repeat = paddle.tile(p_ide, [self.anchor_scales, 1, 1, 1])
embedding = F.normalize(p_ide_repeat, axis=-1)
emb = paddle.reshape(embedding, [-1, self.embedding_dim])
emb_outs.append(emb)
if len(emb_outs) > 0:
return paddle.concat(emb_outs, axis=0)
else:
return paddle.zeros((1, self.embedding_dim))

View File

@@ -0,0 +1,281 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal, XavierNormal
from ppdet.core.workspace import register
__all__ = ['PPLCNetEmbedding']
# Each element (list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block
NET_CONFIG = {
"blocks2":
#k, in_c, out_c, s, use_se
[[3, 16, 32, 1, False]],
"blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
"blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
"blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False],
[5, 256, 256, 1, False], [5, 256, 256, 1, False]],
"blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
def make_divisible(v, divisor=8, min_value=None):
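# Round v to the nearest multiple of divisor (never below min_value), bumping up
# by one divisor if the result drops under 90% of v; e.g. make_divisible(16 * 2.5) -> 40.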
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
num_groups=1):
super().__init__()
self.conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self.bn = BatchNorm2D(
num_filters,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.hardswish = nn.Hardswish()
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
x = self.hardswish(x)
return x
class DepthwiseSeparable(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
dw_size=3,
use_se=False):
super().__init__()
self.use_se = use_se
self.dw_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=num_channels,
filter_size=dw_size,
stride=stride,
num_groups=num_channels)
if use_se:
self.se = SEModule(num_channels)
self.pw_conv = ConvBNLayer(
num_channels=num_channels,
filter_size=1,
num_filters=num_filters,
stride=1)
def forward(self, x):
x = self.dw_conv(x)
if self.use_se:
x = self.se(x)
x = self.pw_conv(x)
return x
class SEModule(nn.Layer):
def __init__(self, channel, reduction=4):
super().__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0)
self.relu = nn.ReLU()
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0)
self.hardsigmoid = nn.Hardsigmoid()
def forward(self, x):
identity = x
x = self.avg_pool(x)
x = self.conv1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.hardsigmoid(x)
x = paddle.multiply(x=identity, y=x)
return x
class PPLCNet(nn.Layer):
"""
PP-LCNet, see https://arxiv.org/abs/2109.15099.
This code is different from PPLCNet in ppdet/modeling/backbones/lcnet.py
or in PaddleClas, because the output is the flattened feature of last_conv.
Args:
scale (float): Scale ratio of channels.
class_expand (int): Number of channels of conv feature.
"""
def __init__(self, scale=1.0, class_expand=1280):
super(PPLCNet, self).__init__()
self.scale = scale
self.class_expand = class_expand
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
num_filters=make_divisible(16 * scale),
stride=2)
self.blocks2 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
])
self.blocks3 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
])
self.blocks4 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
])
self.blocks5 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
])
self.blocks6 = nn.Sequential(*[
DepthwiseSeparable(
num_channels=make_divisible(in_c * scale),
num_filters=make_divisible(out_c * scale),
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
])
self.avg_pool = AdaptiveAvgPool2D(1)
self.last_conv = Conv2D(
in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
out_channels=self.class_expand,
kernel_size=1,
stride=1,
padding=0,
bias_attr=False)
self.hardswish = nn.Hardswish()
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
def forward(self, x):
x = self.conv1(x)
x = self.blocks2(x)
x = self.blocks3(x)
x = self.blocks4(x)
x = self.blocks5(x)
x = self.blocks6(x)
x = self.avg_pool(x)
x = self.last_conv(x)
x = self.hardswish(x)
x = self.flatten(x)
return x
class FC(nn.Layer):
def __init__(self, input_ch, output_ch):
super(FC, self).__init__()
weight_attr = ParamAttr(initializer=XavierNormal())
self.fc = paddle.nn.Linear(input_ch, output_ch, weight_attr=weight_attr)
def forward(self, x):
out = self.fc(x)
return out
@register
class PPLCNetEmbedding(nn.Layer):
"""
PPLCNet Embedding
Args:
input_ch (int): Number of channels of input conv feature.
output_ch (int): Number of channels of output conv feature.
"""
def __init__(self, scale=2.5, input_ch=1280, output_ch=512):
super(PPLCNetEmbedding, self).__init__()
self.backbone = PPLCNet(scale=scale)
self.neck = FC(input_ch, output_ch)
def forward(self, x):
feat = self.backbone(x)
feat_out = self.neck(feat)
return feat_out
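
A quick shape check for the embedding model above (the module path and the crop size are assumptions):

import paddle
from ppdet.modeling.reid.pplcnet_embedding import PPLCNetEmbedding  # assumed module path

model = PPLCNetEmbedding(scale=2.5, input_ch=1280, output_ch=512)
model.eval()
x = paddle.rand([1, 3, 192, 64])   # a person-ReID style crop (assumption)
feat = model(x)
print(feat.shape)                  # [1, 512]: PPLCNet -> 1280-d flattened feature -> FC to 512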

View File

@@ -0,0 +1,146 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register
__all__ = ['PCBPyramid']
@register
class PCBPyramid(nn.Layer):
"""
PCB (Part-based Convolutional Baseline), see https://arxiv.org/abs/1711.09349,
Pyramidal Person Re-IDentification, see https://arxiv.org/abs/1810.12193
Args:
input_ch (int): Number of channels of the input feature.
num_stripes (int): Number of sub-parts.
used_levels (tuple): Whether each level is used; 1 means used.
num_classes (int): Number of classes for identities, default 751 in
Market-1501 dataset.
last_conv_stride (int): Stride of the last conv.
last_conv_dilation (int): Dilation of the last conv.
num_conv_out_channels (int): Number of channels of conv feature.
"""
def __init__(self,
input_ch=2048,
model_name='ResNet101',
num_stripes=6,
used_levels=(1, 1, 1, 1, 1, 1),
num_classes=751,
last_conv_stride=1,
last_conv_dilation=1,
num_conv_out_channels=128):
super(PCBPyramid, self).__init__()
self.num_stripes = num_stripes
self.used_levels = used_levels
self.num_classes = num_classes
self.num_in_each_level = [i for i in range(self.num_stripes, 0, -1)]
self.num_branches = sum(self.num_in_each_level)
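# With the default num_stripes=6 this gives 6 + 5 + 4 + 3 + 2 + 1 = 21 branches.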
assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
self.base = eval(model_name)(
lr_mult=0.1,
last_conv_stride=last_conv_stride,
last_conv_dilation=last_conv_dilation)
self.dropout_layer = nn.Dropout(p=0.2)
self.pyramid_conv_list0, self.pyramid_fc_list0 = self.basic_branch(
num_conv_out_channels, input_ch)
def basic_branch(self, num_conv_out_channels, input_ch):
# The level indexes run from fine to coarse; each level's branches contain
# one more part than those of the previous level, and the sliding step is 1.
pyramid_conv_list = nn.LayerList()
pyramid_fc_list = nn.LayerList()
idx_levels = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
pyramid_conv_list.append(
nn.Sequential(
nn.Conv2D(input_ch, num_conv_out_channels, 1),
nn.BatchNorm2D(num_conv_out_channels), nn.ReLU()))
idx_levels = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
fc = nn.Linear(
in_features=num_conv_out_channels,
out_features=self.num_classes,
weight_attr=ParamAttr(initializer=Normal(
mean=0., std=0.001)),
bias_attr=ParamAttr(initializer=Constant(value=0.)))
pyramid_fc_list.append(fc)
return pyramid_conv_list, pyramid_fc_list
def pyramid_forward(self, feat):
each_stripe_size = int(feat.shape[2] / self.num_stripes)
feat_list, logits_list = [], []
idx_levels = 0
used_branches = 0
for idx_branches in range(self.num_branches):
if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
idx_levels += 1
idx_in_each_level = idx_branches - sum(self.num_in_each_level[
0:idx_levels])
stripe_size_in_each_level = each_stripe_size * (idx_levels + 1)
start = idx_in_each_level * each_stripe_size
end = start + stripe_size_in_each_level
k = feat.shape[-1]
local_feat_avgpool = F.avg_pool2d(
feat[:, :, start:end, :],
kernel_size=(stripe_size_in_each_level, k))
local_feat_maxpool = F.max_pool2d(
feat[:, :, start:end, :],
kernel_size=(stripe_size_in_each_level, k))
local_feat = local_feat_avgpool + local_feat_maxpool
local_feat = self.pyramid_conv_list0[used_branches](local_feat)
local_feat = paddle.reshape(
local_feat, shape=[local_feat.shape[0], -1])
feat_list.append(local_feat)
local_logits = self.pyramid_fc_list0[used_branches](
self.dropout_layer(local_feat))
logits_list.append(local_logits)
used_branches += 1
return feat_list, logits_list
def forward(self, x):
feat = self.base(x)
assert feat.shape[2] % self.num_stripes == 0
feat_list, logits_list = self.pyramid_forward(feat)
feat_out = paddle.concat(feat_list, axis=-1)
return feat_out
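
A sketch of the concatenated part feature produced above; ResNet50 and the input size are assumptions chosen so that the feature height (24) stays divisible by num_stripes:

import paddle
from ppdet.modeling.reid.pyramidal_embedding import PCBPyramid  # assumed module path

model = PCBPyramid(model_name='ResNet50', num_classes=751)
model.eval()
x = paddle.rand([1, 3, 384, 128])
feat_out = model(x)
print(feat_out.shape)   # [1, 2688]: 21 branches x 128 channels each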

View File

@@ -0,0 +1,312 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal
__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
class ConvBNLayer(nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
dilation=1,
groups=1,
act=None,
lr_mult=1.0,
name=None,
data_format="NCHW"):
super(ConvBNLayer, self).__init__()
conv_stdv = filter_size * filter_size * num_filters
self._conv = nn.Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
dilation=dilation,
groups=groups,
weight_attr=ParamAttr(
learning_rate=lr_mult,
initializer=Normal(0, math.sqrt(2. / conv_stdv))),
bias_attr=False,
data_format=data_format)
self._batch_norm = nn.BatchNorm2D(num_filters)
self.act = act
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act:
y = getattr(F, self.act)(y)
return y
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None,
lr_mult=1.0,
dilation=1,
data_format="NCHW"):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
dilation=dilation,
act="relu",
lr_mult=lr_mult,
name=name + "_branch2a",
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
dilation=dilation,
stride=stride,
act="relu",
lr_mult=lr_mult,
name=name + "_branch2b",
data_format=data_format)
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
dilation=dilation,
act=None,
lr_mult=lr_mult,
name=name + "_branch2c",
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
dilation=dilation,
stride=stride,
lr_mult=lr_mult,
name=name + "_branch1",
data_format=data_format)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride,
shortcut=True,
name=None,
data_format="NCHW"):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=3,
stride=stride,
act="relu",
name=name + "_branch2a",
data_format=data_format)
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
act=None,
name=name + "_branch2b",
data_format=data_format)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
stride=stride,
name=name + "_branch1",
data_format=data_format)
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv1)
y = F.relu(y)
return y
class ResNet(nn.Layer):
def __init__(self,
layers=50,
lr_mult=1.0,
last_conv_stride=2,
last_conv_dilation=1):
super(ResNet, self).__init__()
self.layers = layers
self.data_format = "NCHW"
self.input_image_channel = 3
supported_layers = [18, 34, 50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=self.input_image_channel,
num_filters=64,
filter_size=7,
stride=2,
act="relu",
lr_mult=lr_mult,
name="conv1",
data_format=self.data_format)
self.pool2d_max = nn.MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
self.block_list = []
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
if i != 0 or block == 0:
stride = 1
elif block == len(depth) - 1:
stride = last_conv_stride
else:
stride = 2
bottleneck_block = self.add_sublayer(
conv_name,
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=stride,
shortcut=shortcut,
name=conv_name,
lr_mult=lr_mult,
dilation=last_conv_dilation
if block == len(depth) - 1 else 1,
data_format=self.data_format))
self.block_list.append(bottleneck_block)
shortcut = True
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
basic_block = self.add_sublayer(
conv_name,
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name,
data_format=self.data_format))
self.block_list.append(basic_block)
shortcut = True
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
return y
def ResNet18(**args):
model = ResNet(layers=18, **args)
return model
def ResNet34(**args):
model = ResNet(layers=34, **args)
return model
def ResNet50(pretrained=None, **args):
model = ResNet(layers=50, **args)
if pretrained is not None:
if not (os.path.isdir(pretrained) or
os.path.exists(pretrained + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(pretrained))
param_state_dict = paddle.load(pretrained + '.pdparams')
model.set_dict(param_state_dict)
return model
def ResNet101(pretrained=None, **args):
model = ResNet(layers=101, **args)
if pretrained is not None:
if not (os.path.isdir(pretrained) or
os.path.exists(pretrained + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(pretrained))
param_state_dict = paddle.load(pretrained + '.pdparams')
model.set_dict(param_state_dict)
return model
def ResNet152(**args):
model = ResNet(layers=152, **args)
return model
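
The last_conv_stride and last_conv_dilation arguments let the ReID models above keep a finer feature map than the usual stride-32 ResNet. A small shape check (the input size is an assumption):

import paddle
from ppdet.modeling.reid.resnet import ResNet50  # assumed module path

model = ResNet50(last_conv_stride=1)
model.eval()
x = paddle.rand([1, 3, 256, 128])
y = model(x)
print(y.shape)   # [1, 2048, 16, 8]: overall stride 16 instead of 32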

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import paddle.nn.functional as F
from paddle import nn
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register
__all__ = ['ResNetEmbedding']
@register
class ResNetEmbedding(nn.Layer):
in_planes = 2048
def __init__(self, model_name='ResNet50', last_stride=1):
super(ResNetEmbedding, self).__init__()
assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
self.base = eval(model_name)(last_conv_stride=last_stride)
self.gap = nn.AdaptiveAvgPool2D(output_size=1)
self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
self.bn = nn.BatchNorm1D(self.in_planes, bias_attr=False)
def forward(self, x):
base_out = self.base(x)
global_feat = self.gap(base_out)
global_feat = self.flatten(global_feat)
global_feat = self.bn(global_feat)
return global_feat
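
A matching sketch for the embedding wrapper above (module path and crop size are assumptions):

import paddle
from ppdet.modeling.reid.resnet_embedding import ResNetEmbedding  # assumed module path

model = ResNetEmbedding(model_name='ResNet50', last_stride=1)
model.eval()
x = paddle.rand([1, 3, 256, 128])
feat = model(x)
print(feat.shape)   # [1, 2048]: global average pooled, flattened, then BatchNorm1D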