Replace document detection model
27
paddle_detection/ppdet/modeling/reid/__init__.py
Normal file
@@ -0,0 +1,27 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import jde_embedding_head
from . import fairmot_embedding_head
from . import resnet
from . import pyramidal_embedding
from . import pplcnet_embedding
from . import resnet_embedding

from .fairmot_embedding_head import *
from .jde_embedding_head import *
from .resnet import *
from .pyramidal_embedding import *
from .pplcnet_embedding import *
from .resnet_embedding import *
224
paddle_detection/ppdet/modeling/reid/fairmot_embedding_head.py
Normal file
@@ -0,0 +1,224 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import KaimingUniform, Uniform
from ppdet.core.workspace import register
from ppdet.modeling.heads.centernet_head import ConvLayer

__all__ = ['FairMOTEmbeddingHead']


@register
class FairMOTEmbeddingHead(nn.Layer):
    __shared__ = ['num_classes']
    """
    Args:
        in_channels (int): the channel number of input to FairMOTEmbeddingHead.
        ch_head (int): the channel of features before fed into embedding, 256 by default.
        ch_emb (int): the channel of the embedding feature, 128 by default.
        num_identities_dict (dict): the number of identities of each category,
            support single class and multi-class, {0: 14455} as default.
    """

    def __init__(self,
                 in_channels,
                 ch_head=256,
                 ch_emb=128,
                 num_classes=1,
                 num_identities_dict={0: 14455}):
        super(FairMOTEmbeddingHead, self).__init__()
        assert num_classes >= 1
        self.num_classes = num_classes
        self.ch_emb = ch_emb
        self.num_identities_dict = num_identities_dict
        self.reid = nn.Sequential(
            ConvLayer(
                in_channels, ch_head, kernel_size=3, padding=1, bias=True),
            nn.ReLU(),
            ConvLayer(
                ch_head, ch_emb, kernel_size=1, stride=1, padding=0, bias=True))
        param_attr = paddle.ParamAttr(initializer=KaimingUniform())
        bound = 1 / math.sqrt(ch_emb)
        bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
        self.reid_loss = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum')

        if num_classes == 1:
            nID = self.num_identities_dict[0]  # single class
            self.classifier = nn.Linear(
                ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
            # When num_identities(nID) is 1, emb_scale is set as 1
            self.emb_scale = math.sqrt(2) * math.log(nID - 1) if nID > 1 else 1
        else:
            self.classifiers = dict()
            self.emb_scale_dict = dict()
            for cls_id, nID in self.num_identities_dict.items():
                self.classifiers[str(cls_id)] = nn.Linear(
                    ch_emb, nID, weight_attr=param_attr, bias_attr=bias_attr)
                # When num_identities(nID) is 1, emb_scale is set as 1
                self.emb_scale_dict[str(cls_id)] = math.sqrt(2) * math.log(
                    nID - 1) if nID > 1 else 1

    @classmethod
    def from_config(cls, cfg, input_shape):
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels}

    def process_by_class(self, bboxes, embedding, bbox_inds, topk_clses):
        pred_dets, pred_embs = [], []
        for cls_id in range(self.num_classes):
            inds_masks = topk_clses == cls_id
            inds_masks = paddle.cast(inds_masks, 'float32')

            pos_num = inds_masks.sum().numpy()
            if pos_num == 0:
                continue

            cls_inds_mask = inds_masks > 0

            bbox_mask = paddle.nonzero(cls_inds_mask)
            cls_bboxes = paddle.gather_nd(bboxes, bbox_mask)
            pred_dets.append(cls_bboxes)

            cls_inds = paddle.masked_select(bbox_inds, cls_inds_mask)
            cls_inds = cls_inds.unsqueeze(-1)
            cls_embedding = paddle.gather_nd(embedding, cls_inds)
            pred_embs.append(cls_embedding)

        return paddle.concat(pred_dets), paddle.concat(pred_embs)

    def forward(self,
                neck_feat,
                inputs,
                bboxes=None,
                bbox_inds=None,
                topk_clses=None):
        reid_feat = self.reid(neck_feat)
        if self.training:
            if self.num_classes == 1:
                loss = self.get_loss(reid_feat, inputs)
            else:
                loss = self.get_mc_loss(reid_feat, inputs)
            return loss
        else:
            assert bboxes is not None and bbox_inds is not None
            reid_feat = F.normalize(reid_feat)
            embedding = paddle.transpose(reid_feat, [0, 2, 3, 1])
            embedding = paddle.reshape(embedding, [-1, self.ch_emb])
            # embedding shape: [bs * h * w, ch_emb]

            if self.num_classes == 1:
                pred_dets = bboxes
                pred_embs = paddle.gather(embedding, bbox_inds)
            else:
                pred_dets, pred_embs = self.process_by_class(
                    bboxes, embedding, bbox_inds, topk_clses)
            return pred_dets, pred_embs

    def get_loss(self, feat, inputs):
        index = inputs['index']
        mask = inputs['index_mask']
        target = inputs['reid']
        target = paddle.masked_select(target, mask > 0)
        target = paddle.unsqueeze(target, 1)

        feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
        feat_n, feat_h, feat_w, feat_c = feat.shape
        feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])
        index = paddle.unsqueeze(index, 2)
        batch_inds = list()
        for i in range(feat_n):
            batch_ind = paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            batch_inds.append(batch_ind)
        batch_inds = paddle.concat(batch_inds, axis=0)
        index = paddle.concat(x=[batch_inds, index], axis=2)
        feat = paddle.gather_nd(feat, index=index)

        mask = paddle.unsqueeze(mask, axis=2)
        mask = paddle.expand_as(mask, feat)
        mask.stop_gradient = True
        feat = paddle.masked_select(feat, mask > 0)
        feat = paddle.reshape(feat, shape=[-1, feat_c])
        feat = F.normalize(feat)
        feat = self.emb_scale * feat
        logit = self.classifier(feat)
        target.stop_gradient = True
        loss = self.reid_loss(logit, target)
        valid = (target != self.reid_loss.ignore_index)
        valid.stop_gradient = True
        count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
        count.stop_gradient = True
        if count > 0:
            loss = loss / count

        return loss

    def get_mc_loss(self, feat, inputs):
        # feat.shape = [bs, ch_emb, h, w]
        assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
        index = inputs['index']
        mask = inputs['index_mask']
        cls_id_map = inputs['cls_id_map']  # [bs, h, w]
        cls_tr_ids = inputs['cls_tr_ids']  # [bs, num_classes, h, w]

        feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
        feat_n, feat_h, feat_w, feat_c = feat.shape
        feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])

        index = paddle.unsqueeze(index, 2)
        batch_inds = list()
        for i in range(feat_n):
            batch_ind = paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            batch_inds.append(batch_ind)
        batch_inds = paddle.concat(batch_inds, axis=0)
        index = paddle.concat(x=[batch_inds, index], axis=2)
        feat = paddle.gather_nd(feat, index=index)

        mask = paddle.unsqueeze(mask, axis=2)
        mask = paddle.expand_as(mask, feat)
        mask.stop_gradient = True
        feat = paddle.masked_select(feat, mask > 0)
        feat = paddle.reshape(feat, shape=[-1, feat_c])

        reid_losses = 0
        for cls_id, id_num in self.num_identities_dict.items():
            # target
            cur_cls_tr_ids = paddle.reshape(
                cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1])  # [bs, h*w]
            cls_id_target = paddle.gather_nd(cur_cls_tr_ids, index=index)
            mask = inputs['index_mask']
            cls_id_target = paddle.masked_select(cls_id_target, mask > 0)
            cls_id_target.stop_gradient = True

            # feat
            cls_id_feat = self.emb_scale_dict[str(cls_id)] * F.normalize(feat)
            cls_id_pred = self.classifiers[str(cls_id)](cls_id_feat)

            loss = self.reid_loss(cls_id_pred, cls_id_target)
            valid = (cls_id_target != self.reid_loss.ignore_index)
            valid.stop_gradient = True
            count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
            count.stop_gradient = True
            if count > 0:
                loss = loss / count
            reid_losses += loss

        return reid_losses
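A minimal sketch, with purely illustrative shapes (not part of the commit), of the batch-index trick that get_loss and get_mc_loss above use to pick one embedding vector per annotated object via paddle.gather_nd; the classifier input is then L2-normalized and scaled by emb_scale = sqrt(2) * log(nID - 1).

import paddle

# [bs, h*w, ch_emb] flattened embedding map and [bs, max_objs, 1] spatial
# center indices; all shapes here are assumptions for illustration only.
feat = paddle.rand([2, 152 * 272, 128])
index = paddle.randint(0, 152 * 272, [2, 500, 1], dtype='int64')

# Prefix each spatial index with its image index so gather_nd can address the
# [bs, h*w] grid directly, exactly as the loop over feat_n does above.
batch_inds = paddle.concat([
    paddle.full([1, index.shape[1], 1], i, dtype='int64')
    for i in range(feat.shape[0])
], axis=0)
nd_index = paddle.concat([batch_inds, index], axis=2)   # [bs, max_objs, 2]
picked = paddle.gather_nd(feat, nd_index)               # [bs, max_objs, ch_emb]
print(picked.shape)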
211
paddle_detection/ppdet/modeling/reid/jde_embedding_head.py
Normal file
@@ -0,0 +1,211 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from paddle.nn.initializer import Normal, Constant

__all__ = ['JDEEmbeddingHead']


class LossParam(nn.Layer):
    def __init__(self, init_value=0., use_uncertainty=True):
        super(LossParam, self).__init__()
        self.loss_param = self.create_parameter(
            shape=[1],
            attr=ParamAttr(initializer=Constant(value=init_value)),
            dtype="float32")

    def forward(self, inputs):
        out = paddle.exp(-self.loss_param) * inputs + self.loss_param
        return out * 0.5


@register
class JDEEmbeddingHead(nn.Layer):
    __shared__ = ['num_classes']
    __inject__ = ['emb_loss', 'jde_loss']
    """
    JDEEmbeddingHead
    Args:
        num_classes(int): Number of classes. Only support one class tracking.
        num_identities(int): Number of identities.
        anchor_levels(int): Number of anchor levels, same as FPN levels.
        anchor_scales(int): Number of anchor scales on each FPN level.
        embedding_dim(int): Embedding dimension. Default: 512.
        emb_loss(object): Instance of 'JDEEmbeddingLoss'
        jde_loss(object): Instance of 'JDELoss'
    """

    def __init__(
            self,
            num_classes=1,
            num_identities=14455,  # dataset.num_identities_dict[0]
            anchor_levels=3,
            anchor_scales=4,
            embedding_dim=512,
            emb_loss='JDEEmbeddingLoss',
            jde_loss='JDELoss'):
        super(JDEEmbeddingHead, self).__init__()
        self.num_classes = num_classes
        self.num_identities = num_identities
        self.anchor_levels = anchor_levels
        self.anchor_scales = anchor_scales
        self.embedding_dim = embedding_dim
        self.emb_loss = emb_loss
        self.jde_loss = jde_loss

        self.emb_scale = math.sqrt(2) * math.log(
            self.num_identities - 1) if self.num_identities > 1 else 1

        self.identify_outputs = []
        self.loss_params_cls = []
        self.loss_params_reg = []
        self.loss_params_ide = []
        for i in range(self.anchor_levels):
            name = 'identify_output.{}'.format(i)
            identify_output = self.add_sublayer(
                name,
                nn.Conv2D(
                    in_channels=64 * (2**self.anchor_levels) // (2**i),
                    out_channels=self.embedding_dim,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias_attr=ParamAttr(regularizer=L2Decay(0.))))
            self.identify_outputs.append(identify_output)

            loss_p_cls = self.add_sublayer('cls.{}'.format(i), LossParam(-4.15))
            self.loss_params_cls.append(loss_p_cls)
            loss_p_reg = self.add_sublayer('reg.{}'.format(i), LossParam(-4.85))
            self.loss_params_reg.append(loss_p_reg)
            loss_p_ide = self.add_sublayer('ide.{}'.format(i), LossParam(-2.3))
            self.loss_params_ide.append(loss_p_ide)

        self.classifier = self.add_sublayer(
            'classifier',
            nn.Linear(
                self.embedding_dim,
                self.num_identities,
                weight_attr=ParamAttr(
                    learning_rate=1., initializer=Normal(
                        mean=0.0, std=0.01)),
                bias_attr=ParamAttr(
                    learning_rate=2., regularizer=L2Decay(0.))))

    def forward(self,
                identify_feats,
                targets,
                loss_confs=None,
                loss_boxes=None,
                bboxes=None,
                boxes_idx=None,
                nms_keep_idx=None):
        assert self.num_classes == 1, 'JDE only support single class MOT.'
        assert len(identify_feats) == self.anchor_levels
        ide_outs = []
        for feat, ide_head in zip(identify_feats, self.identify_outputs):
            ide_outs.append(ide_head(feat))

        if self.training:
            assert len(loss_confs) == len(loss_boxes) == self.anchor_levels
            loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale,
                                      self.classifier)
            jde_losses = self.jde_loss(
                loss_confs, loss_boxes, loss_ides, self.loss_params_cls,
                self.loss_params_reg, self.loss_params_ide, targets)
            return jde_losses
        else:
            assert bboxes is not None
            assert boxes_idx is not None
            assert nms_keep_idx is not None

            emb_outs = self.get_emb_outs(ide_outs)
            emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
            pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)

            input_shape = targets['image'].shape[2:]
            # input_shape: [h, w], before data transforms, set in model config
            im_shape = targets['im_shape'][0].numpy()
            # im_shape: [new_h, new_w], after data transforms
            scale_factor = targets['scale_factor'][0].numpy()
            bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape,
                                              im_shape, scale_factor)
            # cls_ids, scores, tlwhs
            pred_dets = bboxes
            return pred_dets, pred_embs

    def scale_coords(self, coords, input_shape, im_shape, scale_factor):
        ratio = scale_factor[0]
        pad_w = (input_shape[1] - int(im_shape[1])) / 2
        pad_h = (input_shape[0] - int(im_shape[0])) / 2
        coords = paddle.cast(coords, 'float32')
        coords[:, 0::2] -= pad_w
        coords[:, 1::2] -= pad_h
        coords[:, 0:4] /= ratio
        coords[:, :4] = paddle.clip(
            coords[:, :4], min=0, max=coords[:, :4].max())
        return coords.round()

    def get_emb_and_gt_outs(self, ide_outs, targets):
        emb_and_gts = []
        for i, p_ide in enumerate(ide_outs):
            t_conf = targets['tconf{}'.format(i)]
            t_ide = targets['tide{}'.format(i)]

            p_ide = p_ide.transpose((0, 2, 3, 1))
            p_ide_flatten = paddle.reshape(p_ide, [-1, self.embedding_dim])

            mask = t_conf > 0
            mask = paddle.cast(mask, dtype="int64")
            emb_mask = mask.max(1).flatten()
            emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
            if len(emb_mask_inds) > 0:
                t_ide_flatten = paddle.reshape(t_ide.max(1), [-1, 1])
                tids = paddle.gather(t_ide_flatten, emb_mask_inds)

                embedding = paddle.gather(p_ide_flatten, emb_mask_inds)
                embedding = self.emb_scale * F.normalize(embedding)
                emb_and_gt = paddle.concat([embedding, tids], axis=1)
                emb_and_gts.append(emb_and_gt)

        if len(emb_and_gts) > 0:
            return paddle.concat(emb_and_gts, axis=0)
        else:
            return paddle.zeros((1, self.embedding_dim + 1))

    def get_emb_outs(self, ide_outs):
        emb_outs = []
        for i, p_ide in enumerate(ide_outs):
            p_ide = p_ide.transpose((0, 2, 3, 1))

            p_ide_repeat = paddle.tile(p_ide, [self.anchor_scales, 1, 1, 1])
            embedding = F.normalize(p_ide_repeat, axis=-1)
            emb = paddle.reshape(embedding, [-1, self.embedding_dim])
            emb_outs.append(emb)

        if len(emb_outs) > 0:
            return paddle.concat(emb_outs, axis=0)
        else:
            return paddle.zeros((1, self.embedding_dim))
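The LossParam layers above appear to implement learnable task-uncertainty weighting: each per-level loss L is combined as 0.5 * (exp(-s) * L + s) with a trainable scalar s initialized to -4.15 (cls), -4.85 (reg) and -2.3 (ide). A small stand-alone check of that weighting, under this assumed interpretation (not part of the commit):

import math

# weighted = 0.5 * (exp(-s) * loss + s); a smaller s gives a larger
# effective weight at initialization, and s is learned during training.
for name, s in [('cls', -4.15), ('reg', -4.85), ('ide', -2.3)]:
    weight = 0.5 * math.exp(-s)
    print(f"{name}: initial effective weight ~ {weight:.1f}")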
281
paddle_detection/ppdet/modeling/reid/pplcnet_embedding.py
Normal file
@@ -0,0 +1,281 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal, XavierNormal
from ppdet.core.workspace import register

__all__ = ['PPLCNetEmbedding']


# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block

NET_CONFIG = {
    "blocks2":
    #k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}


def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 num_groups=1):
        super().__init__()

        self.conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)

        self.bn = BatchNorm2D(
            num_filters,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
        self.hardswish = nn.Hardswish()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.hardswish(x)
        return x


class DepthwiseSeparable(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 dw_size=3,
                 use_se=False):
        super().__init__()
        self.use_se = use_se
        self.dw_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_channels,
            filter_size=dw_size,
            stride=stride,
            num_groups=num_channels)
        if use_se:
            self.se = SEModule(num_channels)
        self.pw_conv = ConvBNLayer(
            num_channels=num_channels,
            filter_size=1,
            num_filters=num_filters,
            stride=1)

    def forward(self, x):
        x = self.dw_conv(x)
        if self.use_se:
            x = self.se(x)
        x = self.pw_conv(x)
        return x


class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4):
        super().__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0)
        self.relu = nn.ReLU()
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0)
        self.hardsigmoid = nn.Hardsigmoid()

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.hardsigmoid(x)
        x = paddle.multiply(x=identity, y=x)
        return x


class PPLCNet(nn.Layer):
    """
    PP-LCNet, see https://arxiv.org/abs/2109.15099.
    This code is different from PPLCNet in ppdet/modeling/backbones/lcnet.py
    or in PaddleClas, because the output is the flatten feature of last_conv.

    Args:
        scale (float): Scale ratio of channels.
        class_expand (int): Number of channels of conv feature.
    """

    def __init__(self, scale=1.0, class_expand=1280):
        super(PPLCNet, self).__init__()
        self.scale = scale
        self.class_expand = class_expand

        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            num_filters=make_divisible(16 * scale),
            stride=2)

        self.blocks2 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
        ])

        self.blocks3 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
        ])

        self.blocks4 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
        ])

        self.blocks5 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
        ])

        self.blocks6 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
        ])

        self.avg_pool = AdaptiveAvgPool2D(1)
        self.last_conv = Conv2D(
            in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
            out_channels=self.class_expand,
            kernel_size=1,
            stride=1,
            padding=0,
            bias_attr=False)
        self.hardswish = nn.Hardswish()
        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)

    def forward(self, x):
        x = self.conv1(x)

        x = self.blocks2(x)
        x = self.blocks3(x)
        x = self.blocks4(x)
        x = self.blocks5(x)
        x = self.blocks6(x)

        x = self.avg_pool(x)
        x = self.last_conv(x)
        x = self.hardswish(x)
        x = self.flatten(x)
        return x


class FC(nn.Layer):
    def __init__(self, input_ch, output_ch):
        super(FC, self).__init__()
        weight_attr = ParamAttr(initializer=XavierNormal())
        self.fc = paddle.nn.Linear(input_ch, output_ch, weight_attr=weight_attr)

    def forward(self, x):
        out = self.fc(x)
        return out


@register
class PPLCNetEmbedding(nn.Layer):
    """
    PPLCNet Embedding

    Args:
        input_ch (int): Number of channels of input conv feature.
        output_ch (int): Number of channels of output conv feature.
    """
    def __init__(self, scale=2.5, input_ch=1280, output_ch=512):
        super(PPLCNetEmbedding, self).__init__()
        self.backbone = PPLCNet(scale=scale)
        self.neck = FC(input_ch, output_ch)

    def forward(self, x):
        feat = self.backbone(x)
        feat_out = self.neck(feat)
        return feat_out
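A quick, self-contained check (not part of the commit) of how make_divisible above rounds scaled channel counts to multiples of 8, e.g. for the scale=2.5 default used by PPLCNetEmbedding:

def make_divisible(v, divisor=8, min_value=None):
    # Same rounding rule as above: nearest multiple of divisor, never below
    # the divisor and never more than ~10% below the requested value.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(make_divisible(16 * 2.5))    # 40
print(make_divisible(512 * 2.5))   # 1280, the in_channels of last_conv at scale=2.5
print(make_divisible(16 * 0.25))   # 8, clamped to the divisor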
146
paddle_detection/ppdet/modeling/reid/pyramidal_embedding.py
Normal file
@@ -0,0 +1,146 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from paddle import ParamAttr
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register

__all__ = ['PCBPyramid']


@register
class PCBPyramid(nn.Layer):
    """
    PCB (Part-based Convolutional Baseline), see https://arxiv.org/abs/1711.09349,
    Pyramidal Person Re-IDentification, see https://arxiv.org/abs/1810.12193

    Args:
        input_ch (int): Number of channels of the input feature.
        num_stripes (int): Number of sub-parts.
        used_levels (tuple): Whether the level is used, 1 means used.
        num_classes (int): Number of classes for identities, default 751 in
            Market-1501 dataset.
        last_conv_stride (int): Stride of the last conv.
        last_conv_dilation (int): Dilation of the last conv.
        num_conv_out_channels (int): Number of channels of conv feature.
    """

    def __init__(self,
                 input_ch=2048,
                 model_name='ResNet101',
                 num_stripes=6,
                 used_levels=(1, 1, 1, 1, 1, 1),
                 num_classes=751,
                 last_conv_stride=1,
                 last_conv_dilation=1,
                 num_conv_out_channels=128):
        super(PCBPyramid, self).__init__()
        self.num_stripes = num_stripes
        self.used_levels = used_levels
        self.num_classes = num_classes

        self.num_in_each_level = [i for i in range(self.num_stripes, 0, -1)]
        self.num_branches = sum(self.num_in_each_level)

        assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
        self.base = eval(model_name)(
            lr_mult=0.1,
            last_conv_stride=last_conv_stride,
            last_conv_dilation=last_conv_dilation)
        self.dropout_layer = nn.Dropout(p=0.2)
        self.pyramid_conv_list0, self.pyramid_fc_list0 = self.basic_branch(
            num_conv_out_channels, input_ch)

    def basic_branch(self, num_conv_out_channels, input_ch):
        # the level indexes are defined from fine to coarse,
        # the branch will contain one more part than that of its previous level
        # the sliding step is set to 1
        pyramid_conv_list = nn.LayerList()
        pyramid_fc_list = nn.LayerList()

        idx_levels = 0
        for idx_branches in range(self.num_branches):
            if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                idx_levels += 1

            pyramid_conv_list.append(
                nn.Sequential(
                    nn.Conv2D(input_ch, num_conv_out_channels, 1),
                    nn.BatchNorm2D(num_conv_out_channels), nn.ReLU()))

        idx_levels = 0
        for idx_branches in range(self.num_branches):
            if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                idx_levels += 1

            fc = nn.Linear(
                in_features=num_conv_out_channels,
                out_features=self.num_classes,
                weight_attr=ParamAttr(initializer=Normal(
                    mean=0., std=0.001)),
                bias_attr=ParamAttr(initializer=Constant(value=0.)))
            pyramid_fc_list.append(fc)
        return pyramid_conv_list, pyramid_fc_list

    def pyramid_forward(self, feat):
        each_stripe_size = int(feat.shape[2] / self.num_stripes)

        feat_list, logits_list = [], []
        idx_levels = 0
        used_branches = 0
        for idx_branches in range(self.num_branches):
            if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                idx_levels += 1
            idx_in_each_level = idx_branches - sum(self.num_in_each_level[
                0:idx_levels])
            stripe_size_in_each_level = each_stripe_size * (idx_levels + 1)
            start = idx_in_each_level * each_stripe_size
            end = start + stripe_size_in_each_level

            k = feat.shape[-1]
            local_feat_avgpool = F.avg_pool2d(
                feat[:, :, start:end, :],
                kernel_size=(stripe_size_in_each_level, k))
            local_feat_maxpool = F.max_pool2d(
                feat[:, :, start:end, :],
                kernel_size=(stripe_size_in_each_level, k))
            local_feat = local_feat_avgpool + local_feat_maxpool

            local_feat = self.pyramid_conv_list0[used_branches](local_feat)
            local_feat = paddle.reshape(
                local_feat, shape=[local_feat.shape[0], -1])
            feat_list.append(local_feat)

            local_logits = self.pyramid_fc_list0[used_branches](
                self.dropout_layer(local_feat))
            logits_list.append(local_logits)

            used_branches += 1

        return feat_list, logits_list

    def forward(self, x):
        feat = self.base(x)
        assert feat.shape[2] % self.num_stripes == 0
        feat_list, logits_list = self.pyramid_forward(feat)
        feat_out = paddle.concat(feat_list, axis=-1)
        return feat_out
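A small sketch (not part of the commit) of how PCBPyramid above turns num_stripes=6 into 6+5+4+3+2+1 = 21 branches and which rows of the feature map each branch pools, mirroring the index arithmetic in pyramid_forward for an assumed feature height of 24:

num_stripes = 6
num_in_each_level = list(range(num_stripes, 0, -1))   # [6, 5, 4, 3, 2, 1]
num_branches = sum(num_in_each_level)                 # 21
each_stripe_size = 24 // num_stripes                  # feature height 24 assumed

idx_levels = 0
for idx_branches in range(num_branches):
    if idx_branches >= sum(num_in_each_level[:idx_levels + 1]):
        idx_levels += 1
    idx_in_level = idx_branches - sum(num_in_each_level[:idx_levels])
    start = idx_in_level * each_stripe_size
    end = start + each_stripe_size * (idx_levels + 1)
    print(f"branch {idx_branches:2d}: level {idx_levels}, rows [{start}, {end})")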
312
paddle_detection/ppdet/modeling/reid/resnet.py
Normal file
@@ -0,0 +1,312 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal

__all__ = ["ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 act=None,
                 lr_mult=1.0,
                 name=None,
                 data_format="NCHW"):
        super(ConvBNLayer, self).__init__()
        conv_stdv = filter_size * filter_size * num_filters
        self._conv = nn.Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            dilation=dilation,
            groups=groups,
            weight_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=Normal(0, math.sqrt(2. / conv_stdv))),
            bias_attr=False,
            data_format=data_format)

        self._batch_norm = nn.BatchNorm2D(num_filters)
        self.act = act

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self.act:
            y = getattr(F, self.act)(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 name=None,
                 lr_mult=1.0,
                 dilation=1,
                 data_format="NCHW"):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            dilation=dilation,
            act="relu",
            lr_mult=lr_mult,
            name=name + "_branch2a",
            data_format=data_format)
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            dilation=dilation,
            stride=stride,
            act="relu",
            lr_mult=lr_mult,
            name=name + "_branch2b",
            data_format=data_format)
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            dilation=dilation,
            act=None,
            lr_mult=lr_mult,
            name=name + "_branch2c",
            data_format=data_format)
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                dilation=dilation,
                stride=stride,
                lr_mult=lr_mult,
                name=name + "_branch1",
                data_format=data_format)
        self.shortcut = shortcut
        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 name=None,
                 data_format="NCHW"):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act="relu",
            name=name + "_branch2a",
            data_format=data_format)
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b",
            data_format=data_format)
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters,
                filter_size=1,
                stride=stride,
                name=name + "_branch1",
                data_format=data_format)
        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet(nn.Layer):
    def __init__(self,
                 layers=50,
                 lr_mult=1.0,
                 last_conv_stride=2,
                 last_conv_dilation=1):
        super(ResNet, self).__init__()
        self.layers = layers
        self.data_format = "NCHW"
        self.input_image_channel = 3
        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_channels = [64, 256, 512,
                        1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]
        self.conv = ConvBNLayer(
            num_channels=self.input_image_channel,
            num_filters=64,
            filter_size=7,
            stride=2,
            act="relu",
            lr_mult=lr_mult,
            name="conv1",
            data_format=self.data_format)
        self.pool2d_max = nn.MaxPool2D(
            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
        self.block_list = []
        if layers >= 50:
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i != 0 or block == 0:
                        stride = 1
                    elif block == len(depth) - 1:
                        stride = last_conv_stride
                    else:
                        stride = 2
                    bottleneck_block = self.add_sublayer(
                        conv_name,
                        BottleneckBlock(
                            num_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            num_filters=num_filters[block],
                            stride=stride,
                            shortcut=shortcut,
                            name=conv_name,
                            lr_mult=lr_mult,
                            dilation=last_conv_dilation
                            if block == len(depth) - 1 else 1,
                            data_format=self.data_format))
                    self.block_list.append(bottleneck_block)
                    shortcut = True
        else:
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        conv_name,
                        BasicBlock(
                            num_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            num_filters=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            name=conv_name,
                            data_format=self.data_format))
                    self.block_list.append(basic_block)
                    shortcut = True

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for block in self.block_list:
            y = block(y)
        return y


def ResNet18(**args):
    model = ResNet(layers=18, **args)
    return model


def ResNet34(**args):
    model = ResNet(layers=34, **args)
    return model


def ResNet50(pretrained=None, **args):
    model = ResNet(layers=50, **args)
    if pretrained is not None:
        if not (os.path.isdir(pretrained) or
                os.path.exists(pretrained + '.pdparams')):
            raise ValueError("Model pretrain path {} does not "
                             "exist.".format(pretrained))
        param_state_dict = paddle.load(pretrained + '.pdparams')
        model.set_dict(param_state_dict)
    return model


def ResNet101(pretrained=None, **args):
    model = ResNet(layers=101, **args)
    if pretrained is not None:
        if not (os.path.isdir(pretrained) or
                os.path.exists(pretrained + '.pdparams')):
            raise ValueError("Model pretrain path {} does not "
                             "exist.".format(pretrained))
        param_state_dict = paddle.load(pretrained + '.pdparams')
        model.set_dict(param_state_dict)
    return model


def ResNet152(**args):
    model = ResNet(layers=152, **args)
    return model
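The extra last_conv_stride/last_conv_dilation arguments are what the ReID modules above rely on: passing last_conv_stride=1 keeps the final stage at stride 16 instead of 32, giving a taller feature map to pool or split into stripes. A rough illustration with an assumed 256x128 input crop (not part of the commit):

# total stride is 32 with the default last_conv_stride=2, 16 when it is 1
for last_stride, total_stride in [(2, 32), (1, 16)]:
    h, w = 256 // total_stride, 128 // total_stride
    print(f"last_conv_stride={last_stride}: output feature map {h}x{w}")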
41
paddle_detection/ppdet/modeling/reid/resnet_embedding.py
Normal file
@@ -0,0 +1,41 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import paddle
import paddle.nn.functional as F
from paddle import nn
from .resnet import ResNet50, ResNet101
from ppdet.core.workspace import register

__all__ = ['ResNetEmbedding']


@register
class ResNetEmbedding(nn.Layer):
    in_planes = 2048
    def __init__(self, model_name='ResNet50', last_stride=1):
        super(ResNetEmbedding, self).__init__()
        assert model_name in ['ResNet50', 'ResNet101'], "Unsupported ReID arch: {}".format(model_name)
        self.base = eval(model_name)(last_conv_stride=last_stride)
        self.gap = nn.AdaptiveAvgPool2D(output_size=1)
        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)
        self.bn = nn.BatchNorm1D(self.in_planes, bias_attr=False)

    def forward(self, x):
        base_out = self.base(x)
        global_feat = self.gap(base_out)
        global_feat = self.flatten(global_feat)
        global_feat = self.bn(global_feat)
        return global_feat
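A hedged usage sketch (not part of the commit, shapes assumed): ResNetEmbedding yields one 2048-d, batch-normalized global feature per input crop.

import paddle
from ppdet.modeling.reid import ResNetEmbedding

model = ResNetEmbedding(model_name='ResNet50', last_stride=1)
model.eval()
x = paddle.rand([1, 3, 256, 128])   # an assumed ReID crop size
feat = model(x)
print(feat.shape)                   # expected: [1, 2048]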