Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,41 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import fpn
from . import yolo_fpn
from . import hrfpn
from . import ttf_fpn
from . import centernet_fpn
from . import blazeface_fpn
from . import bifpn
from . import csp_pan
from . import es_pan
from . import lc_pan
from . import custom_pan
from . import dilated_encoder
from . import channel_mapper
from . import clrnet_fpn
from .fpn import *
from .yolo_fpn import *
from .hrfpn import *
from .ttf_fpn import *
from .centernet_fpn import *
from .blazeface_fpn import *
from .bifpn import *
from .csp_pan import *
from .es_pan import *
from .lc_pan import *
from .custom_pan import *
from .dilated_encoder import *
from .channel_mapper import *
from .clrnet_fpn import *


@@ -0,0 +1,300 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ..shape_spec import ShapeSpec
__all__ = ['BiFPN']
class SeparableConvLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels=None,
kernel_size=3,
norm_type='bn',
norm_groups=32,
act='swish'):
super(SeparableConvLayer, self).__init__()
assert norm_type in ['bn', 'sync_bn', 'gn', None]
assert act in ['swish', 'relu', None]
        self.in_channels = in_channels
        self.out_channels = in_channels if out_channels is None else out_channels
self.norm_type = norm_type
self.norm_groups = norm_groups
self.depthwise_conv = nn.Conv2D(
in_channels,
in_channels,
kernel_size,
padding=kernel_size // 2,
groups=in_channels,
bias_attr=False)
self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)
# norm type
if self.norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(self.out_channels)
elif self.norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=self.norm_groups, num_channels=self.out_channels)
        # activation; default to None so forward's `self.act is not None`
        # check works when act=None is passed
        self.act = None
        if act == 'swish':
            self.act = nn.Swish()
        elif act == 'relu':
            self.act = nn.ReLU()
def forward(self, x):
if self.act is not None:
x = self.act(x)
out = self.depthwise_conv(x)
out = self.pointwise_conv(out)
if self.norm_type is not None:
out = self.norm(out)
return out
class BiFPNCell(nn.Layer):
def __init__(self,
channels=256,
num_levels=5,
eps=1e-5,
use_weighted_fusion=True,
kernel_size=3,
norm_type='bn',
norm_groups=32,
act='swish'):
super(BiFPNCell, self).__init__()
self.channels = channels
self.num_levels = num_levels
self.eps = eps
self.use_weighted_fusion = use_weighted_fusion
# up
self.conv_up = nn.LayerList([
SeparableConvLayer(
self.channels,
kernel_size=kernel_size,
norm_type=norm_type,
norm_groups=norm_groups,
act=act) for _ in range(self.num_levels - 1)
])
# down
self.conv_down = nn.LayerList([
SeparableConvLayer(
self.channels,
kernel_size=kernel_size,
norm_type=norm_type,
norm_groups=norm_groups,
act=act) for _ in range(self.num_levels - 1)
])
if self.use_weighted_fusion:
self.up_weights = self.create_parameter(
shape=[self.num_levels - 1, 2],
attr=ParamAttr(initializer=Constant(1.)))
self.down_weights = self.create_parameter(
shape=[self.num_levels - 1, 3],
attr=ParamAttr(initializer=Constant(1.)))
def _feature_fusion_cell(self,
conv_layer,
lateral_feat,
sampling_feat,
route_feat=None,
weights=None):
if self.use_weighted_fusion:
weights = F.relu(weights)
weights = weights / (weights.sum() + self.eps)
if route_feat is not None:
out_feat = weights[0] * lateral_feat + \
weights[1] * sampling_feat + \
weights[2] * route_feat
else:
out_feat = weights[0] * lateral_feat + \
weights[1] * sampling_feat
else:
if route_feat is not None:
out_feat = lateral_feat + sampling_feat + route_feat
else:
out_feat = lateral_feat + sampling_feat
out_feat = conv_layer(out_feat)
return out_feat
def forward(self, feats):
# feats: [P3 - P7]
lateral_feats = []
# up
up_feature = feats[-1]
for i, feature in enumerate(feats[::-1]):
if i == 0:
lateral_feats.append(feature)
else:
shape = paddle.shape(feature)
up_feature = F.interpolate(
up_feature, size=[shape[2], shape[3]])
lateral_feature = self._feature_fusion_cell(
self.conv_up[i - 1],
feature,
up_feature,
weights=self.up_weights[i - 1]
if self.use_weighted_fusion else None)
lateral_feats.append(lateral_feature)
up_feature = lateral_feature
out_feats = []
# down
down_feature = lateral_feats[-1]
for i, (lateral_feature,
route_feature) in enumerate(zip(lateral_feats[::-1], feats)):
if i == 0:
out_feats.append(lateral_feature)
else:
down_feature = F.max_pool2d(down_feature, 3, 2, 1)
if i == len(feats) - 1:
route_feature = None
weights = self.down_weights[
i - 1][:2] if self.use_weighted_fusion else None
else:
weights = self.down_weights[
i - 1] if self.use_weighted_fusion else None
out_feature = self._feature_fusion_cell(
self.conv_down[i - 1],
lateral_feature,
down_feature,
route_feature,
weights=weights)
out_feats.append(out_feature)
down_feature = out_feature
return out_feats
@register
@serializable
class BiFPN(nn.Layer):
"""
Bidirectional Feature Pyramid Network, see https://arxiv.org/abs/1911.09070
Args:
in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config.
out_channel (int): output channel of each level.
num_extra_levels (int): the number of extra stages added to the last level.
default: 2
fpn_strides (List): The stride of each level.
num_stacks (int): the number of stacks for BiFPN, default: 1.
use_weighted_fusion (bool): use weighted feature fusion in BiFPN, default: True.
        norm_type (string|None): the normalization type in the BiFPN module. If
            norm_type is None, no norm is applied after conv; if it is a
            string, bn, gn and sync_bn are available. default: bn.
        norm_groups (int): the number of groups when norm_type is gn.
        act (string|None): the activation function of BiFPN.
"""
def __init__(self,
in_channels=(512, 1024, 2048),
out_channel=256,
num_extra_levels=2,
fpn_strides=[8, 16, 32, 64, 128],
num_stacks=1,
use_weighted_fusion=True,
norm_type='bn',
norm_groups=32,
act='swish'):
super(BiFPN, self).__init__()
assert num_stacks > 0, "The number of stacks of BiFPN is at least 1."
assert norm_type in ['bn', 'sync_bn', 'gn', None]
assert act in ['swish', 'relu', None]
        assert num_extra_levels >= 0, \
            "The `num_extra_levels` must be non-negative (>= 0)."
self.in_channels = in_channels
self.out_channel = out_channel
self.num_extra_levels = num_extra_levels
self.num_stacks = num_stacks
self.use_weighted_fusion = use_weighted_fusion
self.norm_type = norm_type
self.norm_groups = norm_groups
self.act = act
self.num_levels = len(self.in_channels) + self.num_extra_levels
if len(fpn_strides) != self.num_levels:
for i in range(self.num_extra_levels):
fpn_strides += [fpn_strides[-1] * 2]
self.fpn_strides = fpn_strides
self.lateral_convs = nn.LayerList()
for in_c in in_channels:
self.lateral_convs.append(
ConvNormLayer(in_c, self.out_channel, 1, 1))
if self.num_extra_levels > 0:
self.extra_convs = nn.LayerList()
for i in range(self.num_extra_levels):
if i == 0:
self.extra_convs.append(
ConvNormLayer(self.in_channels[-1], self.out_channel, 3,
2))
else:
self.extra_convs.append(nn.MaxPool2D(3, 2, 1))
self.bifpn_cells = nn.LayerList()
for i in range(self.num_stacks):
self.bifpn_cells.append(
BiFPNCell(
self.out_channel,
self.num_levels,
use_weighted_fusion=self.use_weighted_fusion,
norm_type=self.norm_type,
norm_groups=self.norm_groups,
act=self.act))
@classmethod
def from_config(cls, cfg, input_shape):
return {
'in_channels': [i.channels for i in input_shape],
'fpn_strides': [i.stride for i in input_shape]
}
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channel, stride=s) for s in self.fpn_strides
]
def forward(self, feats):
assert len(feats) == len(self.in_channels)
fpn_feats = []
for conv_layer, feature in zip(self.lateral_convs, feats):
fpn_feats.append(conv_layer(feature))
if self.num_extra_levels > 0:
feat = feats[-1]
for conv_layer in self.extra_convs:
feat = conv_layer(feat)
fpn_feats.append(feat)
for bifpn_cell in self.bifpn_cells:
fpn_feats = bifpn_cell(fpn_feats)
return fpn_feats
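As a quick sanity check of the neck above, a minimal usage sketch follows; it assumes PaddleDetection is installed and that this file is importable as ppdet.modeling.necks.bifpn (the module path is an assumption based on the package's __init__.py):

import paddle
from ppdet.modeling.necks.bifpn import BiFPN  # assumed module path

# Three ResNet-50 style levels (C3-C5) plus two extra levels give P3-P7.
neck = BiFPN(in_channels=(512, 1024, 2048), out_channel=256)
feats = [
    paddle.rand([1, 512, 80, 80]),   # stride 8 for a 640x640 input
    paddle.rand([1, 1024, 40, 40]),  # stride 16
    paddle.rand([1, 2048, 20, 20]),  # stride 32
]
outs = neck(feats)
print([o.shape for o in outs])  # five 256-channel maps at strides 8-128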


@@ -0,0 +1,213 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn.initializer import KaimingNormal
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
__all__ = ['BlazeNeck']
def hard_swish(x):
return x * F.relu6(x + 3) / 6.
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
num_groups=1,
act='relu',
conv_lr=0.1,
conv_decay=0.,
norm_decay=0.,
norm_type='bn',
name=None):
super(ConvBNLayer, self).__init__()
self.act = act
self._conv = nn.Conv2D(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(
learning_rate=conv_lr, initializer=KaimingNormal()),
bias_attr=False)
        if norm_type in ['sync_bn', 'bn']:
            self._batch_norm = nn.BatchNorm2D(out_channels)
        else:
            self._batch_norm = None
    def forward(self, x):
        x = self._conv(x)
        if self._batch_norm is not None:
            x = self._batch_norm(x)
if self.act == "relu":
x = F.relu(x)
elif self.act == "relu6":
x = F.relu6(x)
elif self.act == 'leaky':
x = F.leaky_relu(x)
elif self.act == 'hard_swish':
x = hard_swish(x)
return x
class FPN(nn.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(FPN, self).__init__()
self.conv1_fpn = ConvBNLayer(
in_channels,
out_channels // 2,
kernel_size=1,
padding=0,
stride=1,
act='leaky',
name=name + '_output1')
self.conv2_fpn = ConvBNLayer(
in_channels,
out_channels // 2,
kernel_size=1,
padding=0,
stride=1,
act='leaky',
name=name + '_output2')
self.conv3_fpn = ConvBNLayer(
out_channels // 2,
out_channels // 2,
kernel_size=3,
padding=1,
stride=1,
act='leaky',
name=name + '_merge')
def forward(self, input):
output1 = self.conv1_fpn(input[0])
output2 = self.conv2_fpn(input[1])
up2 = F.upsample(
output2, size=paddle.shape(output1)[-2:], mode='nearest')
output1 = paddle.add(output1, up2)
output1 = self.conv3_fpn(output1)
return output1, output2
class SSH(nn.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(SSH, self).__init__()
assert out_channels % 4 == 0
self.conv0_ssh = ConvBNLayer(
in_channels,
out_channels // 2,
kernel_size=3,
padding=1,
stride=1,
act=None,
name=name + 'ssh_conv3')
self.conv1_ssh = ConvBNLayer(
out_channels // 2,
out_channels // 4,
kernel_size=3,
padding=1,
stride=1,
act='leaky',
name=name + 'ssh_conv5_1')
self.conv2_ssh = ConvBNLayer(
out_channels // 4,
out_channels // 4,
kernel_size=3,
padding=1,
stride=1,
act=None,
name=name + 'ssh_conv5_2')
self.conv3_ssh = ConvBNLayer(
out_channels // 4,
out_channels // 4,
kernel_size=3,
padding=1,
stride=1,
act='leaky',
name=name + 'ssh_conv7_1')
self.conv4_ssh = ConvBNLayer(
out_channels // 4,
out_channels // 4,
kernel_size=3,
padding=1,
stride=1,
act=None,
name=name + 'ssh_conv7_2')
def forward(self, x):
conv0 = self.conv0_ssh(x)
conv1 = self.conv1_ssh(conv0)
conv2 = self.conv2_ssh(conv1)
conv3 = self.conv3_ssh(conv2)
conv4 = self.conv4_ssh(conv3)
concat = paddle.concat([conv0, conv2, conv4], axis=1)
return F.relu(concat)
@register
@serializable
class BlazeNeck(nn.Layer):
def __init__(self, in_channel, neck_type="None", data_format='NCHW'):
super(BlazeNeck, self).__init__()
        self.neck_type = neck_type
        self.return_input = False
        self._out_channels = in_channel
        if self.neck_type == 'None':
            self.return_input = True
if "fpn" in self.neck_type:
self.fpn = FPN(self._out_channels[0],
self._out_channels[1],
name='fpn')
self._out_channels = [
self._out_channels[0] // 2, self._out_channels[1] // 2
]
if "ssh" in self.neck_type:
self.ssh1 = SSH(self._out_channels[0],
self._out_channels[0],
name='ssh1')
self.ssh2 = SSH(self._out_channels[1],
self._out_channels[1],
name='ssh2')
self._out_channels = [self._out_channels[0], self._out_channels[1]]
def forward(self, inputs):
        if self.return_input:
return inputs
output1, output2 = None, None
if "fpn" in self.neck_type:
backout_4, backout_1 = inputs
output1, output2 = self.fpn([backout_4, backout_1])
if self.neck_type == "only_fpn":
return [output1, output2]
if self.neck_type == "only_ssh":
output1, output2 = inputs
feature1 = self.ssh1(output1)
feature2 = self.ssh2(output2)
return [feature1, feature2]
@property
def out_shape(self):
return [
ShapeSpec(channels=c)
for c in [self._out_channels[0], self._out_channels[1]]
]
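A minimal sketch of the combined mode, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.blazeface_fpn (an assumed path). With neck_type="fpn_ssh", the two backbone levels are first fused by the small FPN and then each output is refined by an SSH context module:

import paddle
from ppdet.modeling.necks.blazeface_fpn import BlazeNeck  # assumed path

# Two 96-channel backbone levels (assumed BlazeNet-like shapes).
neck = BlazeNeck(in_channel=[96, 96], neck_type="fpn_ssh")
feats = [paddle.rand([1, 96, 16, 16]), paddle.rand([1, 96, 8, 8])]
f1, f2 = neck(feats)
print(f1.shape, f2.shape)  # [1, 48, 16, 16] [1, 48, 8, 8]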


@@ -0,0 +1,426 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn.initializer import Uniform
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.backbones.hardnet import ConvLayer, HarDBlock
from ..shape_spec import ShapeSpec
__all__ = ['CenterNetDLAFPN', 'CenterNetHarDNetFPN']
# SGE attention
class BasicConv(nn.Layer):
def __init__(self,
in_planes,
out_planes,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
relu=True,
bn=True,
bias_attr=False):
super(BasicConv, self).__init__()
self.out_channels = out_planes
self.conv = nn.Conv2D(
in_planes,
out_planes,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias_attr=bias_attr)
self.bn = nn.BatchNorm2D(
out_planes,
epsilon=1e-5,
momentum=0.01,
weight_attr=False,
bias_attr=False) if bn else None
self.relu = nn.ReLU() if relu else None
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
if self.relu is not None:
x = self.relu(x)
return x
class ChannelPool(nn.Layer):
def forward(self, x):
return paddle.concat(
(paddle.max(x, 1).unsqueeze(1), paddle.mean(x, 1).unsqueeze(1)),
axis=1)
class SpatialGate(nn.Layer):
def __init__(self):
super(SpatialGate, self).__init__()
kernel_size = 7
self.compress = ChannelPool()
self.spatial = BasicConv(
2,
1,
kernel_size,
stride=1,
padding=(kernel_size - 1) // 2,
relu=False)
def forward(self, x):
x_compress = self.compress(x)
x_out = self.spatial(x_compress)
scale = F.sigmoid(x_out) # broadcasting
return x * scale
def fill_up_weights(up):
weight = up.weight.numpy()
f = math.ceil(weight.shape[2] / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(weight.shape[2]):
for j in range(weight.shape[3]):
weight[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, weight.shape[0]):
weight[c, 0, :, :] = weight[0, 0, :, :]
up.weight.set_value(weight)
class IDAUp(nn.Layer):
def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
super(IDAUp, self).__init__()
for i in range(1, len(ch_ins)):
ch_in = ch_ins[i]
up_s = int(up_strides[i])
fan_in = ch_in * 3 * 3
stdv = 1. / math.sqrt(fan_in)
proj = nn.Sequential(
ConvNormLayer(
ch_in,
ch_out,
filter_size=3,
stride=1,
use_dcn=dcn_v2,
bias_on=dcn_v2,
norm_decay=None,
dcn_lr_scale=1.,
dcn_regularizer=None,
initializer=Uniform(-stdv, stdv)),
nn.ReLU())
node = nn.Sequential(
ConvNormLayer(
ch_out,
ch_out,
filter_size=3,
stride=1,
use_dcn=dcn_v2,
bias_on=dcn_v2,
norm_decay=None,
dcn_lr_scale=1.,
dcn_regularizer=None,
initializer=Uniform(-stdv, stdv)),
nn.ReLU())
kernel_size = up_s * 2
fan_in = ch_out * kernel_size * kernel_size
stdv = 1. / math.sqrt(fan_in)
up = nn.Conv2DTranspose(
ch_out,
ch_out,
kernel_size=up_s * 2,
stride=up_s,
padding=up_s // 2,
groups=ch_out,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
bias_attr=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, inputs, start_level, end_level):
for i in range(start_level + 1, end_level):
upsample = getattr(self, 'up_' + str(i - start_level))
project = getattr(self, 'proj_' + str(i - start_level))
inputs[i] = project(inputs[i])
inputs[i] = upsample(inputs[i])
node = getattr(self, 'node_' + str(i - start_level))
inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))
return inputs
class DLAUp(nn.Layer):
def __init__(self, start_level, channels, scales, ch_in=None, dcn_v2=True):
super(DLAUp, self).__init__()
self.start_level = start_level
if ch_in is None:
ch_in = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(
self,
'ida_{}'.format(i),
IDAUp(
ch_in[j:],
channels[j],
scales[j:] // scales[j],
dcn_v2=dcn_v2))
scales[j + 1:] = scales[j]
ch_in[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, inputs):
out = [inputs[-1]] # start with 32
for i in range(len(inputs) - self.start_level - 1):
ida = getattr(self, 'ida_{}'.format(i))
outputs = ida(inputs, len(inputs) - i - 2, len(inputs))
out.insert(0, outputs[-1])
return out
@register
@serializable
class CenterNetDLAFPN(nn.Layer):
"""
Args:
in_channels (list): number of input feature channels from backbone.
[16, 32, 64, 128, 256, 512] by default, means the channels of DLA-34
down_ratio (int): the down ratio from images to heatmap, 4 by default
        last_level (int): the last level of input feature fed into the upsampling block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio)
dcn_v2 (bool): whether use the DCNv2, True by default
with_sge (bool): whether use SGE attention, False by default
"""
def __init__(self,
in_channels,
down_ratio=4,
last_level=5,
out_channel=0,
first_level=None,
dcn_v2=True,
with_sge=False):
super(CenterNetDLAFPN, self).__init__()
self.first_level = int(np.log2(
down_ratio)) if first_level is None else first_level
assert self.first_level >= 0, "first level in CenterNetDLAFPN should be greater or equal to 0, but received {}".format(
self.first_level)
self.down_ratio = down_ratio
self.last_level = last_level
scales = [2**i for i in range(len(in_channels[self.first_level:]))]
self.dla_up = DLAUp(
self.first_level,
in_channels[self.first_level:],
scales,
dcn_v2=dcn_v2)
self.out_channel = out_channel
if out_channel == 0:
self.out_channel = in_channels[self.first_level]
self.ida_up = IDAUp(
in_channels[self.first_level:self.last_level],
self.out_channel,
[2**i for i in range(self.last_level - self.first_level)],
dcn_v2=dcn_v2)
self.with_sge = with_sge
if self.with_sge:
self.sge_attention = SpatialGate()
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape]}
def forward(self, body_feats):
inputs = [body_feats[i] for i in range(len(body_feats))]
dla_up_feats = self.dla_up(inputs)
ida_up_feats = []
for i in range(self.last_level - self.first_level):
ida_up_feats.append(dla_up_feats[i].clone())
self.ida_up(ida_up_feats, 0, len(ida_up_feats))
feat = ida_up_feats[-1]
if self.with_sge:
feat = self.sge_attention(feat)
if self.down_ratio != 4:
feat = F.interpolate(
feat,
scale_factor=self.down_ratio // 4,
mode="bilinear",
align_corners=True)
return feat
@property
def out_shape(self):
return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
class TransitionUp(nn.Layer):
def __init__(self, in_channels, out_channels):
super().__init__()
def forward(self, x, skip):
        h, w = skip.shape[2], skip.shape[3]
        out = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True)
out = paddle.concat([out, skip], 1)
return out
@register
@serializable
class CenterNetHarDNetFPN(nn.Layer):
"""
Args:
in_channels (list): number of input feature channels from backbone.
[96, 214, 458, 784] by default, means the channels of HarDNet85
        num_layers (int): the number of HarDNet layers, 85 by default
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio) - 1
last_level (int): the last level of input feature fed into the upsamplng block
out_channel (int): the channel of the output feature, 0 by default means
the channel of the input feature whose down ratio is `down_ratio`
"""
def __init__(self,
in_channels,
num_layers=85,
down_ratio=4,
first_level=None,
last_level=4,
out_channel=0):
super(CenterNetHarDNetFPN, self).__init__()
self.first_level = int(np.log2(
down_ratio)) - 1 if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetHarDNetFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
self.down_ratio = down_ratio
self.last_level = last_level
self.last_pool = nn.AvgPool2D(kernel_size=2, stride=2)
        assert num_layers in [68, 85], "HarDNet-{} is not supported.".format(
            num_layers)
if num_layers == 85:
self.last_proj = ConvLayer(784, 256, kernel_size=1)
self.last_blk = HarDBlock(768, 80, 1.7, 8)
self.skip_nodes = [1, 3, 8, 13]
self.SC = [32, 32, 0]
gr = [64, 48, 28]
layers = [8, 8, 4]
ch_list2 = [224 + self.SC[0], 160 + self.SC[1], 96 + self.SC[2]]
channels = [96, 214, 458, 784]
self.skip_lv = 3
elif num_layers == 68:
self.last_proj = ConvLayer(654, 192, kernel_size=1)
self.last_blk = HarDBlock(576, 72, 1.7, 8)
self.skip_nodes = [1, 3, 8, 11]
self.SC = [32, 32, 0]
gr = [48, 32, 20]
layers = [8, 8, 4]
ch_list2 = [224 + self.SC[0], 96 + self.SC[1], 64 + self.SC[2]]
channels = [64, 124, 328, 654]
self.skip_lv = 2
self.transUpBlocks = nn.LayerList([])
self.denseBlocksUp = nn.LayerList([])
self.conv1x1_up = nn.LayerList([])
self.avg9x9 = nn.AvgPool2D(kernel_size=(9, 9), stride=1, padding=(4, 4))
prev_ch = self.last_blk.get_out_ch()
for i in range(3):
skip_ch = channels[3 - i]
self.transUpBlocks.append(TransitionUp(prev_ch, prev_ch))
if i < self.skip_lv:
cur_ch = prev_ch + skip_ch
else:
cur_ch = prev_ch
self.conv1x1_up.append(
ConvLayer(
cur_ch, ch_list2[i], kernel_size=1))
cur_ch = ch_list2[i]
cur_ch -= self.SC[i]
cur_ch *= 3
blk = HarDBlock(cur_ch, gr[i], 1.7, layers[i])
self.denseBlocksUp.append(blk)
prev_ch = blk.get_out_ch()
prev_ch += self.SC[0] + self.SC[1] + self.SC[2]
self.out_channel = prev_ch
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape]}
def forward(self, body_feats):
x = body_feats[-1]
x_sc = []
x = self.last_proj(x)
x = self.last_pool(x)
x2 = self.avg9x9(x)
x3 = x / (x.sum((2, 3), keepdim=True) + 0.1)
x = paddle.concat([x, x2, x3], 1)
x = self.last_blk(x)
for i in range(3):
skip_x = body_feats[3 - i]
x_up = self.transUpBlocks[i](x, skip_x)
x_ch = self.conv1x1_up[i](x_up)
if self.SC[i] > 0:
end = x_ch.shape[1]
new_st = end - self.SC[i]
x_sc.append(x_ch[:, new_st:, :, :])
x_ch = x_ch[:, :new_st, :, :]
x2 = self.avg9x9(x_ch)
x3 = x_ch / (x_ch.sum((2, 3), keepdim=True) + 0.1)
x_new = paddle.concat([x_ch, x2, x3], 1)
x = self.denseBlocksUp[i](x_new)
scs = [x]
for i in range(3):
if self.SC[i] > 0:
scs.insert(
0,
F.interpolate(
x_sc[i],
size=(x.shape[2], x.shape[3]),
mode="bilinear",
align_corners=True))
neck_feat = paddle.concat(scs, 1)
return neck_feat
@property
def out_shape(self):
return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
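A minimal sketch for the DLA variant, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.centernet_fpn (an assumed path); dcn_v2=False keeps the sketch runnable without deformable-conv support:

import paddle
from ppdet.modeling.necks.centernet_fpn import CenterNetDLAFPN  # assumed path

# DLA-34 style channel list: six levels at strides 1, 2, 4, 8, 16, 32.
channels = [16, 32, 64, 128, 256, 512]
neck = CenterNetDLAFPN(in_channels=channels, dcn_v2=False)
feats = [
    paddle.rand([1, c, 512 // 2**i, 512 // 2**i])
    for i, c in enumerate(channels)
]
out = neck(feats)
print(out.shape)  # [1, 64, 128, 128], the stride-4 heatmap feature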


@@ -0,0 +1,122 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on mmdet: git@github.com:open-mmlab/mmdetection.git
"""
import paddle.nn as nn
from ppdet.core.workspace import register, serializable
from ..backbones.hrnet import ConvNormLayer
from ..shape_spec import ShapeSpec
from ..initializer import xavier_uniform_, constant_
__all__ = ['ChannelMapper']
@register
@serializable
class ChannelMapper(nn.Layer):
"""Channel Mapper to reduce/increase channels of backbone features.
This is used to reduce/increase channels of backbone features.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale).
kernel_size (int, optional): kernel_size for reducing channels (used
at each scale). Default: 3.
        norm_type (str, optional): The normalization type used in
            ConvNormLayer. Default: "gn".
        norm_groups (int, optional): The number of groups when norm_type
            is "gn". Default: 32.
        act (str, optional): The activation of ConvNormLayer.
            Default: 'relu'.
num_outs (int, optional): Number of output feature maps. There
would be extra_convs when num_outs larger than the length
of in_channels.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
norm_type="gn",
norm_groups=32,
act='relu',
num_outs=None,
init_cfg=dict(
type='Xavier', layer='Conv2d', distribution='uniform')):
super(ChannelMapper, self).__init__()
assert isinstance(in_channels, list)
        self.extra_convs = None
        if num_outs is None:
            num_outs = len(in_channels)
        self.out_channels = out_channels
        self.num_outs = num_outs
        self.convs = nn.LayerList()
for in_channel in in_channels:
self.convs.append(
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=out_channels,
                    filter_size=kernel_size,
                    norm_type=norm_type,
                    norm_groups=norm_groups,
                    act=act))
if num_outs > len(in_channels):
self.extra_convs = nn.LayerList()
for i in range(len(in_channels), num_outs):
if i == len(in_channels):
in_channel = in_channels[-1]
else:
in_channel = out_channels
self.extra_convs.append(
                    ConvNormLayer(
                        ch_in=in_channel,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=2,
                        norm_type=norm_type,
                        norm_groups=norm_groups,
                        act=act))
self.init_weights()
def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.convs)
outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
if self.extra_convs:
for i in range(len(self.extra_convs)):
if i == 0:
outs.append(self.extra_convs[0](inputs[-1]))
else:
outs.append(self.extra_convs[i](outs[-1]))
return tuple(outs)
    @property
    def out_shape(self):
        return [
            ShapeSpec(channels=self.out_channels)
            for _ in range(self.num_outs)
        ]
    def init_weights(self):
        """Initialize conv weights with Xavier uniform and biases with zeros."""
        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                xavier_uniform_(m.weight)
                if m.bias is not None:
                    constant_(m.bias)
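A minimal sketch, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.channel_mapper (an assumed path). num_outs=4 maps three backbone levels to 256 channels and appends one extra stride-2 level, as DETR-style heads expect:

import paddle
from ppdet.modeling.necks.channel_mapper import ChannelMapper  # assumed path

neck = ChannelMapper(
    in_channels=[512, 1024, 2048], out_channels=256, num_outs=4)
feats = [
    paddle.rand([1, 512, 64, 64]),
    paddle.rand([1, 1024, 32, 32]),
    paddle.rand([1, 2048, 16, 16]),
]
outs = neck(feats)
print([o.shape for o in outs])  # four 256-channel maps, the last at 8x8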


@@ -0,0 +1,254 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from ppdet.modeling.initializer import kaiming_normal_, constant_
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.shape_spec import ShapeSpec
__all__ = ['CLRFPN']
@register
@serializable
class CLRFPN(nn.Layer):
"""
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
Args:
in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config
out_channel (int): output channel of each level
spatial_scales (list[float]): the spatial scales between input feature
maps and original input image which can be derived from the output
shape of backbone by from_config
has_extra_convs (bool): whether to add extra conv to the last level.
default False
extra_stage (int): the number of extra stages added to the last level.
default 1
use_c5 (bool): Whether to use c5 as the input of extra stage,
otherwise p5 is used. default True
norm_type (string|None): The normalization type in FPN module. If
norm_type is None, norm will not be used after conv and if
norm_type is string, bn, gn, sync_bn are available. default None
norm_decay (float): weight decay for normalization layer weights.
default 0.
freeze_norm (bool): whether to freeze normalization layer.
default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True
"""
def __init__(self,
in_channels,
out_channel,
spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
has_extra_convs=False,
extra_stage=1,
use_c5=True,
norm_type=None,
norm_decay=0.,
freeze_norm=False,
relu_before_extra_convs=True):
super(CLRFPN, self).__init__()
self.out_channel = out_channel
for s in range(extra_stage):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.has_extra_convs = has_extra_convs
self.extra_stage = extra_stage
self.use_c5 = use_c5
self.relu_before_extra_convs = relu_before_extra_convs
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.in_channels = in_channels
self.lateral_convs = []
self.fpn_convs = []
fan = out_channel * 3 * 3
# stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
# 0 <= st_stage < ed_stage <= 3
st_stage = 4 - len(in_channels)
ed_stage = st_stage + len(in_channels) - 1
for i in range(st_stage, ed_stage + 1):
# if i == 3:
# lateral_name = 'fpn_inner_res5_sum'
# else:
# lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
lateral_name = "lateral_convs.{}.conv".format(i - 1)
in_c = in_channels[i - st_stage]
if self.norm_type is not None:
lateral = self.add_sublayer(
lateral_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channel,
filter_size=1,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=in_c)))
else:
lateral = self.add_sublayer(
lateral_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channel,
kernel_size=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=in_c))))
self.lateral_convs.append(lateral)
fpn_name = "fpn_convs.{}.conv".format(i - 1)
if self.norm_type is not None:
fpn_conv = self.add_sublayer(
fpn_name,
ConvNormLayer(
ch_in=out_channel,
ch_out=out_channel,
filter_size=3,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
fpn_conv = self.add_sublayer(
fpn_name,
nn.Conv2D(
in_channels=out_channel,
out_channels=out_channel,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(fpn_conv)
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
if self.has_extra_convs:
for i in range(self.extra_stage):
lvl = ed_stage + 1 + i
if i == 0 and self.use_c5:
in_c = in_channels[-1]
else:
in_c = out_channel
extra_fpn_name = 'fpn_{}'.format(lvl + 2)
if self.norm_type is not None:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channel,
filter_size=3,
stride=2,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channel,
kernel_size=3,
stride=2,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(extra_fpn_conv)
self.init_weights()
def init_weights(self):
for m in self.lateral_convs:
if isinstance(m, (nn.Conv1D, nn.Conv2D)):
kaiming_normal_(
m.weight, a=0, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
constant_(m.bias, value=0.)
elif isinstance(m, (nn.BatchNorm1D, nn.BatchNorm2D)):
constant_(m.weight, value=1)
constant_(m.bias, value=0)
for m in self.fpn_convs:
if isinstance(m, (nn.Conv1D, nn.Conv2D)):
kaiming_normal_(
m.weight, a=0, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
constant_(m.bias, value=0.)
elif isinstance(m, (nn.BatchNorm1D, nn.BatchNorm2D)):
constant_(m.weight, value=1)
constant_(m.bias, value=0)
@classmethod
def from_config(cls, cfg, input_shape):
return {}
def forward(self, body_feats):
laterals = []
if len(body_feats) > len(self.in_channels):
for _ in range(len(body_feats) - len(self.in_channels)):
del body_feats[0]
num_levels = len(body_feats)
for i in range(num_levels):
laterals.append(self.lateral_convs[i](body_feats[i]))
for i in range(1, num_levels):
lvl = num_levels - i
upsample = F.interpolate(
laterals[lvl],
scale_factor=2.,
mode='nearest', )
laterals[lvl - 1] += upsample
fpn_output = []
for lvl in range(num_levels):
fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
if self.extra_stage > 0:
# use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has no extra convs'
fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
else:
if self.use_c5:
extra_source = body_feats[-1]
else:
extra_source = fpn_output[-1]
fpn_output.append(self.fpn_convs[num_levels](extra_source))
for i in range(1, self.extra_stage):
if self.relu_before_extra_convs:
fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
fpn_output[-1])))
else:
fpn_output.append(self.fpn_convs[num_levels + i](
fpn_output[-1]))
return fpn_output
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channel, stride=1. / s)
for s in self.spatial_scales
]
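A minimal sketch, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.clrnet_fpn (an assumed path); extra_stage=0 disables the max-pooled top level so exactly one output per input level comes back:

import paddle
from ppdet.modeling.necks.clrnet_fpn import CLRFPN  # assumed path

# Lane-detection style input sizes (assumed shapes, e.g. a 320x800 image).
neck = CLRFPN(in_channels=[512, 1024, 2048], out_channel=64, extra_stage=0)
feats = [
    paddle.rand([1, 512, 40, 100]),
    paddle.rand([1, 1024, 20, 50]),
    paddle.rand([1, 2048, 10, 25]),
]
outs = neck(feats)
print([o.shape for o in outs])  # three 64-channel maps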


@@ -0,0 +1,363 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/yolox_pafpn.py
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
__all__ = ['CSPPAN']
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channel=96,
out_channel=96,
kernel_size=3,
stride=1,
groups=1,
act='leaky_relu'):
super(ConvBNLayer, self).__init__()
initializer = nn.initializer.KaimingUniform()
self.conv = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
kernel_size=kernel_size,
groups=groups,
padding=(kernel_size - 1) // 2,
stride=stride,
weight_attr=ParamAttr(initializer=initializer),
bias_attr=False)
self.bn = nn.BatchNorm2D(out_channel)
if act == "hard_swish":
act = 'hardswish'
self.act = act
def forward(self, x):
x = self.bn(self.conv(x))
if self.act:
x = getattr(F, self.act)(x)
return x
class DPModule(nn.Layer):
"""
Depth-wise and point-wise module.
Args:
in_channel (int): The input channels of this Module.
out_channel (int): The output channels of this Module.
kernel_size (int): The conv2d kernel size of this Module.
stride (int): The conv2d's stride of this Module.
act (str): The activation function of this Module,
Now support `leaky_relu` and `hard_swish`.
"""
def __init__(self,
in_channel=96,
out_channel=96,
kernel_size=3,
stride=1,
act='leaky_relu',
use_act_in_out=True):
super(DPModule, self).__init__()
initializer = nn.initializer.KaimingUniform()
self.use_act_in_out = use_act_in_out
self.dwconv = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
kernel_size=kernel_size,
groups=out_channel,
padding=(kernel_size - 1) // 2,
stride=stride,
weight_attr=ParamAttr(initializer=initializer),
bias_attr=False)
self.bn1 = nn.BatchNorm2D(out_channel)
self.pwconv = nn.Conv2D(
in_channels=out_channel,
out_channels=out_channel,
kernel_size=1,
groups=1,
padding=0,
weight_attr=ParamAttr(initializer=initializer),
bias_attr=False)
self.bn2 = nn.BatchNorm2D(out_channel)
if act == "hard_swish":
act = 'hardswish'
self.act = act
def forward(self, x):
x = self.bn1(self.dwconv(x))
if self.act:
x = getattr(F, self.act)(x)
x = self.bn2(self.pwconv(x))
if self.use_act_in_out and self.act:
x = getattr(F, self.act)(x)
return x
class DarknetBottleneck(nn.Layer):
"""The basic bottleneck block used in Darknet.
Each Block consists of two ConvModules and the input is added to the
final output. Each ConvModule is composed of Conv, BN, and act.
    The first convLayer has a filter size of 1x1 and the second one has a
    filter size of kernel_size (3x3 by default).
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
        expansion (float): The ratio used to compute the hidden channels.
            Default: 0.5
add_identity (bool): Whether to add identity to the out.
Default: True
use_depthwise (bool): Whether to use depthwise separable convolution.
Default: False
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
expansion=0.5,
add_identity=True,
use_depthwise=False,
act="leaky_relu"):
super(DarknetBottleneck, self).__init__()
hidden_channels = int(out_channels * expansion)
conv_func = DPModule if use_depthwise else ConvBNLayer
self.conv1 = ConvBNLayer(
in_channel=in_channels,
out_channel=hidden_channels,
kernel_size=1,
act=act)
self.conv2 = conv_func(
in_channel=hidden_channels,
out_channel=out_channels,
kernel_size=kernel_size,
stride=1,
act=act)
self.add_identity = \
add_identity and in_channels == out_channels
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.conv2(out)
if self.add_identity:
return out + identity
else:
return out
class CSPLayer(nn.Layer):
"""Cross Stage Partial Layer.
Args:
in_channels (int): The input channels of the CSP layer.
out_channels (int): The output channels of the CSP layer.
expand_ratio (float): Ratio to adjust the number of channels of the
hidden layer. Default: 0.5
num_blocks (int): Number of blocks. Default: 1
add_identity (bool): Whether to add identity in blocks.
Default: True
        use_depthwise (bool): Whether to use depthwise separable convolution
            in blocks. Default: False
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
expand_ratio=0.5,
num_blocks=1,
add_identity=True,
use_depthwise=False,
act="leaky_relu"):
super().__init__()
mid_channels = int(out_channels * expand_ratio)
self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
self.final_conv = ConvBNLayer(
2 * mid_channels, out_channels, 1, act=act)
self.blocks = nn.Sequential(* [
DarknetBottleneck(
mid_channels,
mid_channels,
kernel_size,
1.0,
add_identity,
use_depthwise,
act=act) for _ in range(num_blocks)
])
def forward(self, x):
x_short = self.short_conv(x)
x_main = self.main_conv(x)
x_main = self.blocks(x_main)
x_final = paddle.concat((x_main, x_short), axis=1)
return self.final_conv(x_final)
class Channel_T(nn.Layer):
def __init__(self,
in_channels=[116, 232, 464],
out_channels=96,
act="leaky_relu"):
super(Channel_T, self).__init__()
self.convs = nn.LayerList()
for i in range(len(in_channels)):
self.convs.append(
ConvBNLayer(
in_channels[i], out_channels, 1, act=act))
def forward(self, x):
outs = [self.convs[i](x[i]) for i in range(len(x))]
return outs
@register
@serializable
class CSPPAN(nn.Layer):
"""Path Aggregation Network with CSP module.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
kernel_size (int): The conv2d kernel size of this Module.
num_features (int): Number of output features of CSPPAN module.
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
        use_depthwise (bool): Whether to use depthwise separable convolution
            in blocks. Default: True
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=5,
num_features=3,
num_csp_blocks=1,
use_depthwise=True,
act='hard_swish',
spatial_scales=[0.125, 0.0625, 0.03125]):
super(CSPPAN, self).__init__()
self.conv_t = Channel_T(in_channels, out_channels, act=act)
in_channels = [out_channels] * len(spatial_scales)
self.in_channels = in_channels
self.out_channels = out_channels
        # copy, so the append below for num_features == 4 does not mutate
        # the shared default list argument
        self.spatial_scales = list(spatial_scales)
self.num_features = num_features
conv_func = DPModule if use_depthwise else ConvBNLayer
if self.num_features == 4:
self.first_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
self.second_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
self.spatial_scales.append(self.spatial_scales[-1] / 2)
# build top-down blocks
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.top_down_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1, 0, -1):
self.top_down_blocks.append(
CSPLayer(
in_channels[idx - 1] * 2,
in_channels[idx - 1],
kernel_size=kernel_size,
num_blocks=num_csp_blocks,
add_identity=False,
use_depthwise=use_depthwise,
act=act))
# build bottom-up blocks
self.downsamples = nn.LayerList()
self.bottom_up_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1):
self.downsamples.append(
conv_func(
in_channels[idx],
in_channels[idx],
kernel_size=kernel_size,
stride=2,
act=act))
self.bottom_up_blocks.append(
CSPLayer(
in_channels[idx] * 2,
in_channels[idx + 1],
kernel_size=kernel_size,
num_blocks=num_csp_blocks,
add_identity=False,
use_depthwise=use_depthwise,
act=act))
def forward(self, inputs):
"""
Args:
inputs (tuple[Tensor]): input features.
Returns:
tuple[Tensor]: CSPPAN features.
"""
assert len(inputs) == len(self.in_channels)
inputs = self.conv_t(inputs)
# top-down path
inner_outs = [inputs[-1]]
for idx in range(len(self.in_channels) - 1, 0, -1):
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]
            upsample_feat = self.upsample(feat_high)
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
paddle.concat([upsample_feat, feat_low], 1))
inner_outs.insert(0, inner_out)
# bottom-up path
outs = [inner_outs[0]]
for idx in range(len(self.in_channels) - 1):
feat_low = outs[-1]
            feat_high = inner_outs[idx + 1]
            downsample_feat = self.downsamples[idx](feat_low)
            out = self.bottom_up_blocks[idx](paddle.concat(
                [downsample_feat, feat_high], 1))
outs.append(out)
top_features = None
if self.num_features == 4:
top_features = self.first_top_conv(inputs[-1])
top_features = top_features + self.second_top_conv(outs[-1])
outs.append(top_features)
return tuple(outs)
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channels, stride=1. / s)
for s in self.spatial_scales
]
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
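A minimal sketch, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.csp_pan (an assumed path); num_features=4 adds a stride-64 top level built from the two extra stride-2 convs:

import paddle
from ppdet.modeling.necks.csp_pan import CSPPAN  # assumed path

# PP-PicoDet style: ShuffleNet-like channels in, four 96-channel levels out.
neck = CSPPAN(in_channels=[116, 232, 464], out_channels=96, num_features=4)
feats = [
    paddle.rand([1, 116, 40, 40]),
    paddle.rand([1, 232, 20, 20]),
    paddle.rand([1, 464, 10, 10]),
]
outs = neck(feats)
print([o.shape for o in outs])  # strides 8/16/32/64, 96 channels each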


@@ -0,0 +1,398 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import copy
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import DropBlock, MultiHeadAttention
from ppdet.modeling.ops import get_act_fn
from ..backbones.cspresnet import ConvBNLayer, BasicBlock
from ..shape_spec import ShapeSpec
from ..initializer import linear_init_
__all__ = ['CustomCSPPAN']
def _get_clones(module, N):
return nn.LayerList([copy.deepcopy(module) for _ in range(N)])
class SPP(nn.Layer):
def __init__(self,
ch_in,
ch_out,
k,
pool_size,
act='swish',
data_format='NCHW'):
super(SPP, self).__init__()
self.pool = []
self.data_format = data_format
for i, size in enumerate(pool_size):
pool = self.add_sublayer(
'pool{}'.format(i),
nn.MaxPool2D(
kernel_size=size,
stride=1,
padding=size // 2,
data_format=data_format,
ceil_mode=False))
self.pool.append(pool)
self.conv = ConvBNLayer(ch_in, ch_out, k, padding=k // 2, act=act)
def forward(self, x):
outs = [x]
for pool in self.pool:
outs.append(pool(x))
if self.data_format == 'NCHW':
y = paddle.concat(outs, axis=1)
else:
y = paddle.concat(outs, axis=-1)
y = self.conv(y)
return y
class CSPStage(nn.Layer):
def __init__(self,
block_fn,
ch_in,
ch_out,
n,
act='swish',
spp=False,
use_alpha=False):
super(CSPStage, self).__init__()
ch_mid = int(ch_out // 2)
self.conv1 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
self.conv2 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
self.convs = nn.Sequential()
next_ch_in = ch_mid
for i in range(n):
self.convs.add_sublayer(
str(i),
eval(block_fn)(next_ch_in,
ch_mid,
act=act,
shortcut=False,
use_alpha=use_alpha))
if i == (n - 1) // 2 and spp:
self.convs.add_sublayer(
'spp', SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13], act=act))
next_ch_in = ch_mid
self.conv3 = ConvBNLayer(ch_mid * 2, ch_out, 1, act=act)
def forward(self, x):
y1 = self.conv1(x)
y2 = self.conv2(x)
y2 = self.convs(y2)
y = paddle.concat([y1, y2], axis=1)
y = self.conv3(y)
return y
class TransformerEncoderLayer(nn.Layer):
def __init__(self,
d_model,
nhead,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
attn_dropout=None,
act_dropout=None,
normalize_before=False):
super(TransformerEncoderLayer, self).__init__()
attn_dropout = dropout if attn_dropout is None else attn_dropout
act_dropout = dropout if act_dropout is None else act_dropout
self.normalize_before = normalize_before
self.self_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
# Implementation of Feedforward model
self.linear1 = nn.Linear(d_model, dim_feedforward)
self.dropout = nn.Dropout(act_dropout, mode="upscale_in_train")
self.linear2 = nn.Linear(dim_feedforward, d_model)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
self.dropout2 = nn.Dropout(dropout, mode="upscale_in_train")
self.activation = getattr(F, activation)
self._reset_parameters()
def _reset_parameters(self):
linear_init_(self.linear1)
linear_init_(self.linear2)
@staticmethod
def with_pos_embed(tensor, pos_embed):
return tensor if pos_embed is None else tensor + pos_embed
def forward(self, src, src_mask=None, pos_embed=None):
residual = src
if self.normalize_before:
src = self.norm1(src)
q = k = self.with_pos_embed(src, pos_embed)
src = self.self_attn(q, k, value=src, attn_mask=src_mask)
src = residual + self.dropout1(src)
if not self.normalize_before:
src = self.norm1(src)
residual = src
if self.normalize_before:
src = self.norm2(src)
src = self.linear2(self.dropout(self.activation(self.linear1(src))))
src = residual + self.dropout2(src)
if not self.normalize_before:
src = self.norm2(src)
return src
class TransformerEncoder(nn.Layer):
def __init__(self, encoder_layer, num_layers, norm=None):
super(TransformerEncoder, self).__init__()
self.layers = _get_clones(encoder_layer, num_layers)
self.num_layers = num_layers
self.norm = norm
def forward(self, src, src_mask=None, pos_embed=None):
output = src
for layer in self.layers:
output = layer(output, src_mask=src_mask, pos_embed=pos_embed)
if self.norm is not None:
output = self.norm(output)
return output
@register
@serializable
class CustomCSPPAN(nn.Layer):
__shared__ = [
'norm_type', 'data_format', 'width_mult', 'depth_mult', 'trt',
'eval_size'
]
def __init__(self,
in_channels=[256, 512, 1024],
out_channels=[1024, 512, 256],
norm_type='bn',
act='leaky',
stage_fn='CSPStage',
block_fn='BasicBlock',
stage_num=1,
block_num=3,
drop_block=False,
block_size=3,
keep_prob=0.9,
spp=False,
data_format='NCHW',
width_mult=1.0,
depth_mult=1.0,
use_alpha=False,
trt=False,
dim_feedforward=2048,
dropout=0.1,
activation='gelu',
nhead=4,
num_layers=4,
attn_dropout=None,
act_dropout=None,
normalize_before=False,
use_trans=False,
eval_size=None):
super(CustomCSPPAN, self).__init__()
out_channels = [max(round(c * width_mult), 1) for c in out_channels]
block_num = max(round(block_num * depth_mult), 1)
act = get_act_fn(
act, trt=trt) if act is None or isinstance(act,
(str, dict)) else act
self.num_blocks = len(in_channels)
self.data_format = data_format
self._out_channels = out_channels
self.hidden_dim = in_channels[-1]
in_channels = in_channels[::-1]
self.use_trans = use_trans
self.eval_size = eval_size
if use_trans:
if eval_size is not None:
self.pos_embed = self.build_2d_sincos_position_embedding(
eval_size[1] // 32,
eval_size[0] // 32,
embed_dim=self.hidden_dim)
else:
self.pos_embed = None
encoder_layer = TransformerEncoderLayer(
self.hidden_dim, nhead, dim_feedforward, dropout, activation,
attn_dropout, act_dropout, normalize_before)
encoder_norm = nn.LayerNorm(
self.hidden_dim) if normalize_before else None
self.encoder = TransformerEncoder(encoder_layer, num_layers,
encoder_norm)
fpn_stages = []
fpn_routes = []
for i, (ch_in, ch_out) in enumerate(zip(in_channels, out_channels)):
if i > 0:
ch_in += ch_pre // 2
stage = nn.Sequential()
for j in range(stage_num):
stage.add_sublayer(
str(j),
eval(stage_fn)(block_fn,
ch_in if j == 0 else ch_out,
ch_out,
block_num,
act=act,
spp=(spp and i == 0),
use_alpha=use_alpha))
if drop_block:
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
fpn_stages.append(stage)
if i < self.num_blocks - 1:
fpn_routes.append(
ConvBNLayer(
ch_in=ch_out,
ch_out=ch_out // 2,
filter_size=1,
stride=1,
padding=0,
act=act))
ch_pre = ch_out
self.fpn_stages = nn.LayerList(fpn_stages)
self.fpn_routes = nn.LayerList(fpn_routes)
pan_stages = []
pan_routes = []
for i in reversed(range(self.num_blocks - 1)):
pan_routes.append(
ConvBNLayer(
ch_in=out_channels[i + 1],
ch_out=out_channels[i + 1],
filter_size=3,
stride=2,
padding=1,
act=act))
ch_in = out_channels[i] + out_channels[i + 1]
ch_out = out_channels[i]
stage = nn.Sequential()
for j in range(stage_num):
stage.add_sublayer(
str(j),
eval(stage_fn)(block_fn,
ch_in if j == 0 else ch_out,
ch_out,
block_num,
act=act,
spp=False,
use_alpha=use_alpha))
if drop_block:
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
pan_stages.append(stage)
self.pan_stages = nn.LayerList(pan_stages[::-1])
self.pan_routes = nn.LayerList(pan_routes[::-1])
def build_2d_sincos_position_embedding(
self,
w,
h,
embed_dim=1024,
temperature=10000., ):
grid_w = paddle.arange(int(w), dtype=paddle.float32)
grid_h = paddle.arange(int(h), dtype=paddle.float32)
grid_w, grid_h = paddle.meshgrid(grid_w, grid_h)
assert embed_dim % 4 == 0, 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
pos_dim = embed_dim // 4
omega = paddle.arange(pos_dim, dtype=paddle.float32) / pos_dim
omega = 1. / (temperature**omega)
        out_w = grid_w.flatten()[..., None] @ omega[None]
        out_h = grid_h.flatten()[..., None] @ omega[None]
pos_emb = paddle.concat(
[
paddle.sin(out_w), paddle.cos(out_w), paddle.sin(out_h),
paddle.cos(out_h)
],
axis=1)[None, :, :]
return pos_emb
def forward(self, blocks, for_mot=False):
if self.use_trans:
last_feat = blocks[-1]
n, c, h, w = last_feat.shape
# flatten [B, C, H, W] to [B, HxW, C]
src_flatten = last_feat.flatten(2).transpose([0, 2, 1])
if self.eval_size is not None and not self.training:
pos_embed = self.pos_embed
else:
pos_embed = self.build_2d_sincos_position_embedding(
w=w, h=h, embed_dim=self.hidden_dim)
memory = self.encoder(src_flatten, pos_embed=pos_embed)
last_feat_encode = memory.transpose([0, 2, 1]).reshape([n, c, h, w])
blocks[-1] = last_feat_encode
blocks = blocks[::-1]
fpn_feats = []
for i, block in enumerate(blocks):
if i > 0:
block = paddle.concat([route, block], axis=1)
route = self.fpn_stages[i](block)
fpn_feats.append(route)
if i < self.num_blocks - 1:
route = self.fpn_routes[i](route)
route = F.interpolate(
route, scale_factor=2., data_format=self.data_format)
pan_feats = [fpn_feats[-1], ]
route = fpn_feats[-1]
for i in reversed(range(self.num_blocks - 1)):
block = fpn_feats[i]
route = self.pan_routes[i](route)
block = paddle.concat([route, block], axis=1)
route = self.pan_stages[i](block)
pan_feats.append(route)
return pan_feats[::-1]
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self._out_channels]
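A minimal sketch, assuming PaddleDetection is installed and this file is importable as ppdet.modeling.necks.custom_pan (an assumed path). The arguments mirror a PP-YOLOE style PAN: three backbone levels in, three fused levels out, with SPP enabled on the deepest FPN stage:

import paddle
from ppdet.modeling.necks.custom_pan import CustomCSPPAN  # assumed path

neck = CustomCSPPAN(in_channels=[256, 512, 1024],
                    out_channels=[1024, 512, 256], act='swish', spp=True)
feats = [
    paddle.rand([1, 256, 80, 80]),
    paddle.rand([1, 512, 40, 40]),
    paddle.rand([1, 1024, 20, 20]),
]
outs = neck(feats)
print([o.shape for o in outs])
# [1, 1024, 20, 20], [1, 512, 40, 40], [1, 256, 80, 80]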


@@ -0,0 +1,150 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingUniform, Constant, Normal
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
__all__ = ['DilatedEncoder']
class Bottleneck(nn.Layer):
def __init__(self, in_channels, mid_channels, dilation):
super(Bottleneck, self).__init__()
self.conv1 = nn.Sequential(* [
nn.Conv2D(
in_channels,
mid_channels,
1,
padding=0,
weight_attr=ParamAttr(initializer=Normal(
mean=0, std=0.01)),
bias_attr=ParamAttr(initializer=Constant(0.0))),
nn.BatchNorm2D(
mid_channels,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
nn.ReLU(),
])
self.conv2 = nn.Sequential(* [
nn.Conv2D(
mid_channels,
mid_channels,
3,
padding=dilation,
dilation=dilation,
weight_attr=ParamAttr(initializer=Normal(
mean=0, std=0.01)),
bias_attr=ParamAttr(initializer=Constant(0.0))),
nn.BatchNorm2D(
mid_channels,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
nn.ReLU(),
])
self.conv3 = nn.Sequential(* [
nn.Conv2D(
mid_channels,
in_channels,
1,
padding=0,
weight_attr=ParamAttr(initializer=Normal(
mean=0, std=0.01)),
bias_attr=ParamAttr(initializer=Constant(0.0))),
nn.BatchNorm2D(
in_channels,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
nn.ReLU(),
])
def forward(self, x):
identity = x
y = self.conv3(self.conv2(self.conv1(x)))
return y + identity
@register
class DilatedEncoder(nn.Layer):
"""
DilatedEncoder used in YOLOF
"""
def __init__(self,
in_channels=[2048],
out_channels=[512],
block_mid_channels=128,
num_residual_blocks=4,
block_dilations=[2, 4, 6, 8]):
super(DilatedEncoder, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
assert len(self.in_channels) == 1, "YOLOF only has one level feature."
assert len(self.out_channels) == 1, "YOLOF only has one level feature."
self.block_mid_channels = block_mid_channels
self.num_residual_blocks = num_residual_blocks
self.block_dilations = block_dilations
out_ch = self.out_channels[0]
self.lateral_conv = nn.Conv2D(
self.in_channels[0],
out_ch,
1,
weight_attr=ParamAttr(initializer=KaimingUniform(
negative_slope=1, nonlinearity='leaky_relu')),
bias_attr=ParamAttr(initializer=Constant(value=0.0)))
self.lateral_norm = nn.BatchNorm2D(
out_ch,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
self.fpn_conv = nn.Conv2D(
out_ch,
out_ch,
3,
padding=1,
weight_attr=ParamAttr(initializer=KaimingUniform(
negative_slope=1, nonlinearity='leaky_relu')))
self.fpn_norm = nn.BatchNorm2D(
out_ch,
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
encoder_blocks = []
for i in range(self.num_residual_blocks):
encoder_blocks.append(
Bottleneck(
out_ch,
self.block_mid_channels,
dilation=block_dilations[i]))
self.dilated_encoder_blocks = nn.Sequential(*encoder_blocks)
def forward(self, inputs, for_mot=False):
out = self.lateral_norm(self.lateral_conv(inputs[0]))
out = self.fpn_norm(self.fpn_conv(out))
out = self.dilated_encoder_blocks(out)
return [out]
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
@property
def out_shape(self):
return [ShapeSpec(channels=c) for c in self.out_channels]
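
# Usage sketch (illustrative addition, not part of the original commit):
# runs the encoder on a hypothetical single-level C5 feature; the shapes
# below are assumptions for demonstration only.
if __name__ == '__main__':
    encoder = DilatedEncoder()  # defaults: 2048 -> 512 channels
    c5 = paddle.rand([1, 2048, 20, 20])  # backbone C5 output
    out = encoder([c5])  # spatial size preserved, channels projected to 512
    print(out[0].shape)  # [1, 512, 20, 20]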

View File

@@ -0,0 +1,212 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
from ..backbones.esnet import SEModule
from .csp_pan import ConvBNLayer, Channel_T, DPModule
__all__ = ['ESPAN']
class ES_Block(nn.Layer):
def __init__(self,
in_channels,
mid_channels,
out_channels,
kernel_size=5,
stride=1,
act='leaky_relu'):
super(ES_Block, self).__init__()
self._residual = ConvBNLayer(
in_channel=in_channels,
out_channel=out_channels,
kernel_size=1,
stride=1,
groups=1,
act=act)
self._conv_pw = ConvBNLayer(
in_channel=in_channels,
out_channel=mid_channels // 2,
kernel_size=1,
stride=1,
groups=1,
act=act)
self._conv_dw = ConvBNLayer(
in_channel=mid_channels // 2,
out_channel=mid_channels // 2,
kernel_size=kernel_size,
stride=stride,
groups=mid_channels // 2,
act=None)
self._se = SEModule(mid_channels)
self._conv_linear = ConvBNLayer(
in_channel=mid_channels,
out_channel=out_channels,
kernel_size=1,
stride=1,
groups=1,
act=act)
self._out_conv = ConvBNLayer(
in_channel=out_channels * 2,
out_channel=out_channels,
kernel_size=1,
stride=1,
groups=1,
act=act)
def forward(self, inputs):
x1 = self._residual(inputs)
x2 = self._conv_pw(inputs)
x3 = self._conv_dw(x2)
x3 = paddle.concat([x2, x3], axis=1)
x3 = self._se(x3)
x3 = self._conv_linear(x3)
out = paddle.concat([x1, x3], axis=1)
out = self._out_conv(out)
return out
@register
@serializable
class ESPAN(nn.Layer):
"""Path Aggregation Network with ES module.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
kernel_size (int): The conv2d kernel size of this Module.
num_features (int): Number of output features of CSPPAN module.
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
use_depthwise (bool): Whether to depthwise separable convolution in
blocks. Default: True
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=5,
num_features=3,
use_depthwise=True,
act='hard_swish',
spatial_scales=[0.125, 0.0625, 0.03125]):
super(ESPAN, self).__init__()
self.conv_t = Channel_T(in_channels, out_channels, act=act)
in_channels = [out_channels] * len(spatial_scales)
self.in_channels = in_channels
self.out_channels = out_channels
self.spatial_scales = spatial_scales
self.num_features = num_features
conv_func = DPModule if use_depthwise else ConvBNLayer
if self.num_features == 4:
self.first_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
self.second_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
            # avoid mutating the shared default spatial_scales list
            self.spatial_scales = self.spatial_scales + [
                self.spatial_scales[-1] / 2
            ]
# build top-down blocks
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.top_down_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1, 0, -1):
self.top_down_blocks.append(
ES_Block(
in_channels[idx - 1] * 2,
in_channels[idx - 1],
in_channels[idx - 1],
kernel_size=kernel_size,
stride=1,
act=act))
# build bottom-up blocks
self.downsamples = nn.LayerList()
self.bottom_up_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1):
self.downsamples.append(
conv_func(
in_channels[idx],
in_channels[idx],
kernel_size=kernel_size,
stride=2,
act=act))
self.bottom_up_blocks.append(
ES_Block(
in_channels[idx] * 2,
in_channels[idx + 1],
in_channels[idx + 1],
kernel_size=kernel_size,
stride=1,
act=act))
def forward(self, inputs):
"""
Args:
inputs (tuple[Tensor]): input features.
Returns:
tuple[Tensor]: CSPPAN features.
"""
assert len(inputs) == len(self.in_channels)
inputs = self.conv_t(inputs)
# top-down path
inner_outs = [inputs[-1]]
for idx in range(len(self.in_channels) - 1, 0, -1):
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]
            upsample_feat = self.upsample(feat_high)
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
paddle.concat([upsample_feat, feat_low], 1))
inner_outs.insert(0, inner_out)
# bottom-up path
outs = [inner_outs[0]]
for idx in range(len(self.in_channels) - 1):
feat_low = outs[-1]
            feat_high = inner_outs[idx + 1]
            downsample_feat = self.downsamples[idx](feat_low)
            out = self.bottom_up_blocks[idx](paddle.concat(
                [downsample_feat, feat_high], 1))
outs.append(out)
top_features = None
if self.num_features == 4:
top_features = self.first_top_conv(inputs[-1])
top_features = top_features + self.second_top_conv(outs[-1])
outs.append(top_features)
return tuple(outs)
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channels, stride=1. / s)
for s in self.spatial_scales
]
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
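
# Usage sketch (illustrative addition, not part of the original commit):
# runs ESPAN over three hypothetical feature levels at strides 8/16/32;
# channel and spatial sizes are assumptions for demonstration only.
if __name__ == '__main__':
    pan = ESPAN(in_channels=[96, 192, 384], out_channels=96)
    feats = [
        paddle.rand([1, c, 40 // (2**i), 40 // (2**i)])
        for i, c in enumerate([96, 192, 384])
    ]
    outs = pan(feats)  # three maps, each with out_channels channels
    print([o.shape for o in outs])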

View File

@@ -0,0 +1,231 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ..shape_spec import ShapeSpec
__all__ = ['FPN']
@register
@serializable
class FPN(nn.Layer):
"""
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
Args:
in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config
out_channel (int): output channel of each level
spatial_scales (list[float]): the spatial scales between input feature
maps and original input image which can be derived from the output
shape of backbone by from_config
has_extra_convs (bool): whether to add extra conv to the last level.
default False
extra_stage (int): the number of extra stages added to the last level.
default 1
use_c5 (bool): Whether to use c5 as the input of extra stage,
otherwise p5 is used. default True
        norm_type (string|None): The normalization type in FPN module. If
            norm_type is None, norm will not be used after conv; if it is a
            string, 'bn', 'gn' and 'sync_bn' are available. default None
norm_decay (float): weight decay for normalization layer weights.
default 0.
freeze_norm (bool): whether to freeze normalization layer.
default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True
"""
def __init__(self,
in_channels,
out_channel,
spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
has_extra_convs=False,
extra_stage=1,
use_c5=True,
norm_type=None,
norm_decay=0.,
freeze_norm=False,
relu_before_extra_convs=True):
super(FPN, self).__init__()
self.out_channel = out_channel
for s in range(extra_stage):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.has_extra_convs = has_extra_convs
self.extra_stage = extra_stage
self.use_c5 = use_c5
self.relu_before_extra_convs = relu_before_extra_convs
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.lateral_convs = []
self.fpn_convs = []
fan = out_channel * 3 * 3
# stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
# 0 <= st_stage < ed_stage <= 3
st_stage = 4 - len(in_channels)
ed_stage = st_stage + len(in_channels) - 1
for i in range(st_stage, ed_stage + 1):
if i == 3:
lateral_name = 'fpn_inner_res5_sum'
else:
lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
in_c = in_channels[i - st_stage]
if self.norm_type is not None:
lateral = self.add_sublayer(
lateral_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channel,
filter_size=1,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=in_c)))
else:
lateral = self.add_sublayer(
lateral_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channel,
kernel_size=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=in_c))))
self.lateral_convs.append(lateral)
fpn_name = 'fpn_res{}_sum'.format(i + 2)
if self.norm_type is not None:
fpn_conv = self.add_sublayer(
fpn_name,
ConvNormLayer(
ch_in=out_channel,
ch_out=out_channel,
filter_size=3,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
fpn_conv = self.add_sublayer(
fpn_name,
nn.Conv2D(
in_channels=out_channel,
out_channels=out_channel,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(fpn_conv)
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
if self.has_extra_convs:
for i in range(self.extra_stage):
lvl = ed_stage + 1 + i
if i == 0 and self.use_c5:
in_c = in_channels[-1]
else:
in_c = out_channel
extra_fpn_name = 'fpn_{}'.format(lvl + 2)
if self.norm_type is not None:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channel,
filter_size=3,
stride=2,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channel,
kernel_size=3,
stride=2,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(extra_fpn_conv)
@classmethod
def from_config(cls, cfg, input_shape):
return {
'in_channels': [i.channels for i in input_shape],
'spatial_scales': [1.0 / i.stride for i in input_shape],
}
def forward(self, body_feats):
laterals = []
num_levels = len(body_feats)
for i in range(num_levels):
laterals.append(self.lateral_convs[i](body_feats[i]))
for i in range(1, num_levels):
lvl = num_levels - i
upsample = F.interpolate(
laterals[lvl],
scale_factor=2.,
mode='nearest', )
laterals[lvl - 1] += upsample
fpn_output = []
for lvl in range(num_levels):
fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
if self.extra_stage > 0:
# use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has no extra convs'
fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
else:
if self.use_c5:
extra_source = body_feats[-1]
else:
extra_source = fpn_output[-1]
fpn_output.append(self.fpn_convs[num_levels](extra_source))
for i in range(1, self.extra_stage):
if self.relu_before_extra_convs:
fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
fpn_output[-1])))
else:
fpn_output.append(self.fpn_convs[num_levels + i](
fpn_output[-1]))
return fpn_output
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channel, stride=1. / s)
for s in self.spatial_scales
]
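
# Usage sketch (illustrative addition, not part of the original commit):
# builds an FPN over hypothetical ResNet-50 C2-C5 features; channel and
# spatial sizes are assumptions for demonstration only.
if __name__ == '__main__':
    import paddle
    fpn = FPN(in_channels=[256, 512, 1024, 2048], out_channel=256)
    feats = [
        paddle.rand([1, c, 64 // (2**i), 64 // (2**i)])
        for i, c in enumerate([256, 512, 1024, 2048])
    ]
    outs = fpn(feats)  # P2-P5 plus a max-pooled P6 (extra_stage=1 by default)
    print([o.shape for o in outs])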

View File

@@ -0,0 +1,129 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
import paddle.nn as nn
from ppdet.core.workspace import register
from ..shape_spec import ShapeSpec
__all__ = ['HRFPN']
@register
class HRFPN(nn.Layer):
"""
Args:
        in_channels (list): number of input feature channels from backbone
        out_channel (int): number of output feature channels
        share_conv (bool): whether to share one 3x3 conv across all output
            levels
        extra_stage (int): number of extra (downsampled) stages added to the
            HRFPN outputs
        spatial_scales (list): feature map scaling factors
        use_bias (bool): whether the conv layers have bias, False by default
"""
def __init__(self,
in_channels=[18, 36, 72, 144],
out_channel=256,
share_conv=False,
extra_stage=1,
spatial_scales=[1. / 4, 1. / 8, 1. / 16, 1. / 32],
use_bias=False):
super(HRFPN, self).__init__()
in_channel = sum(in_channels)
self.in_channel = in_channel
self.out_channel = out_channel
self.share_conv = share_conv
for i in range(extra_stage):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.num_out = len(self.spatial_scales)
self.use_bias = use_bias
bias_attr = False if use_bias is False else None
self.reduction = nn.Conv2D(
in_channels=in_channel,
out_channels=out_channel,
kernel_size=1,
bias_attr=bias_attr)
if share_conv:
self.fpn_conv = nn.Conv2D(
in_channels=out_channel,
out_channels=out_channel,
kernel_size=3,
padding=1,
bias_attr=bias_attr)
else:
self.fpn_conv = []
for i in range(self.num_out):
conv_name = "fpn_conv_" + str(i)
conv = self.add_sublayer(
conv_name,
nn.Conv2D(
in_channels=out_channel,
out_channels=out_channel,
kernel_size=3,
padding=1,
bias_attr=bias_attr))
self.fpn_conv.append(conv)
def forward(self, body_feats):
num_backbone_stages = len(body_feats)
outs = []
outs.append(body_feats[0])
# resize
for i in range(1, num_backbone_stages):
resized = F.interpolate(
body_feats[i], scale_factor=2**i, mode='bilinear')
outs.append(resized)
# concat
out = paddle.concat(outs, axis=1)
        assert out.shape[1] == self.in_channel, \
            'in_channel should be {}, but received {}'.format(
                self.in_channel, out.shape[1])
# reduction
out = self.reduction(out)
# conv
outs = [out]
for i in range(1, self.num_out):
outs.append(F.avg_pool2d(out, kernel_size=2**i, stride=2**i))
outputs = []
for i in range(self.num_out):
conv_func = self.fpn_conv if self.share_conv else self.fpn_conv[i]
conv = conv_func(outs[i])
outputs.append(conv)
fpn_feats = [outputs[k] for k in range(self.num_out)]
return fpn_feats
@classmethod
def from_config(cls, cfg, input_shape):
return {
'in_channels': [i.channels for i in input_shape],
'spatial_scales': [1.0 / i.stride for i in input_shape],
}
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channel, stride=1. / s)
for s in self.spatial_scales
]
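
# Usage sketch (illustrative addition, not part of the original commit):
# aggregates four hypothetical HRNet branches into five pyramid levels;
# channel and spatial sizes are assumptions for demonstration only.
if __name__ == '__main__':
    hrfpn = HRFPN()  # defaults: in_channels=[18, 36, 72, 144], out_channel=256
    feats = [
        paddle.rand([1, c, 32 // (2**i), 32 // (2**i)])
        for i, c in enumerate([18, 36, 72, 144])
    ]
    outs = hrfpn(feats)  # len(outs) == 5 with the default extra_stage=1
    print([o.shape for o in outs])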

View File

@@ -0,0 +1,168 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec
from ..backbones.lcnet import DepthwiseSeparable
from .csp_pan import ConvBNLayer, Channel_T, DPModule
__all__ = ['LCPAN']
@register
@serializable
class LCPAN(nn.Layer):
"""Path Aggregation Network with LCNet module.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale)
kernel_size (int): The conv2d kernel size of this Module.
num_features (int): Number of output features of CSPPAN module.
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
use_depthwise (bool): Whether to depthwise separable convolution in
blocks. Default: True
"""
def __init__(self,
in_channels,
out_channels,
kernel_size=5,
num_features=3,
use_depthwise=True,
act='hard_swish',
spatial_scales=[0.125, 0.0625, 0.03125]):
super(LCPAN, self).__init__()
self.conv_t = Channel_T(in_channels, out_channels, act=act)
in_channels = [out_channels] * len(spatial_scales)
self.in_channels = in_channels
self.out_channels = out_channels
self.spatial_scales = spatial_scales
self.num_features = num_features
conv_func = DPModule if use_depthwise else ConvBNLayer
NET_CONFIG = {
            # k, in_c, out_c, stride, use_se
"block1": [
[kernel_size, out_channels * 2, out_channels * 2, 1, False],
[kernel_size, out_channels * 2, out_channels, 1, False],
],
"block2": [
[kernel_size, out_channels * 2, out_channels * 2, 1, False],
[kernel_size, out_channels * 2, out_channels, 1, False],
]
}
if self.num_features == 4:
self.first_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
self.second_top_conv = conv_func(
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
            # avoid mutating the shared default spatial_scales list
            self.spatial_scales = self.spatial_scales + [
                self.spatial_scales[-1] / 2
            ]
# build top-down blocks
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.top_down_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1, 0, -1):
self.top_down_blocks.append(
                nn.Sequential(*[
DepthwiseSeparable(
num_channels=in_c,
num_filters=out_c,
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
"block1"])
]))
# build bottom-up blocks
self.downsamples = nn.LayerList()
self.bottom_up_blocks = nn.LayerList()
for idx in range(len(in_channels) - 1):
self.downsamples.append(
conv_func(
in_channels[idx],
in_channels[idx],
kernel_size=kernel_size,
stride=2,
act=act))
self.bottom_up_blocks.append(
                nn.Sequential(*[
DepthwiseSeparable(
num_channels=in_c,
num_filters=out_c,
dw_size=k,
stride=s,
use_se=se)
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
"block2"])
]))
def forward(self, inputs):
"""
Args:
inputs (tuple[Tensor]): input features.
Returns:
tuple[Tensor]: CSPPAN features.
"""
assert len(inputs) == len(self.in_channels)
inputs = self.conv_t(inputs)
# top-down path
inner_outs = [inputs[-1]]
for idx in range(len(self.in_channels) - 1, 0, -1):
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]
            upsample_feat = self.upsample(feat_high)
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
paddle.concat([upsample_feat, feat_low], 1))
inner_outs.insert(0, inner_out)
# bottom-up path
outs = [inner_outs[0]]
for idx in range(len(self.in_channels) - 1):
feat_low = outs[-1]
            feat_high = inner_outs[idx + 1]
            downsample_feat = self.downsamples[idx](feat_low)
            out = self.bottom_up_blocks[idx](paddle.concat(
                [downsample_feat, feat_high], 1))
outs.append(out)
top_features = None
if self.num_features == 4:
top_features = self.first_top_conv(inputs[-1])
top_features = top_features + self.second_top_conv(outs[-1])
outs.append(top_features)
return tuple(outs)
@property
def out_shape(self):
return [
ShapeSpec(
channels=self.out_channels, stride=1. / s)
for s in self.spatial_scales
]
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
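
# Usage sketch (illustrative addition, not part of the original commit):
# runs LCPAN over three hypothetical feature levels at strides 8/16/32;
# channel and spatial sizes are assumptions for demonstration only.
if __name__ == '__main__':
    pan = LCPAN(in_channels=[96, 192, 384], out_channels=96)
    feats = [
        paddle.rand([1, c, 40 // (2**i), 40 // (2**i)])
        for i, c in enumerate([96, 192, 384])
    ]
    outs = pan(feats)  # three maps, each with out_channels channels
    print([o.shape for o in outs])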

View File

@@ -0,0 +1,242 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant, Uniform, Normal, XavierUniform
from ppdet.core.workspace import register, serializable
from paddle.regularizer import L2Decay
from ppdet.modeling.layers import DeformableConvV2, ConvNormLayer, LiteConv
import math
from ppdet.modeling.ops import batch_norm
from ..shape_spec import ShapeSpec
__all__ = ['TTFFPN']
class Upsample(nn.Layer):
def __init__(self, ch_in, ch_out, norm_type='bn'):
super(Upsample, self).__init__()
fan_in = ch_in * 3 * 3
stdv = 1. / math.sqrt(fan_in)
self.dcn = DeformableConvV2(
ch_in,
ch_out,
kernel_size=3,
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
bias_attr=ParamAttr(
initializer=Constant(0),
regularizer=L2Decay(0.),
learning_rate=2.),
lr_scale=2.,
regularizer=L2Decay(0.))
self.bn = batch_norm(
ch_out, norm_type=norm_type, initializer=Constant(1.))
def forward(self, feat):
dcn = self.dcn(feat)
bn = self.bn(dcn)
relu = F.relu(bn)
out = F.interpolate(relu, scale_factor=2., mode='bilinear')
return out
class DeConv(nn.Layer):
def __init__(self, ch_in, ch_out, norm_type='bn'):
super(DeConv, self).__init__()
self.deconv = nn.Sequential()
conv1 = ConvNormLayer(
ch_in=ch_in,
ch_out=ch_out,
stride=1,
filter_size=1,
norm_type=norm_type,
initializer=XavierUniform())
conv2 = nn.Conv2DTranspose(
in_channels=ch_out,
out_channels=ch_out,
kernel_size=4,
padding=1,
stride=2,
groups=ch_out,
weight_attr=ParamAttr(initializer=XavierUniform()),
bias_attr=False)
bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
conv3 = ConvNormLayer(
ch_in=ch_out,
ch_out=ch_out,
stride=1,
filter_size=1,
norm_type=norm_type,
initializer=XavierUniform())
self.deconv.add_sublayer('conv1', conv1)
self.deconv.add_sublayer('relu6_1', nn.ReLU6())
self.deconv.add_sublayer('conv2', conv2)
self.deconv.add_sublayer('bn', bn)
self.deconv.add_sublayer('relu6_2', nn.ReLU6())
self.deconv.add_sublayer('conv3', conv3)
self.deconv.add_sublayer('relu6_3', nn.ReLU6())
def forward(self, inputs):
return self.deconv(inputs)
class LiteUpsample(nn.Layer):
def __init__(self, ch_in, ch_out, norm_type='bn'):
super(LiteUpsample, self).__init__()
self.deconv = DeConv(ch_in, ch_out, norm_type=norm_type)
self.conv = LiteConv(ch_in, ch_out, norm_type=norm_type)
def forward(self, inputs):
deconv_up = self.deconv(inputs)
conv = self.conv(inputs)
interp_up = F.interpolate(conv, scale_factor=2., mode='bilinear')
return deconv_up + interp_up
class ShortCut(nn.Layer):
def __init__(self,
layer_num,
ch_in,
ch_out,
norm_type='bn',
lite_neck=False,
name=None):
super(ShortCut, self).__init__()
shortcut_conv = nn.Sequential()
for i in range(layer_num):
fan_out = 3 * 3 * ch_out
std = math.sqrt(2. / fan_out)
in_channels = ch_in if i == 0 else ch_out
shortcut_name = name + '.conv.{}'.format(i)
if lite_neck:
shortcut_conv.add_sublayer(
shortcut_name,
LiteConv(
in_channels=in_channels,
out_channels=ch_out,
with_act=i < layer_num - 1,
norm_type=norm_type))
else:
shortcut_conv.add_sublayer(
shortcut_name,
nn.Conv2D(
in_channels=in_channels,
out_channels=ch_out,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=Normal(0, std)),
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
if i < layer_num - 1:
shortcut_conv.add_sublayer(shortcut_name + '.act',
nn.ReLU())
self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
def forward(self, feat):
out = self.shortcut(feat)
return out
@register
@serializable
class TTFFPN(nn.Layer):
"""
Args:
        in_channels (list): number of input feature channels from backbone.
            [128, 256, 512, 1024] by default, i.e. the channels of a
            DarkNet53 backbone with return_idx [1, 2, 3, 4].
        planes (list): the number of output feature channels of FPN.
            [256, 128, 64] by default
        shortcut_num (list): the number of convolution layers in each
            shortcut. [3, 2, 1] by default, meaning the shortcut of
            return_idx_1 has 3 convs, return_idx_2 has 2 and return_idx_3
            has 1.
        norm_type (string): norm type, 'sync_bn', 'bn' and 'gn' are optional.
            bn by default
        lite_neck (bool): whether to use lite convs in the TTFNet FPN.
            False by default
        fusion_method (string): the method used to fuse the upsampled and
            lateral features. 'add' and 'concat' are optional, add by default
"""
__shared__ = ['norm_type']
def __init__(self,
in_channels,
planes=[256, 128, 64],
shortcut_num=[3, 2, 1],
norm_type='bn',
lite_neck=False,
fusion_method='add'):
super(TTFFPN, self).__init__()
self.planes = planes
self.shortcut_num = shortcut_num[::-1]
self.shortcut_len = len(shortcut_num)
self.ch_in = in_channels[::-1]
self.fusion_method = fusion_method
self.upsample_list = []
self.shortcut_list = []
self.upper_list = []
for i, out_c in enumerate(self.planes):
in_c = self.ch_in[i] if i == 0 else self.upper_list[-1]
upsample_module = LiteUpsample if lite_neck else Upsample
upsample = self.add_sublayer(
'upsample.' + str(i),
upsample_module(
in_c, out_c, norm_type=norm_type))
self.upsample_list.append(upsample)
if i < self.shortcut_len:
shortcut = self.add_sublayer(
'shortcut.' + str(i),
ShortCut(
self.shortcut_num[i],
self.ch_in[i + 1],
out_c,
norm_type=norm_type,
lite_neck=lite_neck,
name='shortcut.' + str(i)))
self.shortcut_list.append(shortcut)
if self.fusion_method == 'add':
upper_c = out_c
elif self.fusion_method == 'concat':
upper_c = out_c * 2
else:
                raise ValueError(
                    'Illegal fusion method. Expected add or concat, '
                    'but received {}'.format(self.fusion_method))
self.upper_list.append(upper_c)
def forward(self, inputs):
feat = inputs[-1]
for i, out_c in enumerate(self.planes):
feat = self.upsample_list[i](feat)
if i < self.shortcut_len:
shortcut = self.shortcut_list[i](inputs[-i - 2])
if self.fusion_method == 'add':
feat = feat + shortcut
else:
feat = paddle.concat([feat, shortcut], axis=1)
return feat
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
@property
def out_shape(self):
return [ShapeSpec(channels=self.upper_list[-1], )]
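
# Usage sketch (illustrative addition, not part of the original commit):
# fuses four hypothetical DarkNet53 levels into one high-resolution map;
# channel and spatial sizes are assumptions for demonstration only.
if __name__ == '__main__':
    fpn = TTFFPN(in_channels=[128, 256, 512, 1024])
    feats = [
        paddle.rand([1, c, 64 // (2**i), 64 // (2**i)])
        for i, c in enumerate([128, 256, 512, 1024])
    ]
    out = fpn(feats)  # a single map at the finest input scale
    print(out.shape)  # channels == planes[-1] == 64 with fusion_method='add'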

File diff suppressed because it is too large