Replace the document detection model
paddle_detection/ppdet/modeling/necks/__init__.py (new file, 41 lines)
@@ -0,0 +1,41 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import fpn
from . import yolo_fpn
from . import hrfpn
from . import ttf_fpn
from . import centernet_fpn
from . import bifpn
from . import csp_pan
from . import es_pan
from . import lc_pan
from . import custom_pan
from . import dilated_encoder
from . import clrnet_fpn

from .fpn import *
from .yolo_fpn import *
from .hrfpn import *
from .ttf_fpn import *
from .centernet_fpn import *
from .blazeface_fpn import *
from .bifpn import *
from .csp_pan import *
from .es_pan import *
from .lc_pan import *
from .custom_pan import *
from .dilated_encoder import *
from .channel_mapper import *
from .clrnet_fpn import *
paddle_detection/ppdet/modeling/necks/bifpn.py (new file, 300 lines)
@@ -0,0 +1,300 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant

from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ..shape_spec import ShapeSpec

__all__ = ['BiFPN']


class SeparableConvLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels=None,
                 kernel_size=3,
                 norm_type='bn',
                 norm_groups=32,
                 act='swish'):
        super(SeparableConvLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn', None]
        assert act in ['swish', 'relu', None]

        self.in_channels = in_channels
        if out_channels is None:
            self.out_channels = self.in_channels
        self.norm_type = norm_type
        self.norm_groups = norm_groups
        self.depthwise_conv = nn.Conv2D(
            in_channels,
            in_channels,
            kernel_size,
            padding=kernel_size // 2,
            groups=in_channels,
            bias_attr=False)
        self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)

        # norm type
        if self.norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(self.out_channels)
        elif self.norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=self.norm_groups, num_channels=self.out_channels)

        # activation
        if act == 'swish':
            self.act = nn.Swish()
        elif act == 'relu':
            self.act = nn.ReLU()

    def forward(self, x):
        if self.act is not None:
            x = self.act(x)
        out = self.depthwise_conv(x)
        out = self.pointwise_conv(out)
        if self.norm_type is not None:
            out = self.norm(out)
        return out


class BiFPNCell(nn.Layer):
    def __init__(self,
                 channels=256,
                 num_levels=5,
                 eps=1e-5,
                 use_weighted_fusion=True,
                 kernel_size=3,
                 norm_type='bn',
                 norm_groups=32,
                 act='swish'):
        super(BiFPNCell, self).__init__()
        self.channels = channels
        self.num_levels = num_levels
        self.eps = eps
        self.use_weighted_fusion = use_weighted_fusion

        # up
        self.conv_up = nn.LayerList([
            SeparableConvLayer(
                self.channels,
                kernel_size=kernel_size,
                norm_type=norm_type,
                norm_groups=norm_groups,
                act=act) for _ in range(self.num_levels - 1)
        ])
        # down
        self.conv_down = nn.LayerList([
            SeparableConvLayer(
                self.channels,
                kernel_size=kernel_size,
                norm_type=norm_type,
                norm_groups=norm_groups,
                act=act) for _ in range(self.num_levels - 1)
        ])

        if self.use_weighted_fusion:
            self.up_weights = self.create_parameter(
                shape=[self.num_levels - 1, 2],
                attr=ParamAttr(initializer=Constant(1.)))
            self.down_weights = self.create_parameter(
                shape=[self.num_levels - 1, 3],
                attr=ParamAttr(initializer=Constant(1.)))

    def _feature_fusion_cell(self,
                             conv_layer,
                             lateral_feat,
                             sampling_feat,
                             route_feat=None,
                             weights=None):
        if self.use_weighted_fusion:
            weights = F.relu(weights)
            weights = weights / (weights.sum() + self.eps)
            if route_feat is not None:
                out_feat = weights[0] * lateral_feat + \
                           weights[1] * sampling_feat + \
                           weights[2] * route_feat
            else:
                out_feat = weights[0] * lateral_feat + \
                           weights[1] * sampling_feat
        else:
            if route_feat is not None:
                out_feat = lateral_feat + sampling_feat + route_feat
            else:
                out_feat = lateral_feat + sampling_feat

        out_feat = conv_layer(out_feat)
        return out_feat

    def forward(self, feats):
        # feats: [P3 - P7]
        lateral_feats = []

        # up
        up_feature = feats[-1]
        for i, feature in enumerate(feats[::-1]):
            if i == 0:
                lateral_feats.append(feature)
            else:
                shape = paddle.shape(feature)
                up_feature = F.interpolate(
                    up_feature, size=[shape[2], shape[3]])
                lateral_feature = self._feature_fusion_cell(
                    self.conv_up[i - 1],
                    feature,
                    up_feature,
                    weights=self.up_weights[i - 1]
                    if self.use_weighted_fusion else None)
                lateral_feats.append(lateral_feature)
                up_feature = lateral_feature

        out_feats = []
        # down
        down_feature = lateral_feats[-1]
        for i, (lateral_feature,
                route_feature) in enumerate(zip(lateral_feats[::-1], feats)):
            if i == 0:
                out_feats.append(lateral_feature)
            else:
                down_feature = F.max_pool2d(down_feature, 3, 2, 1)
                if i == len(feats) - 1:
                    route_feature = None
                    weights = self.down_weights[
                        i - 1][:2] if self.use_weighted_fusion else None
                else:
                    weights = self.down_weights[
                        i - 1] if self.use_weighted_fusion else None
                out_feature = self._feature_fusion_cell(
                    self.conv_down[i - 1],
                    lateral_feature,
                    down_feature,
                    route_feature,
                    weights=weights)
                out_feats.append(out_feature)
                down_feature = out_feature

        return out_feats


@register
@serializable
class BiFPN(nn.Layer):
    """
    Bidirectional Feature Pyramid Network, see https://arxiv.org/abs/1911.09070

    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config.
        out_channel (int): output channel of each level.
        num_extra_levels (int): the number of extra stages added to the last level.
            default: 2
        fpn_strides (List): The stride of each level.
        num_stacks (int): the number of stacks for BiFPN, default: 1.
        use_weighted_fusion (bool): use weighted feature fusion in BiFPN, default: True.
        norm_type (string|None): the normalization type in BiFPN module. If
            norm_type is None, norm will not be used after conv and if
            norm_type is string, bn, gn, sync_bn are available. default: bn.
        norm_groups (int): if you use gn, set this param.
        act (string|None): the activation function of BiFPN.
    """

    def __init__(self,
                 in_channels=(512, 1024, 2048),
                 out_channel=256,
                 num_extra_levels=2,
                 fpn_strides=[8, 16, 32, 64, 128],
                 num_stacks=1,
                 use_weighted_fusion=True,
                 norm_type='bn',
                 norm_groups=32,
                 act='swish'):
        super(BiFPN, self).__init__()
        assert num_stacks > 0, "The number of stacks of BiFPN is at least 1."
        assert norm_type in ['bn', 'sync_bn', 'gn', None]
        assert act in ['swish', 'relu', None]
        assert num_extra_levels >= 0, \
            "The `num_extra_levels` must be non negative(>=0)."

        self.in_channels = in_channels
        self.out_channel = out_channel
        self.num_extra_levels = num_extra_levels
        self.num_stacks = num_stacks
        self.use_weighted_fusion = use_weighted_fusion
        self.norm_type = norm_type
        self.norm_groups = norm_groups
        self.act = act
        self.num_levels = len(self.in_channels) + self.num_extra_levels
        if len(fpn_strides) != self.num_levels:
            for i in range(self.num_extra_levels):
                fpn_strides += [fpn_strides[-1] * 2]
        self.fpn_strides = fpn_strides

        self.lateral_convs = nn.LayerList()
        for in_c in in_channels:
            self.lateral_convs.append(
                ConvNormLayer(in_c, self.out_channel, 1, 1))
        if self.num_extra_levels > 0:
            self.extra_convs = nn.LayerList()
            for i in range(self.num_extra_levels):
                if i == 0:
                    self.extra_convs.append(
                        ConvNormLayer(self.in_channels[-1], self.out_channel, 3,
                                      2))
                else:
                    self.extra_convs.append(nn.MaxPool2D(3, 2, 1))

        self.bifpn_cells = nn.LayerList()
        for i in range(self.num_stacks):
            self.bifpn_cells.append(
                BiFPNCell(
                    self.out_channel,
                    self.num_levels,
                    use_weighted_fusion=self.use_weighted_fusion,
                    norm_type=self.norm_type,
                    norm_groups=self.norm_groups,
                    act=self.act))

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {
            'in_channels': [i.channels for i in input_shape],
            'fpn_strides': [i.stride for i in input_shape]
        }

    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self.out_channel, stride=s) for s in self.fpn_strides
        ]

    def forward(self, feats):
        assert len(feats) == len(self.in_channels)
        fpn_feats = []
        for conv_layer, feature in zip(self.lateral_convs, feats):
            fpn_feats.append(conv_layer(feature))
        if self.num_extra_levels > 0:
            feat = feats[-1]
            for conv_layer in self.extra_convs:
                feat = conv_layer(feat)
                fpn_feats.append(feat)

        for bifpn_cell in self.bifpn_cells:
            fpn_feats = bifpn_cell(fpn_feats)
        return fpn_feats
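Note: a minimal usage sketch for the BiFPN neck above. The channel and stride values below are illustrative assumptions, not taken from this commit's configs; in a real config `in_channels` and `fpn_strides` come from the backbone via `from_config`.

    import paddle
    from ppdet.modeling.necks.bifpn import BiFPN

    # assume three backbone levels (C3, C4, C5); two extra levels give P3..P7
    neck = BiFPN(in_channels=(512, 1024, 2048), out_channel=256, num_extra_levels=2)
    feats = [paddle.rand([1, c, s, s])
             for c, s in zip([512, 1024, 2048], [80, 40, 20])]
    outs = neck(feats)  # 5 maps, each with 256 channels
    print([tuple(o.shape) for o in outs])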
paddle_detection/ppdet/modeling/necks/blazeface_fpn.py (new file, 213 lines)
@@ -0,0 +1,213 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn.functional as F
from paddle import ParamAttr
import paddle.nn as nn
from paddle.nn.initializer import KaimingNormal
from ppdet.core.workspace import register, serializable
from ..shape_spec import ShapeSpec

__all__ = ['BlazeNeck']


def hard_swish(x):
    return x * F.relu6(x + 3) / 6.


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 num_groups=1,
                 act='relu',
                 conv_lr=0.1,
                 conv_decay=0.,
                 norm_decay=0.,
                 norm_type='bn',
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self._conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(
                learning_rate=conv_lr, initializer=KaimingNormal()),
            bias_attr=False)

        if norm_type in ['sync_bn', 'bn']:
            self._batch_norm = nn.BatchNorm2D(out_channels)

    def forward(self, x):
        x = self._conv(x)
        x = self._batch_norm(x)
        if self.act == "relu":
            x = F.relu(x)
        elif self.act == "relu6":
            x = F.relu6(x)
        elif self.act == 'leaky':
            x = F.leaky_relu(x)
        elif self.act == 'hard_swish':
            x = hard_swish(x)
        return x


class FPN(nn.Layer):
    def __init__(self, in_channels, out_channels, name=None):
        super(FPN, self).__init__()
        self.conv1_fpn = ConvBNLayer(
            in_channels,
            out_channels // 2,
            kernel_size=1,
            padding=0,
            stride=1,
            act='leaky',
            name=name + '_output1')
        self.conv2_fpn = ConvBNLayer(
            in_channels,
            out_channels // 2,
            kernel_size=1,
            padding=0,
            stride=1,
            act='leaky',
            name=name + '_output2')
        self.conv3_fpn = ConvBNLayer(
            out_channels // 2,
            out_channels // 2,
            kernel_size=3,
            padding=1,
            stride=1,
            act='leaky',
            name=name + '_merge')

    def forward(self, input):
        output1 = self.conv1_fpn(input[0])
        output2 = self.conv2_fpn(input[1])
        up2 = F.upsample(
            output2, size=paddle.shape(output1)[-2:], mode='nearest')
        output1 = paddle.add(output1, up2)
        output1 = self.conv3_fpn(output1)
        return output1, output2


class SSH(nn.Layer):
    def __init__(self, in_channels, out_channels, name=None):
        super(SSH, self).__init__()
        assert out_channels % 4 == 0
        self.conv0_ssh = ConvBNLayer(
            in_channels,
            out_channels // 2,
            kernel_size=3,
            padding=1,
            stride=1,
            act=None,
            name=name + 'ssh_conv3')
        self.conv1_ssh = ConvBNLayer(
            out_channels // 2,
            out_channels // 4,
            kernel_size=3,
            padding=1,
            stride=1,
            act='leaky',
            name=name + 'ssh_conv5_1')
        self.conv2_ssh = ConvBNLayer(
            out_channels // 4,
            out_channels // 4,
            kernel_size=3,
            padding=1,
            stride=1,
            act=None,
            name=name + 'ssh_conv5_2')
        self.conv3_ssh = ConvBNLayer(
            out_channels // 4,
            out_channels // 4,
            kernel_size=3,
            padding=1,
            stride=1,
            act='leaky',
            name=name + 'ssh_conv7_1')
        self.conv4_ssh = ConvBNLayer(
            out_channels // 4,
            out_channels // 4,
            kernel_size=3,
            padding=1,
            stride=1,
            act=None,
            name=name + 'ssh_conv7_2')

    def forward(self, x):
        conv0 = self.conv0_ssh(x)
        conv1 = self.conv1_ssh(conv0)
        conv2 = self.conv2_ssh(conv1)
        conv3 = self.conv3_ssh(conv2)
        conv4 = self.conv4_ssh(conv3)
        concat = paddle.concat([conv0, conv2, conv4], axis=1)
        return F.relu(concat)


@register
@serializable
class BlazeNeck(nn.Layer):
    def __init__(self, in_channel, neck_type="None", data_format='NCHW'):
        super(BlazeNeck, self).__init__()
        self.neck_type = neck_type
        self.reture_input = False
        self._out_channels = in_channel
        if self.neck_type == 'None':
            self.reture_input = True
        if "fpn" in self.neck_type:
            self.fpn = FPN(self._out_channels[0],
                           self._out_channels[1],
                           name='fpn')
            self._out_channels = [
                self._out_channels[0] // 2, self._out_channels[1] // 2
            ]
        if "ssh" in self.neck_type:
            self.ssh1 = SSH(self._out_channels[0],
                            self._out_channels[0],
                            name='ssh1')
            self.ssh2 = SSH(self._out_channels[1],
                            self._out_channels[1],
                            name='ssh2')
            self._out_channels = [self._out_channels[0], self._out_channels[1]]

    def forward(self, inputs):
        if self.reture_input:
            return inputs
        output1, output2 = None, None
        if "fpn" in self.neck_type:
            backout_4, backout_1 = inputs
            output1, output2 = self.fpn([backout_4, backout_1])
        if self.neck_type == "only_fpn":
            return [output1, output2]
        if self.neck_type == "only_ssh":
            output1, output2 = inputs
        feature1 = self.ssh1(output1)
        feature2 = self.ssh2(output2)
        return [feature1, feature2]

    @property
    def out_shape(self):
        return [
            ShapeSpec(channels=c)
            for c in [self._out_channels[0], self._out_channels[1]]
        ]
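Note: BlazeNeck selects its behaviour from `neck_type` ("None", "fpn_ssh", "only_fpn", "only_ssh"). A small sketch, assuming two 96-channel BlazeNet feature maps (assumed values, not taken from this commit):

    import paddle
    from ppdet.modeling.necks.blazeface_fpn import BlazeNeck

    neck = BlazeNeck(in_channel=[96, 96], neck_type='fpn_ssh')
    shallow = paddle.rand([1, 96, 32, 32])  # assumed shallower backbone output
    deep = paddle.rand([1, 96, 16, 16])     # assumed deeper backbone output
    f1, f2 = neck([shallow, deep])          # two SSH-refined maps, 48 channels each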
paddle_detection/ppdet/modeling/necks/centernet_fpn.py (new file, 426 lines)
@@ -0,0 +1,426 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn.initializer import Uniform
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.backbones.hardnet import ConvLayer, HarDBlock
from ..shape_spec import ShapeSpec

__all__ = ['CenterNetDLAFPN', 'CenterNetHarDNetFPN']


# SGE attention
class BasicConv(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 relu=True,
                 bn=True,
                 bias_attr=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias_attr)
        self.bn = nn.BatchNorm2D(
            out_planes,
            epsilon=1e-5,
            momentum=0.01,
            weight_attr=False,
            bias_attr=False) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class ChannelPool(nn.Layer):
    def forward(self, x):
        return paddle.concat(
            (paddle.max(x, 1).unsqueeze(1), paddle.mean(x, 1).unsqueeze(1)),
            axis=1)


class SpatialGate(nn.Layer):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(
            2,
            1,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = F.sigmoid(x_out)  # broadcasting
        return x * scale


def fill_up_weights(up):
    weight = up.weight.numpy()
    f = math.ceil(weight.shape[2] / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(weight.shape[2]):
        for j in range(weight.shape[3]):
            weight[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, weight.shape[0]):
        weight[c, 0, :, :] = weight[0, 0, :, :]
    up.weight.set_value(weight)


class IDAUp(nn.Layer):
    def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
        super(IDAUp, self).__init__()
        for i in range(1, len(ch_ins)):
            ch_in = ch_ins[i]
            up_s = int(up_strides[i])
            fan_in = ch_in * 3 * 3
            stdv = 1. / math.sqrt(fan_in)
            proj = nn.Sequential(
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())
            node = nn.Sequential(
                ConvNormLayer(
                    ch_out,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())

            kernel_size = up_s * 2
            fan_in = ch_out * kernel_size * kernel_size
            stdv = 1. / math.sqrt(fan_in)
            up = nn.Conv2DTranspose(
                ch_out,
                ch_out,
                kernel_size=up_s * 2,
                stride=up_s,
                padding=up_s // 2,
                groups=ch_out,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=False)
            fill_up_weights(up)
            setattr(self, 'proj_' + str(i), proj)
            setattr(self, 'up_' + str(i), up)
            setattr(self, 'node_' + str(i), node)

    def forward(self, inputs, start_level, end_level):
        for i in range(start_level + 1, end_level):
            upsample = getattr(self, 'up_' + str(i - start_level))
            project = getattr(self, 'proj_' + str(i - start_level))
            inputs[i] = project(inputs[i])
            inputs[i] = upsample(inputs[i])
            node = getattr(self, 'node_' + str(i - start_level))
            inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))
        return inputs


class DLAUp(nn.Layer):
    def __init__(self, start_level, channels, scales, ch_in=None, dcn_v2=True):
        super(DLAUp, self).__init__()
        self.start_level = start_level
        if ch_in is None:
            ch_in = channels
        self.channels = channels
        channels = list(channels)
        scales = np.array(scales, dtype=int)
        for i in range(len(channels) - 1):
            j = -i - 2
            setattr(
                self,
                'ida_{}'.format(i),
                IDAUp(
                    ch_in[j:],
                    channels[j],
                    scales[j:] // scales[j],
                    dcn_v2=dcn_v2))
            scales[j + 1:] = scales[j]
            ch_in[j + 1:] = [channels[j] for _ in channels[j + 1:]]

    def forward(self, inputs):
        out = [inputs[-1]]  # start with 32
        for i in range(len(inputs) - self.start_level - 1):
            ida = getattr(self, 'ida_{}'.format(i))
            outputs = ida(inputs, len(inputs) - i - 2, len(inputs))
            out.insert(0, outputs[-1])
        return out


@register
@serializable
class CenterNetDLAFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [16, 32, 64, 128, 256, 512] by default, means the channels of DLA-34
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        last_level (int): the last level of input feature fed into the upsamplng block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
        first_level (None): the first level of input feature fed into the upsamplng block.
            if None, the first level stands for logs(down_ratio)
        dcn_v2 (bool): whether use the DCNv2, True by default
        with_sge (bool): whether use SGE attention, False by default
    """

    def __init__(self,
                 in_channels,
                 down_ratio=4,
                 last_level=5,
                 out_channel=0,
                 first_level=None,
                 dcn_v2=True,
                 with_sge=False):
        super(CenterNetDLAFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetDLAFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        scales = [2**i for i in range(len(in_channels[self.first_level:]))]
        self.dla_up = DLAUp(
            self.first_level,
            in_channels[self.first_level:],
            scales,
            dcn_v2=dcn_v2)
        self.out_channel = out_channel
        if out_channel == 0:
            self.out_channel = in_channels[self.first_level]
        self.ida_up = IDAUp(
            in_channels[self.first_level:self.last_level],
            self.out_channel,
            [2**i for i in range(self.last_level - self.first_level)],
            dcn_v2=dcn_v2)

        self.with_sge = with_sge
        if self.with_sge:
            self.sge_attention = SpatialGate()

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

    def forward(self, body_feats):

        inputs = [body_feats[i] for i in range(len(body_feats))]

        dla_up_feats = self.dla_up(inputs)

        ida_up_feats = []
        for i in range(self.last_level - self.first_level):
            ida_up_feats.append(dla_up_feats[i].clone())

        self.ida_up(ida_up_feats, 0, len(ida_up_feats))

        feat = ida_up_feats[-1]
        if self.with_sge:
            feat = self.sge_attention(feat)
        if self.down_ratio != 4:
            feat = F.interpolate(
                feat,
                scale_factor=self.down_ratio // 4,
                mode="bilinear",
                align_corners=True)
        return feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]


class TransitionUp(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super().__init__()

    def forward(self, x, skip):
        w, h = skip.shape[2], skip.shape[3]
        out = F.interpolate(x, size=(w, h), mode="bilinear", align_corners=True)
        out = paddle.concat([out, skip], 1)
        return out


@register
@serializable
class CenterNetHarDNetFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [96, 214, 458, 784] by default, means the channels of HarDNet85
        num_layers (int): HarDNet laters, 85 by default
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        first_level (int|None): the first level of input feature fed into the upsamplng block.
            if None, the first level stands for logs(down_ratio) - 1

        last_level (int): the last level of input feature fed into the upsamplng block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
    """

    def __init__(self,
                 in_channels,
                 num_layers=85,
                 down_ratio=4,
                 first_level=None,
                 last_level=4,
                 out_channel=0):
        super(CenterNetHarDNetFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) - 1 if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetDLAFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        self.last_pool = nn.AvgPool2D(kernel_size=2, stride=2)

        assert num_layers in [68, 85], "HarDNet-{} not support.".format(
            num_layers)
        if num_layers == 85:
            self.last_proj = ConvLayer(784, 256, kernel_size=1)
            self.last_blk = HarDBlock(768, 80, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 13]
            self.SC = [32, 32, 0]
            gr = [64, 48, 28]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 160 + self.SC[1], 96 + self.SC[2]]
            channels = [96, 214, 458, 784]
            self.skip_lv = 3

        elif num_layers == 68:
            self.last_proj = ConvLayer(654, 192, kernel_size=1)
            self.last_blk = HarDBlock(576, 72, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 11]
            self.SC = [32, 32, 0]
            gr = [48, 32, 20]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 96 + self.SC[1], 64 + self.SC[2]]
            channels = [64, 124, 328, 654]
            self.skip_lv = 2

        self.transUpBlocks = nn.LayerList([])
        self.denseBlocksUp = nn.LayerList([])
        self.conv1x1_up = nn.LayerList([])
        self.avg9x9 = nn.AvgPool2D(kernel_size=(9, 9), stride=1, padding=(4, 4))
        prev_ch = self.last_blk.get_out_ch()

        for i in range(3):
            skip_ch = channels[3 - i]
            self.transUpBlocks.append(TransitionUp(prev_ch, prev_ch))
            if i < self.skip_lv:
                cur_ch = prev_ch + skip_ch
            else:
                cur_ch = prev_ch
            self.conv1x1_up.append(
                ConvLayer(
                    cur_ch, ch_list2[i], kernel_size=1))
            cur_ch = ch_list2[i]
            cur_ch -= self.SC[i]
            cur_ch *= 3

            blk = HarDBlock(cur_ch, gr[i], 1.7, layers[i])
            self.denseBlocksUp.append(blk)
            prev_ch = blk.get_out_ch()

        prev_ch += self.SC[0] + self.SC[1] + self.SC[2]
        self.out_channel = prev_ch

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

    def forward(self, body_feats):
        x = body_feats[-1]
        x_sc = []
        x = self.last_proj(x)
        x = self.last_pool(x)
        x2 = self.avg9x9(x)
        x3 = x / (x.sum((2, 3), keepdim=True) + 0.1)
        x = paddle.concat([x, x2, x3], 1)
        x = self.last_blk(x)

        for i in range(3):
            skip_x = body_feats[3 - i]
            x_up = self.transUpBlocks[i](x, skip_x)
            x_ch = self.conv1x1_up[i](x_up)
            if self.SC[i] > 0:
                end = x_ch.shape[1]
                new_st = end - self.SC[i]
                x_sc.append(x_ch[:, new_st:, :, :])
                x_ch = x_ch[:, :new_st, :, :]
            x2 = self.avg9x9(x_ch)
            x3 = x_ch / (x_ch.sum((2, 3), keepdim=True) + 0.1)
            x_new = paddle.concat([x_ch, x2, x3], 1)
            x = self.denseBlocksUp[i](x_new)

        scs = [x]
        for i in range(3):
            if self.SC[i] > 0:
                scs.insert(
                    0,
                    F.interpolate(
                        x_sc[i],
                        size=(x.shape[2], x.shape[3]),
                        mode="bilinear",
                        align_corners=True))
        neck_feat = paddle.concat(scs, 1)
        return neck_feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
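Note: a sketch of driving CenterNetDLAFPN with DLA-34-like inputs. The channels follow the docstring above; `dcn_v2=False` is assumed here so the sketch does not depend on the deformable-conv op.

    import paddle
    from ppdet.modeling.necks.centernet_fpn import CenterNetDLAFPN

    in_channels = [16, 32, 64, 128, 256, 512]  # DLA-34 levels, strides 1..32
    neck = CenterNetDLAFPN(in_channels, down_ratio=4, last_level=5, dcn_v2=False)
    feats = [paddle.rand([1, c, 512 // 2**i, 512 // 2**i])
             for i, c in enumerate(in_channels)]
    hm_feat = neck(feats)  # single 64-channel map at 1/4 of the input resolution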
paddle_detection/ppdet/modeling/necks/channel_mapper.py (new file, 122 lines)
@@ -0,0 +1,122 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on mmdet: git@github.com:open-mmlab/mmdetection.git
"""
import paddle.nn as nn

from ppdet.core.workspace import register, serializable
from ..backbones.hrnet import ConvNormLayer
from ..shape_spec import ShapeSpec
from ..initializer import xavier_uniform_, constant_

__all__ = ['ChannelMapper']


@register
@serializable
class ChannelMapper(nn.Layer):
    """Channel Mapper to reduce/increase channels of backbone features.

    This is used to reduce/increase channels of backbone features.

    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (int): Number of output channels (used at each scale).
        kernel_size (int, optional): kernel_size for reducing channels (used
            at each scale). Default: 3.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None.
        norm_cfg (dict, optional): Config dict for normalization layer.
            Default: None.
        act_cfg (dict, optional): Config dict for activation layer in
            ConvModule. Default: dict(type='ReLU').
        num_outs (int, optional): Number of output feature maps. There
            would be extra_convs when num_outs larger than the length
            of in_channels.
        init_cfg (dict or list[dict], optional): Initialization config dict.

    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 norm_type="gn",
                 norm_groups=32,
                 act='relu',
                 num_outs=None,
                 init_cfg=dict(
                     type='Xavier', layer='Conv2d', distribution='uniform')):
        super(ChannelMapper, self).__init__()
        assert isinstance(in_channels, list)
        self.extra_convs = None
        if num_outs is None:
            num_outs = len(in_channels)
        self.convs = nn.LayerList()
        for in_channel in in_channels:
            self.convs.append(
                ConvNormLayer(
                    ch_in=in_channel,
                    ch_out=out_channels,
                    filter_size=kernel_size,
                    norm_type='gn',
                    norm_groups=32,
                    act=act))

        if num_outs > len(in_channels):
            self.extra_convs = nn.LayerList()
            for i in range(len(in_channels), num_outs):
                if i == len(in_channels):
                    in_channel = in_channels[-1]
                else:
                    in_channel = out_channels
                self.extra_convs.append(
                    ConvNormLayer(
                        ch_in=in_channel,
                        ch_out=out_channels,
                        filter_size=3,
                        stride=2,
                        norm_type='gn',
                        norm_groups=32,
                        act=act))
        self.init_weights()

    def forward(self, inputs):
        """Forward function."""
        assert len(inputs) == len(self.convs)
        outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
        if self.extra_convs:
            for i in range(len(self.extra_convs)):
                if i == 0:
                    outs.append(self.extra_convs[0](inputs[-1]))
                else:
                    outs.append(self.extra_convs[i](outs[-1]))
        return tuple(outs)

    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self.out_channel, stride=1. / s)
            for s in self.spatial_scales
        ]

    def init_weights(self):
        """Initialize the transformer weights."""
        for p in self.parameters():
            if p.rank() > 1:
                xavier_uniform_(p)
                if hasattr(p, 'bias') and p.bias is not None:
                    constant_(p.bias)
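Note: a small sketch of ChannelMapper, assuming three backbone levels; with `num_outs=4` one extra stride-2 conv is appended on top of the deepest input. The channel values are illustrative assumptions.

    import paddle
    from ppdet.modeling.necks.channel_mapper import ChannelMapper

    mapper = ChannelMapper(in_channels=[512, 1024, 2048], out_channels=256, num_outs=4)
    feats = [paddle.rand([1, c, s, s])
             for c, s in zip([512, 1024, 2048], [64, 32, 16])]
    outs = mapper(feats)  # tuple of 4 maps, all with 256 channels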
paddle_detection/ppdet/modeling/necks/clrnet_fpn.py (new file, 254 lines)
@@ -0,0 +1,254 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from ppdet.modeling.initializer import kaiming_normal_, constant_
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.shape_spec import ShapeSpec

__all__ = ['CLRFPN']


@register
@serializable
class CLRFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config
        out_channel (int): output channel of each level
        spatial_scales (list[float]): the spatial scales between input feature
            maps and original input image which can be derived from the output
            shape of backbone by from_config
        has_extra_convs (bool): whether to add extra conv to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): Whether to use c5 as the input of extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): The normalization type in FPN module. If
            norm_type is None, norm will not be used after conv and if
            norm_type is string, bn, gn, sync_bn are available. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layer.
            default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default False

    """

    def __init__(self,
                 in_channels,
                 out_channel,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(CLRFPN, self).__init__()
        self.out_channel = out_channel
        for s in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm
        self.in_channels = in_channels
        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channel * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1

        for i in range(st_stage, ed_stage + 1):
            # if i == 3:
            #     lateral_name = 'fpn_inner_res5_sum'
            # else:
            #     lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            lateral_name = "lateral_convs.{}.conv".format(i - 1)
            in_c = in_channels[i - st_stage]
            if self.norm_type is not None:
                lateral = self.add_sublayer(
                    lateral_name,
                    ConvNormLayer(
                        ch_in=in_c,
                        ch_out=out_channel,
                        filter_size=1,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=in_c)))
            else:
                lateral = self.add_sublayer(
                    lateral_name,
                    nn.Conv2D(
                        in_channels=in_c,
                        out_channels=out_channel,
                        kernel_size=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=in_c))))
            self.lateral_convs.append(lateral)

            fpn_name = "fpn_convs.{}.conv".format(i - 1)
            if self.norm_type is not None:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    ConvNormLayer(
                        ch_in=out_channel,
                        ch_out=out_channel,
                        filter_size=3,
                        stride=1,
                        norm_type=self.norm_type,
                        norm_decay=self.norm_decay,
                        freeze_norm=self.freeze_norm,
                        initializer=XavierUniform(fan_out=fan)))
            else:
                fpn_conv = self.add_sublayer(
                    fpn_name,
                    nn.Conv2D(
                        in_channels=out_channel,
                        out_channels=out_channel,
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(
                            initializer=XavierUniform(fan_out=fan))))
            self.fpn_convs.append(fpn_conv)

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channel
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                if self.norm_type is not None:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        ConvNormLayer(
                            ch_in=in_c,
                            ch_out=out_channel,
                            filter_size=3,
                            stride=2,
                            norm_type=self.norm_type,
                            norm_decay=self.norm_decay,
                            freeze_norm=self.freeze_norm,
                            initializer=XavierUniform(fan_out=fan)))
                else:
                    extra_fpn_conv = self.add_sublayer(
                        extra_fpn_name,
                        nn.Conv2D(
                            in_channels=in_c,
                            out_channels=out_channel,
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=XavierUniform(fan_out=fan))))
                self.fpn_convs.append(extra_fpn_conv)
        self.init_weights()

    def init_weights(self):
        for m in self.lateral_convs:
            if isinstance(m, (nn.Conv1D, nn.Conv2D)):
                kaiming_normal_(
                    m.weight, a=0, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    constant_(m.bias, value=0.)
            elif isinstance(m, (nn.BatchNorm1D, nn.BatchNorm2D)):
                constant_(m.weight, value=1)
                constant_(m.bias, value=0)
        for m in self.fpn_convs:
            if isinstance(m, (nn.Conv1D, nn.Conv2D)):
                kaiming_normal_(
                    m.weight, a=0, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    constant_(m.bias, value=0.)
            elif isinstance(m, (nn.BatchNorm1D, nn.BatchNorm2D)):
                constant_(m.weight, value=1)
                constant_(m.bias, value=0)

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {}

    def forward(self, body_feats):
        laterals = []
        if len(body_feats) > len(self.in_channels):
            for _ in range(len(body_feats) - len(self.in_channels)):
                del body_feats[0]
        num_levels = len(body_feats)
        # print("body_feats",num_levels)
        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest', )
            laterals[lvl - 1] += upsample

        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output

    @property
    def out_shape(self):
        return [
            ShapeSpec(
                channels=self.out_channel, stride=1. / s)
            for s in self.spatial_scales
        ]
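Note: CLRFPN is a conventional FPN used by the CLRNet lane-detection models. A minimal sketch with assumed ResNet-style channels; with the defaults (`extra_stage=1`, no extra convs) a max-pooled level is appended on top.

    import paddle
    from ppdet.modeling.necks.clrnet_fpn import CLRFPN

    fpn = CLRFPN(in_channels=[512, 1024, 2048], out_channel=64)
    feats = [paddle.rand([1, c, s, s])
             for c, s in zip([512, 1024, 2048], [40, 20, 10])]
    outs = fpn(feats)  # 4 maps, each with 64 channels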
paddle_detection/ppdet/modeling/necks/csp_pan.py (new file, 363 lines)
@@ -0,0 +1,363 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# The code is based on:
|
||||
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/yolox_pafpn.py
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['CSPPAN']
|
||||
|
||||
|
||||
class ConvBNLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channel=96,
|
||||
out_channel=96,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act='leaky_relu'):
|
||||
super(ConvBNLayer, self).__init__()
|
||||
initializer = nn.initializer.KaimingUniform()
|
||||
self.conv = nn.Conv2D(
|
||||
in_channels=in_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=kernel_size,
|
||||
groups=groups,
|
||||
padding=(kernel_size - 1) // 2,
|
||||
stride=stride,
|
||||
weight_attr=ParamAttr(initializer=initializer),
|
||||
bias_attr=False)
|
||||
self.bn = nn.BatchNorm2D(out_channel)
|
||||
if act == "hard_swish":
|
||||
act = 'hardswish'
|
||||
self.act = act
|
||||
|
||||
def forward(self, x):
|
||||
x = self.bn(self.conv(x))
|
||||
if self.act:
|
||||
x = getattr(F, self.act)(x)
|
||||
return x
|
||||
|
||||
|
||||
class DPModule(nn.Layer):
|
||||
"""
|
||||
Depth-wise and point-wise module.
|
||||
Args:
|
||||
in_channel (int): The input channels of this Module.
|
||||
out_channel (int): The output channels of this Module.
|
||||
kernel_size (int): The conv2d kernel size of this Module.
|
||||
stride (int): The conv2d's stride of this Module.
|
||||
act (str): The activation function of this Module,
|
||||
Now support `leaky_relu` and `hard_swish`.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channel=96,
|
||||
out_channel=96,
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
act='leaky_relu',
|
||||
use_act_in_out=True):
|
||||
super(DPModule, self).__init__()
|
||||
initializer = nn.initializer.KaimingUniform()
|
||||
self.use_act_in_out = use_act_in_out
|
||||
self.dwconv = nn.Conv2D(
|
||||
in_channels=in_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=kernel_size,
|
||||
groups=out_channel,
|
||||
padding=(kernel_size - 1) // 2,
|
||||
stride=stride,
|
||||
weight_attr=ParamAttr(initializer=initializer),
|
||||
bias_attr=False)
|
||||
self.bn1 = nn.BatchNorm2D(out_channel)
|
||||
self.pwconv = nn.Conv2D(
|
||||
in_channels=out_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=1,
|
||||
groups=1,
|
||||
padding=0,
|
||||
weight_attr=ParamAttr(initializer=initializer),
|
||||
bias_attr=False)
|
||||
self.bn2 = nn.BatchNorm2D(out_channel)
|
||||
if act == "hard_swish":
|
||||
act = 'hardswish'
|
||||
self.act = act
|
||||
|
||||
def forward(self, x):
|
||||
x = self.bn1(self.dwconv(x))
|
||||
if self.act:
|
||||
x = getattr(F, self.act)(x)
|
||||
x = self.bn2(self.pwconv(x))
|
||||
if self.use_act_in_out and self.act:
|
||||
x = getattr(F, self.act)(x)
|
||||
return x
|
||||
|
||||
|
||||
class DarknetBottleneck(nn.Layer):
|
||||
"""The basic bottleneck block used in Darknet.
|
||||
|
||||
Each Block consists of two ConvModules and the input is added to the
|
||||
final output. Each ConvModule is composed of Conv, BN, and act.
|
||||
The first convLayer has filter size of 1x1 and the second one has the
|
||||
filter size of 3x3.
|
||||
|
||||
Args:
|
||||
in_channels (int): The input channels of this Module.
|
||||
out_channels (int): The output channels of this Module.
|
||||
expansion (int): The kernel size of the convolution. Default: 0.5
|
||||
add_identity (bool): Whether to add identity to the out.
|
||||
Default: True
|
||||
use_depthwise (bool): Whether to use depthwise separable convolution.
|
||||
Default: False
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=3,
|
||||
expansion=0.5,
|
||||
add_identity=True,
|
||||
use_depthwise=False,
|
||||
act="leaky_relu"):
|
||||
super(DarknetBottleneck, self).__init__()
|
||||
hidden_channels = int(out_channels * expansion)
|
||||
conv_func = DPModule if use_depthwise else ConvBNLayer
|
||||
self.conv1 = ConvBNLayer(
|
||||
in_channel=in_channels,
|
||||
out_channel=hidden_channels,
|
||||
kernel_size=1,
|
||||
act=act)
|
||||
self.conv2 = conv_func(
|
||||
in_channel=hidden_channels,
|
||||
out_channel=out_channels,
|
||||
kernel_size=kernel_size,
|
||||
stride=1,
|
||||
act=act)
|
||||
self.add_identity = \
|
||||
add_identity and in_channels == out_channels
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
out = self.conv1(x)
|
||||
out = self.conv2(out)
|
||||
|
||||
if self.add_identity:
|
||||
return out + identity
|
||||
else:
|
||||
return out
|
||||
|
||||
|
||||
class CSPLayer(nn.Layer):
|
||||
"""Cross Stage Partial Layer.
|
||||
|
||||
Args:
|
||||
in_channels (int): The input channels of the CSP layer.
|
||||
out_channels (int): The output channels of the CSP layer.
|
||||
expand_ratio (float): Ratio to adjust the number of channels of the
|
||||
hidden layer. Default: 0.5
|
||||
num_blocks (int): Number of blocks. Default: 1
|
||||
add_identity (bool): Whether to add identity in blocks.
|
||||
Default: True
|
||||
use_depthwise (bool): Whether to depthwise separable convolution in
|
||||
blocks. Default: False
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=3,
|
||||
expand_ratio=0.5,
|
||||
num_blocks=1,
|
||||
add_identity=True,
|
||||
use_depthwise=False,
|
||||
act="leaky_relu"):
|
||||
super().__init__()
|
||||
mid_channels = int(out_channels * expand_ratio)
|
||||
self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
|
||||
self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
|
||||
self.final_conv = ConvBNLayer(
|
||||
2 * mid_channels, out_channels, 1, act=act)
|
||||
|
||||
self.blocks = nn.Sequential(* [
|
||||
DarknetBottleneck(
|
||||
mid_channels,
|
||||
mid_channels,
|
||||
kernel_size,
|
||||
1.0,
|
||||
add_identity,
|
||||
use_depthwise,
|
||||
act=act) for _ in range(num_blocks)
|
||||
])
|
||||
|
||||
def forward(self, x):
|
||||
x_short = self.short_conv(x)
|
||||
|
||||
x_main = self.main_conv(x)
|
||||
x_main = self.blocks(x_main)
|
||||
|
||||
x_final = paddle.concat((x_main, x_short), axis=1)
|
||||
return self.final_conv(x_final)
|
||||
|
||||
|
||||
class Channel_T(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels=[116, 232, 464],
|
||||
out_channels=96,
|
||||
act="leaky_relu"):
|
||||
super(Channel_T, self).__init__()
|
||||
self.convs = nn.LayerList()
|
||||
for i in range(len(in_channels)):
|
||||
self.convs.append(
|
||||
ConvBNLayer(
|
||||
in_channels[i], out_channels, 1, act=act))
|
||||
|
||||
def forward(self, x):
|
||||
outs = [self.convs[i](x[i]) for i in range(len(x))]
|
||||
return outs
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class CSPPAN(nn.Layer):
|
||||
"""Path Aggregation Network with CSP module.
|
||||
|
||||
Args:
|
||||
in_channels (List[int]): Number of input channels per scale.
|
||||
out_channels (int): Number of output channels (used at each scale)
|
||||
kernel_size (int): The conv2d kernel size of this Module.
|
||||
num_features (int): Number of output features of CSPPAN module.
|
||||
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
|
||||
use_depthwise (bool): Whether to depthwise separable convolution in
|
||||
blocks. Default: True
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=5,
|
||||
num_features=3,
|
||||
num_csp_blocks=1,
|
||||
use_depthwise=True,
|
||||
act='hard_swish',
|
||||
spatial_scales=[0.125, 0.0625, 0.03125]):
|
||||
super(CSPPAN, self).__init__()
|
||||
self.conv_t = Channel_T(in_channels, out_channels, act=act)
|
||||
in_channels = [out_channels] * len(spatial_scales)
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.spatial_scales = spatial_scales
|
||||
self.num_features = num_features
|
||||
conv_func = DPModule if use_depthwise else ConvBNLayer
|
||||
|
||||
if self.num_features == 4:
|
||||
self.first_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.second_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.spatial_scales.append(self.spatial_scales[-1] / 2)
|
||||
|
||||
# build top-down blocks
|
||||
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
|
||||
self.top_down_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1, 0, -1):
|
||||
self.top_down_blocks.append(
|
||||
CSPLayer(
|
||||
in_channels[idx - 1] * 2,
|
||||
in_channels[idx - 1],
|
||||
kernel_size=kernel_size,
|
||||
num_blocks=num_csp_blocks,
|
||||
add_identity=False,
|
||||
use_depthwise=use_depthwise,
|
||||
act=act))
|
||||
|
||||
# build bottom-up blocks
|
||||
self.downsamples = nn.LayerList()
|
||||
self.bottom_up_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1):
|
||||
self.downsamples.append(
|
||||
conv_func(
|
||||
in_channels[idx],
|
||||
in_channels[idx],
|
||||
kernel_size=kernel_size,
|
||||
stride=2,
|
||||
act=act))
|
||||
self.bottom_up_blocks.append(
|
||||
CSPLayer(
|
||||
in_channels[idx] * 2,
|
||||
in_channels[idx + 1],
|
||||
kernel_size=kernel_size,
|
||||
num_blocks=num_csp_blocks,
|
||||
add_identity=False,
|
||||
use_depthwise=use_depthwise,
|
||||
act=act))
|
||||
|
||||
def forward(self, inputs):
|
||||
"""
|
||||
Args:
|
||||
inputs (tuple[Tensor]): input features.
|
||||
|
||||
Returns:
|
||||
tuple[Tensor]: CSPPAN features.
|
||||
"""
|
||||
assert len(inputs) == len(self.in_channels)
|
||||
inputs = self.conv_t(inputs)
|
||||
|
||||
# top-down path
|
||||
inner_outs = [inputs[-1]]
|
||||
for idx in range(len(self.in_channels) - 1, 0, -1):
|
||||
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]

            upsample_feat = self.upsample(feat_high)
|
||||
|
||||
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
|
||||
paddle.concat([upsample_feat, feat_low], 1))
|
||||
inner_outs.insert(0, inner_out)
|
||||
|
||||
# bottom-up path
|
||||
outs = [inner_outs[0]]
|
||||
for idx in range(len(self.in_channels) - 1):
|
||||
feat_low = outs[-1]
|
||||
feat_height = inner_outs[idx + 1]
|
||||
downsample_feat = self.downsamples[idx](feat_low)
|
||||
out = self.bottom_up_blocks[idx](paddle.concat(
|
||||
[downsample_feat, feat_height], 1))
|
||||
outs.append(out)
|
||||
|
||||
top_features = None
|
||||
if self.num_features == 4:
|
||||
top_features = self.first_top_conv(inputs[-1])
|
||||
top_features = top_features + self.second_top_conv(outs[-1])
|
||||
outs.append(top_features)
|
||||
|
||||
return tuple(outs)
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [
|
||||
ShapeSpec(
|
||||
channels=self.out_channels, stride=1. / s)
|
||||
for s in self.spatial_scales
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
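# Usage sketch (illustrative only): a minimal CSPPAN smoke test, assuming
# PaddlePaddle is installed and the ConvBNLayer/DPModule/CSPLayer helpers above
# are available. Channel counts and spatial sizes are example values for three
# backbone levels at strides 8/16/32, not values fixed by this file.
if __name__ == '__main__':
    import paddle

    neck = CSPPAN(in_channels=[116, 232, 464], out_channels=96)
    feats = [
        paddle.rand([1, 116, 40, 40]),  # stride 8
        paddle.rand([1, 232, 20, 20]),  # stride 16
        paddle.rand([1, 464, 10, 10]),  # stride 32
    ]
    outs = neck(feats)
    print([o.shape for o in outs])  # three fused maps, each with 96 channels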
398
paddle_detection/ppdet/modeling/necks/custom_pan.py
Normal file
@@ -0,0 +1,398 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import math
|
||||
import copy
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.modeling.layers import DropBlock, MultiHeadAttention
|
||||
from ppdet.modeling.ops import get_act_fn
|
||||
from ..backbones.cspresnet import ConvBNLayer, BasicBlock
|
||||
from ..shape_spec import ShapeSpec
|
||||
from ..initializer import linear_init_
|
||||
|
||||
__all__ = ['CustomCSPPAN']
|
||||
|
||||
|
||||
def _get_clones(module, N):
|
||||
return nn.LayerList([copy.deepcopy(module) for _ in range(N)])
|
||||
|
||||
|
||||
class SPP(nn.Layer):
|
||||
def __init__(self,
|
||||
ch_in,
|
||||
ch_out,
|
||||
k,
|
||||
pool_size,
|
||||
act='swish',
|
||||
data_format='NCHW'):
|
||||
super(SPP, self).__init__()
|
||||
self.pool = []
|
||||
self.data_format = data_format
|
||||
for i, size in enumerate(pool_size):
|
||||
pool = self.add_sublayer(
|
||||
'pool{}'.format(i),
|
||||
nn.MaxPool2D(
|
||||
kernel_size=size,
|
||||
stride=1,
|
||||
padding=size // 2,
|
||||
data_format=data_format,
|
||||
ceil_mode=False))
|
||||
self.pool.append(pool)
|
||||
self.conv = ConvBNLayer(ch_in, ch_out, k, padding=k // 2, act=act)
|
||||
|
||||
def forward(self, x):
|
||||
outs = [x]
|
||||
for pool in self.pool:
|
||||
outs.append(pool(x))
|
||||
if self.data_format == 'NCHW':
|
||||
y = paddle.concat(outs, axis=1)
|
||||
else:
|
||||
y = paddle.concat(outs, axis=-1)
|
||||
|
||||
y = self.conv(y)
|
||||
return y
|
||||
|
||||
|
||||
class CSPStage(nn.Layer):
|
||||
def __init__(self,
|
||||
block_fn,
|
||||
ch_in,
|
||||
ch_out,
|
||||
n,
|
||||
act='swish',
|
||||
spp=False,
|
||||
use_alpha=False):
|
||||
super(CSPStage, self).__init__()
|
||||
|
||||
ch_mid = int(ch_out // 2)
|
||||
self.conv1 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
|
||||
self.conv2 = ConvBNLayer(ch_in, ch_mid, 1, act=act)
|
||||
self.convs = nn.Sequential()
|
||||
next_ch_in = ch_mid
|
||||
for i in range(n):
|
||||
self.convs.add_sublayer(
|
||||
str(i),
|
||||
eval(block_fn)(next_ch_in,
|
||||
ch_mid,
|
||||
act=act,
|
||||
shortcut=False,
|
||||
use_alpha=use_alpha))
|
||||
if i == (n - 1) // 2 and spp:
|
||||
self.convs.add_sublayer(
|
||||
'spp', SPP(ch_mid * 4, ch_mid, 1, [5, 9, 13], act=act))
|
||||
next_ch_in = ch_mid
|
||||
self.conv3 = ConvBNLayer(ch_mid * 2, ch_out, 1, act=act)
|
||||
|
||||
def forward(self, x):
|
||||
y1 = self.conv1(x)
|
||||
y2 = self.conv2(x)
|
||||
y2 = self.convs(y2)
|
||||
y = paddle.concat([y1, y2], axis=1)
|
||||
y = self.conv3(y)
|
||||
return y
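# Illustrative helper (not used by the detector): a sketch of the CSP
# split-transform-merge above, assuming PaddlePaddle and the imported
# BasicBlock are available. Half of the width flows through the block stack,
# half through the parallel 1x1 path, and the halves are concatenated before
# the final fusion conv.
def _csp_stage_demo():
    stage = CSPStage('BasicBlock', ch_in=256, ch_out=128, n=2, act='swish')
    x = paddle.rand([1, 256, 20, 20])
    return stage(x).shape  # [1, 128, 20, 20]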
|
||||
|
||||
|
||||
class TransformerEncoderLayer(nn.Layer):
|
||||
def __init__(self,
|
||||
d_model,
|
||||
nhead,
|
||||
dim_feedforward=2048,
|
||||
dropout=0.1,
|
||||
activation="relu",
|
||||
attn_dropout=None,
|
||||
act_dropout=None,
|
||||
normalize_before=False):
|
||||
super(TransformerEncoderLayer, self).__init__()
|
||||
attn_dropout = dropout if attn_dropout is None else attn_dropout
|
||||
act_dropout = dropout if act_dropout is None else act_dropout
|
||||
self.normalize_before = normalize_before
|
||||
|
||||
self.self_attn = MultiHeadAttention(d_model, nhead, attn_dropout)
|
||||
# Implementation of Feedforward model
|
||||
self.linear1 = nn.Linear(d_model, dim_feedforward)
|
||||
self.dropout = nn.Dropout(act_dropout, mode="upscale_in_train")
|
||||
self.linear2 = nn.Linear(dim_feedforward, d_model)
|
||||
|
||||
self.norm1 = nn.LayerNorm(d_model)
|
||||
self.norm2 = nn.LayerNorm(d_model)
|
||||
self.dropout1 = nn.Dropout(dropout, mode="upscale_in_train")
|
||||
self.dropout2 = nn.Dropout(dropout, mode="upscale_in_train")
|
||||
self.activation = getattr(F, activation)
|
||||
self._reset_parameters()
|
||||
|
||||
def _reset_parameters(self):
|
||||
linear_init_(self.linear1)
|
||||
linear_init_(self.linear2)
|
||||
|
||||
@staticmethod
|
||||
def with_pos_embed(tensor, pos_embed):
|
||||
return tensor if pos_embed is None else tensor + pos_embed
|
||||
|
||||
def forward(self, src, src_mask=None, pos_embed=None):
|
||||
residual = src
|
||||
if self.normalize_before:
|
||||
src = self.norm1(src)
|
||||
q = k = self.with_pos_embed(src, pos_embed)
|
||||
src = self.self_attn(q, k, value=src, attn_mask=src_mask)
|
||||
|
||||
src = residual + self.dropout1(src)
|
||||
if not self.normalize_before:
|
||||
src = self.norm1(src)
|
||||
|
||||
residual = src
|
||||
if self.normalize_before:
|
||||
src = self.norm2(src)
|
||||
src = self.linear2(self.dropout(self.activation(self.linear1(src))))
|
||||
src = residual + self.dropout2(src)
|
||||
if not self.normalize_before:
|
||||
src = self.norm2(src)
|
||||
return src
|
||||
|
||||
|
||||
class TransformerEncoder(nn.Layer):
|
||||
def __init__(self, encoder_layer, num_layers, norm=None):
|
||||
super(TransformerEncoder, self).__init__()
|
||||
self.layers = _get_clones(encoder_layer, num_layers)
|
||||
self.num_layers = num_layers
|
||||
self.norm = norm
|
||||
|
||||
def forward(self, src, src_mask=None, pos_embed=None):
|
||||
output = src
|
||||
for layer in self.layers:
|
||||
output = layer(output, src_mask=src_mask, pos_embed=pos_embed)
|
||||
|
||||
if self.norm is not None:
|
||||
output = self.norm(output)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class CustomCSPPAN(nn.Layer):
|
||||
__shared__ = [
|
||||
'norm_type', 'data_format', 'width_mult', 'depth_mult', 'trt',
|
||||
'eval_size'
|
||||
]
|
||||
|
||||
def __init__(self,
|
||||
in_channels=[256, 512, 1024],
|
||||
out_channels=[1024, 512, 256],
|
||||
norm_type='bn',
|
||||
act='leaky',
|
||||
stage_fn='CSPStage',
|
||||
block_fn='BasicBlock',
|
||||
stage_num=1,
|
||||
block_num=3,
|
||||
drop_block=False,
|
||||
block_size=3,
|
||||
keep_prob=0.9,
|
||||
spp=False,
|
||||
data_format='NCHW',
|
||||
width_mult=1.0,
|
||||
depth_mult=1.0,
|
||||
use_alpha=False,
|
||||
trt=False,
|
||||
dim_feedforward=2048,
|
||||
dropout=0.1,
|
||||
activation='gelu',
|
||||
nhead=4,
|
||||
num_layers=4,
|
||||
attn_dropout=None,
|
||||
act_dropout=None,
|
||||
normalize_before=False,
|
||||
use_trans=False,
|
||||
eval_size=None):
|
||||
|
||||
super(CustomCSPPAN, self).__init__()
|
||||
out_channels = [max(round(c * width_mult), 1) for c in out_channels]
|
||||
block_num = max(round(block_num * depth_mult), 1)
|
||||
        act = get_act_fn(
            act, trt=trt) if act is None or isinstance(act, (str, dict)) else act
|
||||
self.num_blocks = len(in_channels)
|
||||
self.data_format = data_format
|
||||
self._out_channels = out_channels
|
||||
|
||||
self.hidden_dim = in_channels[-1]
|
||||
in_channels = in_channels[::-1]
|
||||
|
||||
self.use_trans = use_trans
|
||||
self.eval_size = eval_size
|
||||
if use_trans:
|
||||
if eval_size is not None:
|
||||
self.pos_embed = self.build_2d_sincos_position_embedding(
|
||||
eval_size[1] // 32,
|
||||
eval_size[0] // 32,
|
||||
embed_dim=self.hidden_dim)
|
||||
else:
|
||||
self.pos_embed = None
|
||||
|
||||
encoder_layer = TransformerEncoderLayer(
|
||||
self.hidden_dim, nhead, dim_feedforward, dropout, activation,
|
||||
attn_dropout, act_dropout, normalize_before)
|
||||
encoder_norm = nn.LayerNorm(
|
||||
self.hidden_dim) if normalize_before else None
|
||||
self.encoder = TransformerEncoder(encoder_layer, num_layers,
|
||||
encoder_norm)
|
||||
|
||||
fpn_stages = []
|
||||
fpn_routes = []
|
||||
for i, (ch_in, ch_out) in enumerate(zip(in_channels, out_channels)):
|
||||
if i > 0:
|
||||
ch_in += ch_pre // 2
|
||||
|
||||
stage = nn.Sequential()
|
||||
for j in range(stage_num):
|
||||
stage.add_sublayer(
|
||||
str(j),
|
||||
eval(stage_fn)(block_fn,
|
||||
ch_in if j == 0 else ch_out,
|
||||
ch_out,
|
||||
block_num,
|
||||
act=act,
|
||||
spp=(spp and i == 0),
|
||||
use_alpha=use_alpha))
|
||||
|
||||
if drop_block:
|
||||
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
|
||||
|
||||
fpn_stages.append(stage)
|
||||
|
||||
if i < self.num_blocks - 1:
|
||||
fpn_routes.append(
|
||||
ConvBNLayer(
|
||||
ch_in=ch_out,
|
||||
ch_out=ch_out // 2,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
act=act))
|
||||
|
||||
ch_pre = ch_out
|
||||
|
||||
self.fpn_stages = nn.LayerList(fpn_stages)
|
||||
self.fpn_routes = nn.LayerList(fpn_routes)
|
||||
|
||||
pan_stages = []
|
||||
pan_routes = []
|
||||
for i in reversed(range(self.num_blocks - 1)):
|
||||
pan_routes.append(
|
||||
ConvBNLayer(
|
||||
ch_in=out_channels[i + 1],
|
||||
ch_out=out_channels[i + 1],
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
act=act))
|
||||
|
||||
ch_in = out_channels[i] + out_channels[i + 1]
|
||||
ch_out = out_channels[i]
|
||||
stage = nn.Sequential()
|
||||
for j in range(stage_num):
|
||||
stage.add_sublayer(
|
||||
str(j),
|
||||
eval(stage_fn)(block_fn,
|
||||
ch_in if j == 0 else ch_out,
|
||||
ch_out,
|
||||
block_num,
|
||||
act=act,
|
||||
spp=False,
|
||||
use_alpha=use_alpha))
|
||||
if drop_block:
|
||||
stage.add_sublayer('drop', DropBlock(block_size, keep_prob))
|
||||
|
||||
pan_stages.append(stage)
|
||||
|
||||
self.pan_stages = nn.LayerList(pan_stages[::-1])
|
||||
self.pan_routes = nn.LayerList(pan_routes[::-1])
|
||||
|
||||
def build_2d_sincos_position_embedding(
|
||||
self,
|
||||
w,
|
||||
h,
|
||||
embed_dim=1024,
|
||||
temperature=10000., ):
|
||||
grid_w = paddle.arange(int(w), dtype=paddle.float32)
|
||||
grid_h = paddle.arange(int(h), dtype=paddle.float32)
|
||||
grid_w, grid_h = paddle.meshgrid(grid_w, grid_h)
|
||||
assert embed_dim % 4 == 0, 'Embed dimension must be divisible by 4 for 2D sin-cos position embedding'
|
||||
pos_dim = embed_dim // 4
|
||||
omega = paddle.arange(pos_dim, dtype=paddle.float32) / pos_dim
|
||||
omega = 1. / (temperature**omega)
|
||||
|
||||
out_w = grid_w.flatten()[..., None] @omega[None]
|
||||
out_h = grid_h.flatten()[..., None] @omega[None]
|
||||
|
||||
pos_emb = paddle.concat(
|
||||
[
|
||||
paddle.sin(out_w), paddle.cos(out_w), paddle.sin(out_h),
|
||||
paddle.cos(out_h)
|
||||
],
|
||||
axis=1)[None, :, :]
|
||||
|
||||
return pos_emb
|
||||
|
||||
def forward(self, blocks, for_mot=False):
|
||||
if self.use_trans:
|
||||
last_feat = blocks[-1]
|
||||
n, c, h, w = last_feat.shape
|
||||
|
||||
# flatten [B, C, H, W] to [B, HxW, C]
|
||||
src_flatten = last_feat.flatten(2).transpose([0, 2, 1])
|
||||
if self.eval_size is not None and not self.training:
|
||||
pos_embed = self.pos_embed
|
||||
else:
|
||||
pos_embed = self.build_2d_sincos_position_embedding(
|
||||
w=w, h=h, embed_dim=self.hidden_dim)
|
||||
|
||||
memory = self.encoder(src_flatten, pos_embed=pos_embed)
|
||||
last_feat_encode = memory.transpose([0, 2, 1]).reshape([n, c, h, w])
|
||||
blocks[-1] = last_feat_encode
|
||||
|
||||
blocks = blocks[::-1]
|
||||
fpn_feats = []
|
||||
|
||||
for i, block in enumerate(blocks):
|
||||
if i > 0:
|
||||
block = paddle.concat([route, block], axis=1)
|
||||
route = self.fpn_stages[i](block)
|
||||
fpn_feats.append(route)
|
||||
|
||||
if i < self.num_blocks - 1:
|
||||
route = self.fpn_routes[i](route)
|
||||
route = F.interpolate(
|
||||
route, scale_factor=2., data_format=self.data_format)
|
||||
|
||||
pan_feats = [fpn_feats[-1], ]
|
||||
route = fpn_feats[-1]
|
||||
for i in reversed(range(self.num_blocks - 1)):
|
||||
block = fpn_feats[i]
|
||||
route = self.pan_routes[i](route)
|
||||
block = paddle.concat([route, block], axis=1)
|
||||
route = self.pan_stages[i](block)
|
||||
pan_feats.append(route)
|
||||
|
||||
return pan_feats[::-1]
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [ShapeSpec(channels=c) for c in self._out_channels]
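# Usage sketch (illustrative only): feed three fake backbone maps at strides
# 8/16/32 through CustomCSPPAN with its default widths. This assumes
# PaddlePaddle is installed; the shapes are example values, not requirements
# of the code.
if __name__ == '__main__':
    neck = CustomCSPPAN(in_channels=[256, 512, 1024],
                        out_channels=[1024, 512, 256])
    blocks = [
        paddle.rand([1, 256, 40, 40]),   # stride 8
        paddle.rand([1, 512, 20, 20]),   # stride 16
        paddle.rand([1, 1024, 10, 10]),  # stride 32
    ]
    pan_feats = neck(blocks)
    # deepest level first; channels follow out_channels: 1024, 512, 256
    print([f.shape for f in pan_feats])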
150
paddle_detection/ppdet/modeling/necks/dilated_encoder.py
Normal file
@@ -0,0 +1,150 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from paddle.nn.initializer import KaimingUniform, Constant, Normal
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['DilatedEncoder']
|
||||
|
||||
|
||||
class Bottleneck(nn.Layer):
|
||||
def __init__(self, in_channels, mid_channels, dilation):
|
||||
super(Bottleneck, self).__init__()
|
||||
self.conv1 = nn.Sequential(* [
|
||||
nn.Conv2D(
|
||||
in_channels,
|
||||
mid_channels,
|
||||
1,
|
||||
padding=0,
|
||||
weight_attr=ParamAttr(initializer=Normal(
|
||||
mean=0, std=0.01)),
|
||||
bias_attr=ParamAttr(initializer=Constant(0.0))),
|
||||
nn.BatchNorm2D(
|
||||
mid_channels,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
|
||||
nn.ReLU(),
|
||||
])
|
||||
self.conv2 = nn.Sequential(* [
|
||||
nn.Conv2D(
|
||||
mid_channels,
|
||||
mid_channels,
|
||||
3,
|
||||
padding=dilation,
|
||||
dilation=dilation,
|
||||
weight_attr=ParamAttr(initializer=Normal(
|
||||
mean=0, std=0.01)),
|
||||
bias_attr=ParamAttr(initializer=Constant(0.0))),
|
||||
nn.BatchNorm2D(
|
||||
mid_channels,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
|
||||
nn.ReLU(),
|
||||
])
|
||||
self.conv3 = nn.Sequential(* [
|
||||
nn.Conv2D(
|
||||
mid_channels,
|
||||
in_channels,
|
||||
1,
|
||||
padding=0,
|
||||
weight_attr=ParamAttr(initializer=Normal(
|
||||
mean=0, std=0.01)),
|
||||
bias_attr=ParamAttr(initializer=Constant(0.0))),
|
||||
nn.BatchNorm2D(
|
||||
in_channels,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0))),
|
||||
nn.ReLU(),
|
||||
])
|
||||
|
||||
def forward(self, x):
|
||||
identity = x
|
||||
y = self.conv3(self.conv2(self.conv1(x)))
|
||||
return y + identity
|
||||
|
||||
|
||||
@register
|
||||
class DilatedEncoder(nn.Layer):
|
||||
"""
|
||||
DilatedEncoder used in YOLOF
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels=[2048],
|
||||
out_channels=[512],
|
||||
block_mid_channels=128,
|
||||
num_residual_blocks=4,
|
||||
block_dilations=[2, 4, 6, 8]):
|
||||
super(DilatedEncoder, self).__init__()
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
assert len(self.in_channels) == 1, "YOLOF only has one level feature."
|
||||
assert len(self.out_channels) == 1, "YOLOF only has one level feature."
|
||||
|
||||
self.block_mid_channels = block_mid_channels
|
||||
self.num_residual_blocks = num_residual_blocks
|
||||
self.block_dilations = block_dilations
|
||||
|
||||
out_ch = self.out_channels[0]
|
||||
self.lateral_conv = nn.Conv2D(
|
||||
self.in_channels[0],
|
||||
out_ch,
|
||||
1,
|
||||
weight_attr=ParamAttr(initializer=KaimingUniform(
|
||||
negative_slope=1, nonlinearity='leaky_relu')),
|
||||
bias_attr=ParamAttr(initializer=Constant(value=0.0)))
|
||||
self.lateral_norm = nn.BatchNorm2D(
|
||||
out_ch,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
|
||||
|
||||
self.fpn_conv = nn.Conv2D(
|
||||
out_ch,
|
||||
out_ch,
|
||||
3,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(initializer=KaimingUniform(
|
||||
negative_slope=1, nonlinearity='leaky_relu')))
|
||||
self.fpn_norm = nn.BatchNorm2D(
|
||||
out_ch,
|
||||
weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
|
||||
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
|
||||
|
||||
encoder_blocks = []
|
||||
for i in range(self.num_residual_blocks):
|
||||
encoder_blocks.append(
|
||||
Bottleneck(
|
||||
out_ch,
|
||||
self.block_mid_channels,
|
||||
dilation=block_dilations[i]))
|
||||
self.dilated_encoder_blocks = nn.Sequential(*encoder_blocks)
|
||||
|
||||
def forward(self, inputs, for_mot=False):
|
||||
out = self.lateral_norm(self.lateral_conv(inputs[0]))
|
||||
out = self.fpn_norm(self.fpn_conv(out))
|
||||
out = self.dilated_encoder_blocks(out)
|
||||
return [out]
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [ShapeSpec(channels=c) for c in self.out_channels]
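# Usage sketch (illustrative only): YOLOF-style single-level encoding of a C5
# map, assuming PaddlePaddle is installed. The 2048-channel, stride-32 input
# shape is an example value.
if __name__ == '__main__':
    c5 = paddle.rand([1, 2048, 20, 20])
    encoder = DilatedEncoder(in_channels=[2048], out_channels=[512])
    out = encoder([c5])
    print(out[0].shape)  # [1, 512, 20, 20]; dilations grow the receptive field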
212
paddle_detection/ppdet/modeling/necks/es_pan.py
Normal file
@@ -0,0 +1,212 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.core.workspace import register, serializable
|
||||
|
||||
from ..shape_spec import ShapeSpec
|
||||
from ..backbones.esnet import SEModule
|
||||
from .csp_pan import ConvBNLayer, Channel_T, DPModule
|
||||
|
||||
__all__ = ['ESPAN']
|
||||
|
||||
|
||||
class ES_Block(nn.Layer):
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
mid_channels,
|
||||
out_channels,
|
||||
kernel_size=5,
|
||||
stride=1,
|
||||
act='leaky_relu'):
|
||||
super(ES_Block, self).__init__()
|
||||
self._residual = ConvBNLayer(
|
||||
in_channel=in_channels,
|
||||
out_channel=out_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=act)
|
||||
self._conv_pw = ConvBNLayer(
|
||||
in_channel=in_channels,
|
||||
out_channel=mid_channels // 2,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=act)
|
||||
self._conv_dw = ConvBNLayer(
|
||||
in_channel=mid_channels // 2,
|
||||
out_channel=mid_channels // 2,
|
||||
kernel_size=kernel_size,
|
||||
stride=stride,
|
||||
groups=mid_channels // 2,
|
||||
act=None)
|
||||
self._se = SEModule(mid_channels)
|
||||
|
||||
self._conv_linear = ConvBNLayer(
|
||||
in_channel=mid_channels,
|
||||
out_channel=out_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=act)
|
||||
|
||||
self._out_conv = ConvBNLayer(
|
||||
in_channel=out_channels * 2,
|
||||
out_channel=out_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
groups=1,
|
||||
act=act)
|
||||
|
||||
def forward(self, inputs):
|
||||
x1 = self._residual(inputs)
|
||||
x2 = self._conv_pw(inputs)
|
||||
x3 = self._conv_dw(x2)
|
||||
x3 = paddle.concat([x2, x3], axis=1)
|
||||
x3 = self._se(x3)
|
||||
x3 = self._conv_linear(x3)
|
||||
out = paddle.concat([x1, x3], axis=1)
|
||||
out = self._out_conv(out)
|
||||
return out
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class ESPAN(nn.Layer):
|
||||
"""Path Aggregation Network with ES module.
|
||||
|
||||
Args:
|
||||
in_channels (List[int]): Number of input channels per scale.
|
||||
out_channels (int): Number of output channels (used at each scale)
|
||||
kernel_size (int): The conv2d kernel size of this Module.
|
||||
num_features (int): Number of output features of CSPPAN module.
|
||||
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
|
||||
use_depthwise (bool): Whether to depthwise separable convolution in
|
||||
blocks. Default: True
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=5,
|
||||
num_features=3,
|
||||
use_depthwise=True,
|
||||
act='hard_swish',
|
||||
spatial_scales=[0.125, 0.0625, 0.03125]):
|
||||
super(ESPAN, self).__init__()
|
||||
self.conv_t = Channel_T(in_channels, out_channels, act=act)
|
||||
in_channels = [out_channels] * len(spatial_scales)
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.spatial_scales = spatial_scales
|
||||
self.num_features = num_features
|
||||
conv_func = DPModule if use_depthwise else ConvBNLayer
|
||||
|
||||
if self.num_features == 4:
|
||||
self.first_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.second_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.spatial_scales.append(self.spatial_scales[-1] / 2)
|
||||
|
||||
# build top-down blocks
|
||||
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
|
||||
self.top_down_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1, 0, -1):
|
||||
self.top_down_blocks.append(
|
||||
ES_Block(
|
||||
in_channels[idx - 1] * 2,
|
||||
in_channels[idx - 1],
|
||||
in_channels[idx - 1],
|
||||
kernel_size=kernel_size,
|
||||
stride=1,
|
||||
act=act))
|
||||
|
||||
# build bottom-up blocks
|
||||
self.downsamples = nn.LayerList()
|
||||
self.bottom_up_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1):
|
||||
self.downsamples.append(
|
||||
conv_func(
|
||||
in_channels[idx],
|
||||
in_channels[idx],
|
||||
kernel_size=kernel_size,
|
||||
stride=2,
|
||||
act=act))
|
||||
self.bottom_up_blocks.append(
|
||||
ES_Block(
|
||||
in_channels[idx] * 2,
|
||||
in_channels[idx + 1],
|
||||
in_channels[idx + 1],
|
||||
kernel_size=kernel_size,
|
||||
stride=1,
|
||||
act=act))
|
||||
|
||||
def forward(self, inputs):
|
||||
"""
|
||||
Args:
|
||||
inputs (tuple[Tensor]): input features.
|
||||
|
||||
Returns:
|
||||
tuple[Tensor]: CSPPAN features.
|
||||
"""
|
||||
assert len(inputs) == len(self.in_channels)
|
||||
inputs = self.conv_t(inputs)
|
||||
|
||||
# top-down path
|
||||
inner_outs = [inputs[-1]]
|
||||
for idx in range(len(self.in_channels) - 1, 0, -1):
|
||||
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]

            upsample_feat = self.upsample(feat_high)
|
||||
|
||||
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
|
||||
paddle.concat([upsample_feat, feat_low], 1))
|
||||
inner_outs.insert(0, inner_out)
|
||||
|
||||
# bottom-up path
|
||||
outs = [inner_outs[0]]
|
||||
for idx in range(len(self.in_channels) - 1):
|
||||
feat_low = outs[-1]
|
||||
feat_height = inner_outs[idx + 1]
|
||||
downsample_feat = self.downsamples[idx](feat_low)
|
||||
out = self.bottom_up_blocks[idx](paddle.concat(
|
||||
[downsample_feat, feat_height], 1))
|
||||
outs.append(out)
|
||||
|
||||
top_features = None
|
||||
if self.num_features == 4:
|
||||
top_features = self.first_top_conv(inputs[-1])
|
||||
top_features = top_features + self.second_top_conv(outs[-1])
|
||||
outs.append(top_features)
|
||||
|
||||
return tuple(outs)
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [
|
||||
ShapeSpec(
|
||||
channels=self.out_channels, stride=1. / s)
|
||||
for s in self.spatial_scales
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
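# Usage sketch (illustrative only): the same PAN wiring as CSPPAN but with
# ES_Block fusion. Assumes PaddlePaddle and the ESNet SEModule import are
# available; channel counts and sizes are example values.
if __name__ == '__main__':
    neck = ESPAN(in_channels=[96, 192, 384], out_channels=96)
    feats = [
        paddle.rand([1, 96, 40, 40]),   # stride 8
        paddle.rand([1, 192, 20, 20]),  # stride 16
        paddle.rand([1, 384, 10, 10]),  # stride 32
    ]
    outs = neck(feats)
    print([o.shape for o in outs])  # each level reduced to 96 channels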
231
paddle_detection/ppdet/modeling/necks/fpn.py
Normal file
@@ -0,0 +1,231 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import XavierUniform
|
||||
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from ppdet.modeling.layers import ConvNormLayer
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['FPN']
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class FPN(nn.Layer):
|
||||
"""
|
||||
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
|
||||
|
||||
Args:
|
||||
in_channels (list[int]): input channels of each level which can be
|
||||
derived from the output shape of backbone by from_config
|
||||
out_channel (int): output channel of each level
|
||||
spatial_scales (list[float]): the spatial scales between input feature
|
||||
maps and original input image which can be derived from the output
|
||||
shape of backbone by from_config
|
||||
has_extra_convs (bool): whether to add extra conv to the last level.
|
||||
default False
|
||||
extra_stage (int): the number of extra stages added to the last level.
|
||||
default 1
|
||||
use_c5 (bool): Whether to use c5 as the input of extra stage,
|
||||
otherwise p5 is used. default True
|
||||
norm_type (string|None): The normalization type in FPN module. If
|
||||
norm_type is None, norm will not be used after conv and if
|
||||
norm_type is string, bn, gn, sync_bn are available. default None
|
||||
norm_decay (float): weight decay for normalization layer weights.
|
||||
default 0.
|
||||
freeze_norm (bool): whether to freeze normalization layer.
|
||||
default False
|
||||
relu_before_extra_convs (bool): whether to add relu before extra convs.
|
||||
default False
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channel,
|
||||
spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
|
||||
has_extra_convs=False,
|
||||
extra_stage=1,
|
||||
use_c5=True,
|
||||
norm_type=None,
|
||||
norm_decay=0.,
|
||||
freeze_norm=False,
|
||||
relu_before_extra_convs=True):
|
||||
super(FPN, self).__init__()
|
||||
self.out_channel = out_channel
|
||||
for s in range(extra_stage):
|
||||
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
|
||||
self.spatial_scales = spatial_scales
|
||||
self.has_extra_convs = has_extra_convs
|
||||
self.extra_stage = extra_stage
|
||||
self.use_c5 = use_c5
|
||||
self.relu_before_extra_convs = relu_before_extra_convs
|
||||
self.norm_type = norm_type
|
||||
self.norm_decay = norm_decay
|
||||
self.freeze_norm = freeze_norm
|
||||
|
||||
self.lateral_convs = []
|
||||
self.fpn_convs = []
|
||||
fan = out_channel * 3 * 3
|
||||
|
||||
# stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
|
||||
# 0 <= st_stage < ed_stage <= 3
|
||||
st_stage = 4 - len(in_channels)
|
||||
ed_stage = st_stage + len(in_channels) - 1
|
||||
for i in range(st_stage, ed_stage + 1):
|
||||
if i == 3:
|
||||
lateral_name = 'fpn_inner_res5_sum'
|
||||
else:
|
||||
lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
|
||||
in_c = in_channels[i - st_stage]
|
||||
if self.norm_type is not None:
|
||||
lateral = self.add_sublayer(
|
||||
lateral_name,
|
||||
ConvNormLayer(
|
||||
ch_in=in_c,
|
||||
ch_out=out_channel,
|
||||
filter_size=1,
|
||||
stride=1,
|
||||
norm_type=self.norm_type,
|
||||
norm_decay=self.norm_decay,
|
||||
freeze_norm=self.freeze_norm,
|
||||
initializer=XavierUniform(fan_out=in_c)))
|
||||
else:
|
||||
lateral = self.add_sublayer(
|
||||
lateral_name,
|
||||
nn.Conv2D(
|
||||
in_channels=in_c,
|
||||
out_channels=out_channel,
|
||||
kernel_size=1,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=XavierUniform(fan_out=in_c))))
|
||||
self.lateral_convs.append(lateral)
|
||||
|
||||
fpn_name = 'fpn_res{}_sum'.format(i + 2)
|
||||
if self.norm_type is not None:
|
||||
fpn_conv = self.add_sublayer(
|
||||
fpn_name,
|
||||
ConvNormLayer(
|
||||
ch_in=out_channel,
|
||||
ch_out=out_channel,
|
||||
filter_size=3,
|
||||
stride=1,
|
||||
norm_type=self.norm_type,
|
||||
norm_decay=self.norm_decay,
|
||||
freeze_norm=self.freeze_norm,
|
||||
initializer=XavierUniform(fan_out=fan)))
|
||||
else:
|
||||
fpn_conv = self.add_sublayer(
|
||||
fpn_name,
|
||||
nn.Conv2D(
|
||||
in_channels=out_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=XavierUniform(fan_out=fan))))
|
||||
self.fpn_convs.append(fpn_conv)
|
||||
|
||||
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
|
||||
if self.has_extra_convs:
|
||||
for i in range(self.extra_stage):
|
||||
lvl = ed_stage + 1 + i
|
||||
if i == 0 and self.use_c5:
|
||||
in_c = in_channels[-1]
|
||||
else:
|
||||
in_c = out_channel
|
||||
extra_fpn_name = 'fpn_{}'.format(lvl + 2)
|
||||
if self.norm_type is not None:
|
||||
extra_fpn_conv = self.add_sublayer(
|
||||
extra_fpn_name,
|
||||
ConvNormLayer(
|
||||
ch_in=in_c,
|
||||
ch_out=out_channel,
|
||||
filter_size=3,
|
||||
stride=2,
|
||||
norm_type=self.norm_type,
|
||||
norm_decay=self.norm_decay,
|
||||
freeze_norm=self.freeze_norm,
|
||||
initializer=XavierUniform(fan_out=fan)))
|
||||
else:
|
||||
extra_fpn_conv = self.add_sublayer(
|
||||
extra_fpn_name,
|
||||
nn.Conv2D(
|
||||
in_channels=in_c,
|
||||
out_channels=out_channel,
|
||||
kernel_size=3,
|
||||
stride=2,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(
|
||||
initializer=XavierUniform(fan_out=fan))))
|
||||
self.fpn_convs.append(extra_fpn_conv)
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {
|
||||
'in_channels': [i.channels for i in input_shape],
|
||||
'spatial_scales': [1.0 / i.stride for i in input_shape],
|
||||
}
|
||||
|
||||
def forward(self, body_feats):
|
||||
laterals = []
|
||||
num_levels = len(body_feats)
|
||||
for i in range(num_levels):
|
||||
laterals.append(self.lateral_convs[i](body_feats[i]))
|
||||
|
||||
for i in range(1, num_levels):
|
||||
lvl = num_levels - i
|
||||
upsample = F.interpolate(
|
||||
laterals[lvl],
|
||||
scale_factor=2.,
|
||||
mode='nearest', )
|
||||
laterals[lvl - 1] += upsample
|
||||
|
||||
fpn_output = []
|
||||
for lvl in range(num_levels):
|
||||
fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
|
||||
|
||||
if self.extra_stage > 0:
|
||||
# use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
|
||||
if not self.has_extra_convs:
|
||||
assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
|
||||
fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
|
||||
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
|
||||
else:
|
||||
if self.use_c5:
|
||||
extra_source = body_feats[-1]
|
||||
else:
|
||||
extra_source = fpn_output[-1]
|
||||
fpn_output.append(self.fpn_convs[num_levels](extra_source))
|
||||
|
||||
for i in range(1, self.extra_stage):
|
||||
if self.relu_before_extra_convs:
|
||||
fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
|
||||
fpn_output[-1])))
|
||||
else:
|
||||
fpn_output.append(self.fpn_convs[num_levels + i](
|
||||
fpn_output[-1]))
|
||||
return fpn_output
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [
|
||||
ShapeSpec(
|
||||
channels=self.out_channel, stride=1. / s)
|
||||
for s in self.spatial_scales
|
||||
]
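# Usage sketch (illustrative only): a ResNet-style C2-C5 pyramid, assuming
# PaddlePaddle is installed. With the default extra_stage=1 and
# has_extra_convs=False, a max-pooled extra level is appended, so five levels
# come back. Shapes are example values.
if __name__ == '__main__':
    import paddle

    fpn = FPN(in_channels=[256, 512, 1024, 2048], out_channel=256)
    body_feats = [
        paddle.rand([1, 256, 80, 80]),   # C2, stride 4
        paddle.rand([1, 512, 40, 40]),   # C3, stride 8
        paddle.rand([1, 1024, 20, 20]),  # C4, stride 16
        paddle.rand([1, 2048, 10, 10]),  # C5, stride 32
    ]
    outs = fpn(body_feats)
    print(len(outs), [o.shape for o in outs])  # 5 levels, 256 channels each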
129
paddle_detection/ppdet/modeling/necks/hrfpn.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
import paddle.nn as nn
|
||||
from ppdet.core.workspace import register
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['HRFPN']
|
||||
|
||||
|
||||
@register
|
||||
class HRFPN(nn.Layer):
|
||||
"""
|
||||
Args:
|
||||
in_channels (list): number of input feature channels from backbone
|
||||
out_channel (int): number of output feature channels
|
||||
share_conv (bool): whether to share conv for different layers' reduction
|
||||
extra_stage (int): add extra stage for returning HRFPN fpn_feats
|
||||
spatial_scales (list): feature map scaling factor
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels=[18, 36, 72, 144],
|
||||
out_channel=256,
|
||||
share_conv=False,
|
||||
extra_stage=1,
|
||||
spatial_scales=[1. / 4, 1. / 8, 1. / 16, 1. / 32],
|
||||
use_bias=False):
|
||||
super(HRFPN, self).__init__()
|
||||
in_channel = sum(in_channels)
|
||||
self.in_channel = in_channel
|
||||
self.out_channel = out_channel
|
||||
self.share_conv = share_conv
|
||||
for i in range(extra_stage):
|
||||
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
|
||||
self.spatial_scales = spatial_scales
|
||||
self.num_out = len(self.spatial_scales)
|
||||
self.use_bias = use_bias
|
||||
bias_attr = False if use_bias is False else None
|
||||
|
||||
self.reduction = nn.Conv2D(
|
||||
in_channels=in_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=1,
|
||||
bias_attr=bias_attr)
|
||||
|
||||
if share_conv:
|
||||
self.fpn_conv = nn.Conv2D(
|
||||
in_channels=out_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
bias_attr=bias_attr)
|
||||
else:
|
||||
self.fpn_conv = []
|
||||
for i in range(self.num_out):
|
||||
conv_name = "fpn_conv_" + str(i)
|
||||
conv = self.add_sublayer(
|
||||
conv_name,
|
||||
nn.Conv2D(
|
||||
in_channels=out_channel,
|
||||
out_channels=out_channel,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
bias_attr=bias_attr))
|
||||
self.fpn_conv.append(conv)
|
||||
|
||||
def forward(self, body_feats):
|
||||
num_backbone_stages = len(body_feats)
|
||||
|
||||
outs = []
|
||||
outs.append(body_feats[0])
|
||||
|
||||
# resize
|
||||
for i in range(1, num_backbone_stages):
|
||||
resized = F.interpolate(
|
||||
body_feats[i], scale_factor=2**i, mode='bilinear')
|
||||
outs.append(resized)
|
||||
|
||||
# concat
|
||||
out = paddle.concat(outs, axis=1)
|
||||
        assert out.shape[
            1] == self.in_channel, 'in_channel should be {}, but received {}'.format(
                self.in_channel, out.shape[1])
|
||||
|
||||
# reduction
|
||||
out = self.reduction(out)
|
||||
|
||||
# conv
|
||||
outs = [out]
|
||||
for i in range(1, self.num_out):
|
||||
outs.append(F.avg_pool2d(out, kernel_size=2**i, stride=2**i))
|
||||
outputs = []
|
||||
|
||||
for i in range(self.num_out):
|
||||
conv_func = self.fpn_conv if self.share_conv else self.fpn_conv[i]
|
||||
conv = conv_func(outs[i])
|
||||
outputs.append(conv)
|
||||
|
||||
fpn_feats = [outputs[k] for k in range(self.num_out)]
|
||||
return fpn_feats
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {
|
||||
'in_channels': [i.channels for i in input_shape],
|
||||
'spatial_scales': [1.0 / i.stride for i in input_shape],
|
||||
}
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [
|
||||
ShapeSpec(
|
||||
channels=self.out_channel, stride=1. / s)
|
||||
for s in self.spatial_scales
|
||||
]
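# Usage sketch (illustrative only), assuming PaddlePaddle is installed: the
# HRNet branches must already be aligned so that branch i is 2**i times smaller
# than branch 0; all branches are upsampled, concatenated, reduced, then
# re-pooled into the output pyramid. Shapes are example values.
if __name__ == '__main__':
    neck = HRFPN(in_channels=[18, 36, 72, 144], out_channel=256)
    body_feats = [
        paddle.rand([1, 18, 64, 64]),
        paddle.rand([1, 36, 32, 32]),
        paddle.rand([1, 72, 16, 16]),
        paddle.rand([1, 144, 8, 8]),
    ]
    outs = neck(body_feats)
    print([o.shape for o in outs])  # 5 maps of 256 channels, strides 4 to 64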
168
paddle_detection/ppdet/modeling/necks/lc_pan.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.core.workspace import register, serializable
|
||||
|
||||
from ..shape_spec import ShapeSpec
|
||||
from ..backbones.lcnet import DepthwiseSeparable
|
||||
from .csp_pan import ConvBNLayer, Channel_T, DPModule
|
||||
|
||||
__all__ = ['LCPAN']
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class LCPAN(nn.Layer):
|
||||
"""Path Aggregation Network with LCNet module.
|
||||
Args:
|
||||
in_channels (List[int]): Number of input channels per scale.
|
||||
out_channels (int): Number of output channels (used at each scale)
|
||||
kernel_size (int): The conv2d kernel size of this Module.
|
||||
num_features (int): Number of output features of CSPPAN module.
|
||||
num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
|
||||
use_depthwise (bool): Whether to depthwise separable convolution in
|
||||
blocks. Default: True
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size=5,
|
||||
num_features=3,
|
||||
use_depthwise=True,
|
||||
act='hard_swish',
|
||||
spatial_scales=[0.125, 0.0625, 0.03125]):
|
||||
super(LCPAN, self).__init__()
|
||||
self.conv_t = Channel_T(in_channels, out_channels, act=act)
|
||||
in_channels = [out_channels] * len(spatial_scales)
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.spatial_scales = spatial_scales
|
||||
self.num_features = num_features
|
||||
conv_func = DPModule if use_depthwise else ConvBNLayer
|
||||
|
||||
NET_CONFIG = {
|
||||
#k, in_c, out_c, stride, use_se
|
||||
"block1": [
|
||||
[kernel_size, out_channels * 2, out_channels * 2, 1, False],
|
||||
[kernel_size, out_channels * 2, out_channels, 1, False],
|
||||
],
|
||||
"block2": [
|
||||
[kernel_size, out_channels * 2, out_channels * 2, 1, False],
|
||||
[kernel_size, out_channels * 2, out_channels, 1, False],
|
||||
]
|
||||
}
|
||||
|
||||
if self.num_features == 4:
|
||||
self.first_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.second_top_conv = conv_func(
|
||||
in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
|
||||
self.spatial_scales.append(self.spatial_scales[-1] / 2)
|
||||
|
||||
# build top-down blocks
|
||||
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
|
||||
self.top_down_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1, 0, -1):
|
||||
self.top_down_blocks.append(
|
||||
nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=in_c,
|
||||
num_filters=out_c,
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
|
||||
"block1"])
|
||||
]))
|
||||
|
||||
# build bottom-up blocks
|
||||
self.downsamples = nn.LayerList()
|
||||
self.bottom_up_blocks = nn.LayerList()
|
||||
for idx in range(len(in_channels) - 1):
|
||||
self.downsamples.append(
|
||||
conv_func(
|
||||
in_channels[idx],
|
||||
in_channels[idx],
|
||||
kernel_size=kernel_size,
|
||||
stride=2,
|
||||
act=act))
|
||||
self.bottom_up_blocks.append(
|
||||
nn.Sequential(* [
|
||||
DepthwiseSeparable(
|
||||
num_channels=in_c,
|
||||
num_filters=out_c,
|
||||
dw_size=k,
|
||||
stride=s,
|
||||
use_se=se)
|
||||
for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG[
|
||||
"block2"])
|
||||
]))
|
||||
|
||||
def forward(self, inputs):
|
||||
"""
|
||||
Args:
|
||||
inputs (tuple[Tensor]): input features.
|
||||
Returns:
|
||||
tuple[Tensor]: CSPPAN features.
|
||||
"""
|
||||
assert len(inputs) == len(self.in_channels)
|
||||
inputs = self.conv_t(inputs)
|
||||
|
||||
# top-down path
|
||||
inner_outs = [inputs[-1]]
|
||||
for idx in range(len(self.in_channels) - 1, 0, -1):
|
||||
            feat_high = inner_outs[0]
            feat_low = inputs[idx - 1]

            upsample_feat = self.upsample(feat_high)
|
||||
|
||||
inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
|
||||
paddle.concat([upsample_feat, feat_low], 1))
|
||||
inner_outs.insert(0, inner_out)
|
||||
|
||||
# bottom-up path
|
||||
outs = [inner_outs[0]]
|
||||
for idx in range(len(self.in_channels) - 1):
|
||||
feat_low = outs[-1]
|
||||
feat_height = inner_outs[idx + 1]
|
||||
downsample_feat = self.downsamples[idx](feat_low)
|
||||
out = self.bottom_up_blocks[idx](paddle.concat(
|
||||
[downsample_feat, feat_height], 1))
|
||||
outs.append(out)
|
||||
|
||||
top_features = None
|
||||
if self.num_features == 4:
|
||||
top_features = self.first_top_conv(inputs[-1])
|
||||
top_features = top_features + self.second_top_conv(outs[-1])
|
||||
outs.append(top_features)
|
||||
|
||||
return tuple(outs)
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [
|
||||
ShapeSpec(
|
||||
channels=self.out_channels, stride=1. / s)
|
||||
for s in self.spatial_scales
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
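# Usage sketch (illustrative only): identical PAN wiring to CSPPAN, with the
# fusion blocks swapped for LCNet depthwise-separable stacks. Assumes
# PaddlePaddle and the LCNet DepthwiseSeparable import are available; shapes
# are example values.
if __name__ == '__main__':
    neck = LCPAN(in_channels=[96, 192, 384], out_channels=96)
    feats = [
        paddle.rand([1, 96, 40, 40]),   # stride 8
        paddle.rand([1, 192, 20, 20]),  # stride 16
        paddle.rand([1, 384, 10, 10]),  # stride 32
    ]
    outs = neck(feats)
    print([o.shape for o in outs])  # each level carries 96 channels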
242
paddle_detection/ppdet/modeling/necks/ttf_fpn.py
Normal file
@@ -0,0 +1,242 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from paddle import ParamAttr
|
||||
from paddle.nn.initializer import Constant, Uniform, Normal, XavierUniform
|
||||
from ppdet.core.workspace import register, serializable
|
||||
from paddle.regularizer import L2Decay
|
||||
from ppdet.modeling.layers import DeformableConvV2, ConvNormLayer, LiteConv
|
||||
import math
|
||||
from ppdet.modeling.ops import batch_norm
|
||||
from ..shape_spec import ShapeSpec
|
||||
|
||||
__all__ = ['TTFFPN']
|
||||
|
||||
|
||||
class Upsample(nn.Layer):
|
||||
def __init__(self, ch_in, ch_out, norm_type='bn'):
|
||||
super(Upsample, self).__init__()
|
||||
fan_in = ch_in * 3 * 3
|
||||
stdv = 1. / math.sqrt(fan_in)
|
||||
self.dcn = DeformableConvV2(
|
||||
ch_in,
|
||||
ch_out,
|
||||
kernel_size=3,
|
||||
weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
|
||||
bias_attr=ParamAttr(
|
||||
initializer=Constant(0),
|
||||
regularizer=L2Decay(0.),
|
||||
learning_rate=2.),
|
||||
lr_scale=2.,
|
||||
regularizer=L2Decay(0.))
|
||||
|
||||
self.bn = batch_norm(
|
||||
ch_out, norm_type=norm_type, initializer=Constant(1.))
|
||||
|
||||
def forward(self, feat):
|
||||
dcn = self.dcn(feat)
|
||||
bn = self.bn(dcn)
|
||||
relu = F.relu(bn)
|
||||
out = F.interpolate(relu, scale_factor=2., mode='bilinear')
|
||||
return out
|
||||
|
||||
|
||||
class DeConv(nn.Layer):
|
||||
def __init__(self, ch_in, ch_out, norm_type='bn'):
|
||||
super(DeConv, self).__init__()
|
||||
self.deconv = nn.Sequential()
|
||||
conv1 = ConvNormLayer(
|
||||
ch_in=ch_in,
|
||||
ch_out=ch_out,
|
||||
stride=1,
|
||||
filter_size=1,
|
||||
norm_type=norm_type,
|
||||
initializer=XavierUniform())
|
||||
conv2 = nn.Conv2DTranspose(
|
||||
in_channels=ch_out,
|
||||
out_channels=ch_out,
|
||||
kernel_size=4,
|
||||
padding=1,
|
||||
stride=2,
|
||||
groups=ch_out,
|
||||
weight_attr=ParamAttr(initializer=XavierUniform()),
|
||||
bias_attr=False)
|
||||
bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
|
||||
conv3 = ConvNormLayer(
|
||||
ch_in=ch_out,
|
||||
ch_out=ch_out,
|
||||
stride=1,
|
||||
filter_size=1,
|
||||
norm_type=norm_type,
|
||||
initializer=XavierUniform())
|
||||
|
||||
self.deconv.add_sublayer('conv1', conv1)
|
||||
self.deconv.add_sublayer('relu6_1', nn.ReLU6())
|
||||
self.deconv.add_sublayer('conv2', conv2)
|
||||
self.deconv.add_sublayer('bn', bn)
|
||||
self.deconv.add_sublayer('relu6_2', nn.ReLU6())
|
||||
self.deconv.add_sublayer('conv3', conv3)
|
||||
self.deconv.add_sublayer('relu6_3', nn.ReLU6())
|
||||
|
||||
def forward(self, inputs):
|
||||
return self.deconv(inputs)
|
||||
|
||||
|
||||
class LiteUpsample(nn.Layer):
|
||||
def __init__(self, ch_in, ch_out, norm_type='bn'):
|
||||
super(LiteUpsample, self).__init__()
|
||||
self.deconv = DeConv(ch_in, ch_out, norm_type=norm_type)
|
||||
self.conv = LiteConv(ch_in, ch_out, norm_type=norm_type)
|
||||
|
||||
def forward(self, inputs):
|
||||
deconv_up = self.deconv(inputs)
|
||||
conv = self.conv(inputs)
|
||||
interp_up = F.interpolate(conv, scale_factor=2., mode='bilinear')
|
||||
return deconv_up + interp_up
|
||||
|
||||
|
||||
class ShortCut(nn.Layer):
|
||||
def __init__(self,
|
||||
layer_num,
|
||||
ch_in,
|
||||
ch_out,
|
||||
norm_type='bn',
|
||||
lite_neck=False,
|
||||
name=None):
|
||||
super(ShortCut, self).__init__()
|
||||
shortcut_conv = nn.Sequential()
|
||||
for i in range(layer_num):
|
||||
fan_out = 3 * 3 * ch_out
|
||||
std = math.sqrt(2. / fan_out)
|
||||
in_channels = ch_in if i == 0 else ch_out
|
||||
shortcut_name = name + '.conv.{}'.format(i)
|
||||
if lite_neck:
|
||||
shortcut_conv.add_sublayer(
|
||||
shortcut_name,
|
||||
LiteConv(
|
||||
in_channels=in_channels,
|
||||
out_channels=ch_out,
|
||||
with_act=i < layer_num - 1,
|
||||
norm_type=norm_type))
|
||||
else:
|
||||
shortcut_conv.add_sublayer(
|
||||
shortcut_name,
|
||||
nn.Conv2D(
|
||||
in_channels=in_channels,
|
||||
out_channels=ch_out,
|
||||
kernel_size=3,
|
||||
padding=1,
|
||||
weight_attr=ParamAttr(initializer=Normal(0, std)),
|
||||
bias_attr=ParamAttr(
|
||||
learning_rate=2., regularizer=L2Decay(0.))))
|
||||
if i < layer_num - 1:
|
||||
shortcut_conv.add_sublayer(shortcut_name + '.act',
|
||||
nn.ReLU())
|
||||
self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
|
||||
|
||||
def forward(self, feat):
|
||||
out = self.shortcut(feat)
|
||||
return out
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class TTFFPN(nn.Layer):
|
||||
"""
|
||||
Args:
|
||||
in_channels (list): number of input feature channels from backbone.
|
||||
[128,256,512,1024] by default, means the channels of DarkNet53
|
||||
backbone return_idx [1,2,3,4].
|
||||
planes (list): the number of output feature channels of FPN.
|
||||
[256, 128, 64] by default
|
||||
shortcut_num (list): the number of convolution layers in each shortcut.
|
||||
[3,2,1] by default, means DarkNet53 backbone return_idx_1 has 3 convs
|
||||
in its shortcut, return_idx_2 has 2 convs and return_idx_3 has 1 conv.
|
||||
norm_type (string): norm type, 'sync_bn', 'bn', 'gn' are optional.
|
||||
bn by default
|
||||
lite_neck (bool): whether to use lite conv in TTFNet FPN,
|
||||
False by default
|
||||
fusion_method (string): the method to fusion upsample and lateral layer.
|
||||
'add' and 'concat' are optional, add by default
|
||||
"""
|
||||
|
||||
__shared__ = ['norm_type']
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
planes=[256, 128, 64],
|
||||
shortcut_num=[3, 2, 1],
|
||||
norm_type='bn',
|
||||
lite_neck=False,
|
||||
fusion_method='add'):
|
||||
super(TTFFPN, self).__init__()
|
||||
self.planes = planes
|
||||
self.shortcut_num = shortcut_num[::-1]
|
||||
self.shortcut_len = len(shortcut_num)
|
||||
self.ch_in = in_channels[::-1]
|
||||
self.fusion_method = fusion_method
|
||||
|
||||
self.upsample_list = []
|
||||
self.shortcut_list = []
|
||||
self.upper_list = []
|
||||
for i, out_c in enumerate(self.planes):
|
||||
in_c = self.ch_in[i] if i == 0 else self.upper_list[-1]
|
||||
upsample_module = LiteUpsample if lite_neck else Upsample
|
||||
upsample = self.add_sublayer(
|
||||
'upsample.' + str(i),
|
||||
upsample_module(
|
||||
in_c, out_c, norm_type=norm_type))
|
||||
self.upsample_list.append(upsample)
|
||||
if i < self.shortcut_len:
|
||||
shortcut = self.add_sublayer(
|
||||
'shortcut.' + str(i),
|
||||
ShortCut(
|
||||
self.shortcut_num[i],
|
||||
self.ch_in[i + 1],
|
||||
out_c,
|
||||
norm_type=norm_type,
|
||||
lite_neck=lite_neck,
|
||||
name='shortcut.' + str(i)))
|
||||
self.shortcut_list.append(shortcut)
|
||||
if self.fusion_method == 'add':
|
||||
upper_c = out_c
|
||||
elif self.fusion_method == 'concat':
|
||||
upper_c = out_c * 2
|
||||
else:
|
||||
raise ValueError('Illegal fusion method. Expected add or\
|
||||
concat, but received {}'.format(self.fusion_method))
|
||||
self.upper_list.append(upper_c)
|
||||
|
||||
def forward(self, inputs):
|
||||
feat = inputs[-1]
|
||||
for i, out_c in enumerate(self.planes):
|
||||
feat = self.upsample_list[i](feat)
|
||||
if i < self.shortcut_len:
|
||||
shortcut = self.shortcut_list[i](inputs[-i - 2])
|
||||
if self.fusion_method == 'add':
|
||||
feat = feat + shortcut
|
||||
else:
|
||||
feat = paddle.concat([feat, shortcut], axis=1)
|
||||
return feat
|
||||
|
||||
@classmethod
|
||||
def from_config(cls, cfg, input_shape):
|
||||
return {'in_channels': [i.channels for i in input_shape], }
|
||||
|
||||
@property
|
||||
def out_shape(self):
|
||||
return [ShapeSpec(channels=self.upper_list[-1], )]
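# Usage sketch (illustrative only), assuming PaddlePaddle with deformable-conv
# support is installed: DarkNet-style features are progressively upsampled and
# fused with shortcuts, yielding a single stride-4 map with planes[-1]
# channels. Shapes are example values.
if __name__ == '__main__':
    neck = TTFFPN(in_channels=[128, 256, 512, 1024])
    inputs = [
        paddle.rand([1, 128, 80, 80]),   # stride 4
        paddle.rand([1, 256, 40, 40]),   # stride 8
        paddle.rand([1, 512, 20, 20]),   # stride 16
        paddle.rand([1, 1024, 10, 10]),  # stride 32
    ]
    feat = neck(inputs)
    print(feat.shape)  # [1, 64, 80, 80]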
1099
paddle_detection/ppdet/modeling/necks/yolo_fpn.py
Normal file
File diff suppressed because it is too large