# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
|
|
This code is the paddle implementation of MobileOne block, see: https://arxiv.org/pdf/2206.04040.pdf.
|
|
Some codes are based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
|
|
Ths copyright of microsoft/Swin-Transformer is as follows:
|
|
MIT License [see LICENSE for details]
|
|
"""
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant

from ppdet.modeling.ops import get_act_fn
from ppdet.modeling.layers import ConvNormLayer


class MobileOneBlock(nn.Layer):
    """MobileOne building block (https://arxiv.org/pdf/2206.04040.pdf).

    At training time the block is over-parameterized: stage 1 sums
    `conv_num` depthwise k x k conv+BN branches, a depthwise 1x1 conv+BN
    branch and an optional BN identity branch; stage 2 sums `conv_num`
    pointwise 1x1 conv+BN branches and an optional BN identity branch.
    `convert_to_deploy()` folds each stage into a single convolution.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 stride,
                 kernel_size,
                 conv_num=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 bias_on=False,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(mean=0., std=0.01),
                 skip_quant=False,
                 act='relu'):
        super(MobileOneBlock, self).__init__()

        self.ch_in = ch_in
        self.ch_out = ch_out
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = (kernel_size - 1) // 2
        self.k = conv_num

        # k parallel branches per stage (the over-parameterization factor).
        self.depth_conv = nn.LayerList()
        self.point_conv = nn.LayerList()
        for _ in range(self.k):
            self.depth_conv.append(
                ConvNormLayer(
                    ch_in,
                    ch_in,
                    kernel_size,
                    stride=stride,
                    groups=ch_in,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    norm_groups=norm_groups,
                    bias_on=bias_on,
                    lr_scale=lr_scale,
                    freeze_norm=freeze_norm,
                    initializer=initializer,
                    skip_quant=skip_quant))
            self.point_conv.append(
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    1,
                    stride=1,
                    groups=1,
                    norm_type=norm_type,
                    norm_decay=norm_decay,
                    norm_groups=norm_groups,
                    bias_on=bias_on,
                    lr_scale=lr_scale,
                    freeze_norm=freeze_norm,
                    initializer=initializer,
                    skip_quant=skip_quant))
        # Depthwise 1x1 "scale" branch of stage 1.
        self.rbr_1x1 = ConvNormLayer(
            ch_in,
            ch_in,
            1,
            stride=self.stride,
            groups=ch_in,
            norm_type=norm_type,
            norm_decay=norm_decay,
            norm_groups=norm_groups,
            bias_on=bias_on,
            lr_scale=lr_scale,
            freeze_norm=freeze_norm,
            initializer=initializer,
            skip_quant=skip_quant)
        # BN-only identity branches; only valid when the input can be passed
        # through unchanged (same channel count, stride 1).
        self.rbr_identity_st1 = nn.BatchNorm2D(
            num_features=ch_in,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(
                0.0))) if ch_in == ch_out and self.stride == 1 else None
        self.rbr_identity_st2 = nn.BatchNorm2D(
            num_features=ch_out,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(
                0.0))) if ch_in == ch_out and self.stride == 1 else None
        self.act = get_act_fn(act) if act is None or isinstance(act, (
            str, dict)) else act

    def forward(self, x):
        if hasattr(self, "conv1") and hasattr(self, "conv2"):
            # Deploy mode: the branches have been folded into two plain convs.
            y = self.act(self.conv2(self.act(self.conv1(x))))
        else:
            # Training mode, stage 1: sum the k depthwise branches, the
            # depthwise 1x1 branch and the BN identity branch (if present).
            if self.rbr_identity_st1 is None:
                id_out_st1 = 0
            else:
                id_out_st1 = self.rbr_identity_st1(x)

            x1_1 = 0
            for i in range(self.k):
                x1_1 += self.depth_conv[i](x)

            x1_2 = self.rbr_1x1(x)
            x1 = self.act(x1_1 + x1_2 + id_out_st1)

            # Stage 2: sum the k pointwise branches and the BN identity branch.
            if self.rbr_identity_st2 is None:
                id_out_st2 = 0
            else:
                id_out_st2 = self.rbr_identity_st2(x1)

            x2_1 = 0
            for i in range(self.k):
                x2_1 += self.point_conv[i](x1)
            y = self.act(x2_1 + id_out_st2)

        return y

    def convert_to_deploy(self):
        # Create the two fused convolutions once, copy the equivalent
        # weights into them, then drop the training-time branches.
        if not hasattr(self, 'conv1'):
            self.conv1 = nn.Conv2D(
                in_channels=self.ch_in,
                out_channels=self.ch_in,
                kernel_size=self.kernel_size,
                stride=self.stride,
                padding=self.padding,
                groups=self.ch_in,
                bias_attr=ParamAttr(
                    initializer=Constant(value=0.), learning_rate=1.))
        if not hasattr(self, 'conv2'):
            self.conv2 = nn.Conv2D(
                in_channels=self.ch_in,
                out_channels=self.ch_out,
                kernel_size=1,
                stride=1,
                padding='SAME',
                groups=1,
                bias_attr=ParamAttr(
                    initializer=Constant(value=0.), learning_rate=1.))

        conv1_kernel, conv1_bias, conv2_kernel, conv2_bias = \
            self.get_equivalent_kernel_bias()
        self.conv1.weight.set_value(conv1_kernel)
        self.conv1.bias.set_value(conv1_bias)
        self.conv2.weight.set_value(conv2_kernel)
        self.conv2.bias.set_value(conv2_bias)
        self.__delattr__('depth_conv')
        self.__delattr__('point_conv')
        self.__delattr__('rbr_1x1')
        if hasattr(self, 'rbr_identity_st1'):
            self.__delattr__('rbr_identity_st1')
        if hasattr(self, 'rbr_identity_st2'):
            self.__delattr__('rbr_identity_st2')

    def get_equivalent_kernel_bias(self):
        # Fuse every branch with its BN, then sum the branches of each stage
        # into a single (kernel, bias) pair.
        st1_kernel3x3, st1_bias3x3 = self._fuse_bn_tensor(self.depth_conv)
        st1_kernel1x1, st1_bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
        st1_kernelid, st1_biasid = self._fuse_bn_tensor(
            self.rbr_identity_st1, kernel_size=self.kernel_size)

        st2_kernel1x1, st2_bias1x1 = self._fuse_bn_tensor(self.point_conv)
        st2_kernelid, st2_biasid = self._fuse_bn_tensor(
            self.rbr_identity_st2, kernel_size=1)

        conv1_kernel = st1_kernel3x3 + self._pad_1x1_to_3x3_tensor(
            st1_kernel1x1) + st1_kernelid
        conv1_bias = st1_bias3x3 + st1_bias1x1 + st1_biasid

        conv2_kernel = st2_kernel1x1 + st2_kernelid
        conv2_bias = st2_bias1x1 + st2_biasid

        return conv1_kernel, conv1_bias, conv2_kernel, conv2_bias

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        # Zero-pad a 1x1 kernel so it can be summed with the k x k kernels.
        if kernel1x1 is None:
            return 0
        else:
            padding_size = (self.kernel_size - 1) // 2
            return nn.functional.pad(
                kernel1x1,
                [padding_size, padding_size, padding_size, padding_size])

    def _fuse_bn_tensor(self, branch, kernel_size=3):
        if branch is None:
            return 0, 0

        if isinstance(branch, nn.LayerList):
            # A LayerList of conv+BN branches: fuse each one, then sum them.
            fused_kernels = []
            fused_bias = []
            for block in branch:
                kernel = block.conv.weight
                running_mean = block.norm._mean
                running_var = block.norm._variance
                gamma = block.norm.weight
                beta = block.norm.bias
                eps = block.norm._epsilon

                std = (running_var + eps).sqrt()
                t = (gamma / std).reshape((-1, 1, 1, 1))

                fused_kernels.append(kernel * t)
                fused_bias.append(beta - running_mean * gamma / std)

            return sum(fused_kernels), sum(fused_bias)

        elif isinstance(branch, ConvNormLayer):
            kernel = branch.conv.weight
            running_mean = branch.norm._mean
            running_var = branch.norm._variance
            gamma = branch.norm.weight
            beta = branch.norm.bias
            eps = branch.norm._epsilon
        else:
            # A BN-only identity branch: build the equivalent identity kernel
            # (per-group for depthwise convs), then fuse the BN into it.
            assert isinstance(branch, nn.BatchNorm2D)
            input_dim = self.ch_in if kernel_size == 1 else 1
            kernel_value = paddle.zeros(
                shape=[self.ch_in, input_dim, kernel_size, kernel_size],
                dtype='float32')
            if kernel_size > 1:
                for i in range(self.ch_in):
                    kernel_value[i, i % input_dim, (kernel_size - 1) // 2, (
                        kernel_size - 1) // 2] = 1
            elif kernel_size == 1:
                for i in range(self.ch_in):
                    kernel_value[i, i % input_dim, 0, 0] = 1
            else:
                raise ValueError("Invalid kernel size received!")
            kernel = paddle.to_tensor(kernel_value, place=branch.weight.place)
            running_mean = branch._mean
            running_var = branch._variance
            gamma = branch.weight
            beta = branch.bias
            eps = branch._epsilon

        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))

        return kernel * t, beta - running_mean * gamma / std
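

# A minimal usage sketch (illustrative only; it assumes a PaddleDetection
# checkout so the ppdet imports above resolve, and the channel counts and
# input shape are arbitrary): build a block, run it in training form, fold
# the branches, and verify the deploy form matches numerically.
if __name__ == '__main__':
    paddle.seed(0)
    block = MobileOneBlock(
        ch_in=32, ch_out=32, stride=1, kernel_size=3, conv_num=4)
    block.eval()  # fusion uses BN running stats, so it is exact in eval mode
    x = paddle.rand([1, 32, 56, 56])
    y_train = block(x)
    block.convert_to_deploy()
    y_deploy = block(x)
    # Expected to be ~1e-6 or smaller (floating-point accumulation order).
    print(float((y_train - y_deploy).abs().max()))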