fcb_photo_review/paddle_detection/ppdet/modeling/clrnet_utils.py

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.modeling.initializer import constant_
from paddle.nn.initializer import KaimingNormal

class ConvModule(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=False,
                 norm_type='bn',
                 with_act=True):
        super(ConvModule, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn', None]
        self.with_norm = norm_type is not None
        self.with_act = with_act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias,
            weight_attr=KaimingNormal())
        if self.with_norm:
            if norm_type == 'bn':
                self.bn = nn.BatchNorm2D(out_channels)
            elif norm_type == 'gn':
                # num_groups == num_channels, i.e. each channel is its own group
                self.bn = nn.GroupNorm(out_channels, out_channels)
        if self.with_act:
            self.act = nn.ReLU()

    def forward(self, inputs):
        x = self.conv(inputs)
        if self.with_norm:
            x = self.bn(x)
        if self.with_act:
            x = self.act(x)
        return x
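
# Shape sketch (illustrative, not part of the original file): ConvModule
# bundles Conv2D + optional norm + optional ReLU, so a 1x1 conv only changes
# the channel count:
# >>> m = ConvModule(16, 32, kernel_size=1, norm_type='bn')
# >>> m(paddle.randn([2, 16, 10, 25])).shape  # [2, 32, 10, 25]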

def LinearModule(hidden_dim):
    return nn.LayerList(
        [nn.Linear(
            hidden_dim, hidden_dim, bias_attr=True), nn.ReLU()])
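
# Note (illustrative): LinearModule returns a LayerList rather than a
# Sequential, so it has no forward of its own; callers are expected to
# iterate its sublayers (Linear, then ReLU) explicitly.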

class FeatureResize(nn.Layer):
    def __init__(self, size=(10, 25)):
        super(FeatureResize, self).__init__()
        self.size = size

    def forward(self, x):
        x = F.interpolate(x, self.size)
        return x.flatten(2)
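
# Shape sketch (illustrative): FeatureResize interpolates a (B, C, H, W) map
# to ``size`` and flattens the spatial dims, so with the default size=(10, 25):
# >>> fr = FeatureResize()
# >>> fr(paddle.randn([2, 64, 20, 50])).shape  # [2, 64, 250]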

class ROIGather(nn.Layer):
    '''
    ROIGather module to gather global information for each prior.
    Args:
        in_channels: prior feature channels
        num_priors: number of predefined priors
        sample_points: number of points sampled when extracting features from a line
        fc_hidden_dim: output channel of the fc layer
        refine_layers: total number of refine layers
    '''

    def __init__(self,
                 in_channels,
                 num_priors,
                 sample_points,
                 fc_hidden_dim,
                 refine_layers,
                 mid_channels=48):
        super(ROIGather, self).__init__()
        self.in_channels = in_channels
        self.num_priors = num_priors
        self.f_key = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            norm_type='bn')
        self.f_query = nn.Sequential(
            nn.Conv1D(
                in_channels=num_priors,
                out_channels=num_priors,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=num_priors),
            nn.ReLU())
        self.f_value = nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        self.W = nn.Conv1D(
            in_channels=num_priors,
            out_channels=num_priors,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=num_priors)
        self.resize = FeatureResize()
        # zero-init the output projection so attention starts as an identity residual
        constant_(self.W.weight, 0)
        constant_(self.W.bias, 0)

        self.convs = nn.LayerList()
        self.catconv = nn.LayerList()
        for i in range(refine_layers):
            self.convs.append(
                ConvModule(
                    in_channels,
                    mid_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))
            self.catconv.append(
                ConvModule(
                    mid_channels * (i + 1),
                    in_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))

        self.fc = nn.Linear(
            sample_points * fc_hidden_dim, fc_hidden_dim, bias_attr=True)
        self.fc_norm = nn.LayerNorm(fc_hidden_dim)

    def roi_fea(self, x, layer_index):
        feats = []
        for i, feature in enumerate(x):
            feat_trans = self.convs[i](feature)
            feats.append(feat_trans)
        cat_feat = paddle.concat(feats, axis=1)
        cat_feat = self.catconv[layer_index](cat_feat)
        return cat_feat

    def forward(self, roi_features, x, layer_index):
        '''
        Args:
            roi_features: list of prior features gathered so far, each of shape
                (Batch * num_priors, prior_feat_channel, sample_points, 1)
            x: feature map
            layer_index: index of the refine layer currently being processed
        Return:
            roi: prior features with gathered global information, shape: (Batch, num_priors, fc_hidden_dim)
        '''
        roi = self.roi_fea(roi_features, layer_index)
        bs = x.shape[0]
        roi = roi.reshape([bs * self.num_priors, -1])
        roi = self.fc(roi)
        roi = F.relu(self.fc_norm(roi))
        roi = roi.reshape([bs, self.num_priors, -1])

        query = roi
        value = self.resize(self.f_value(x))  # (B, C, N) global feature
        query = self.f_query(query)  # (B, num_priors, C) context feature from prior roi
        key = self.f_key(x)
        value = value.transpose(perm=[0, 2, 1])  # (B, N, C)
        key = self.resize(key)  # (B, C, N) global feature

        # scaled dot-product attention between prior queries and global keys
        sim_map = paddle.matmul(query, key)
        sim_map = (self.in_channels**-.5) * sim_map
        sim_map = F.softmax(sim_map, axis=-1)

        context = paddle.matmul(sim_map, value)
        context = self.W(context)

        # residual connection with dropout on the gathered context
        roi = roi + F.dropout(context, p=0.1, training=self.training)

        return roi
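
# Usage sketch (illustrative values, not from the original file). Note that
# fc_hidden_dim must equal in_channels so that the flattened roi features
# match the fc input dim sample_points * fc_hidden_dim:
# >>> gather = ROIGather(in_channels=64, num_priors=192, sample_points=36,
# ...                    fc_hidden_dim=64, refine_layers=3)
# >>> pooled = [paddle.randn([2 * 192, 64, 36, 1])]  # one refine layer so far
# >>> fmap = paddle.randn([2, 64, 10, 25])           # global feature map
# >>> gather(pooled, fmap, layer_index=0).shape      # [2, 192, 64]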

class SegDecoder(nn.Layer):
    '''
    Optional segmentation decoder used for auxiliary supervision.
    '''

    def __init__(self,
                 image_height,
                 image_width,
                 num_class,
                 prior_feat_channels=64,
                 refine_layers=3):
        super().__init__()
        self.dropout = nn.Dropout2D(0.1)
        self.conv = nn.Conv2D(prior_feat_channels * refine_layers, num_class,
                              1)
        self.image_height = image_height
        self.image_width = image_width

    def forward(self, x):
        x = self.dropout(x)
        x = self.conv(x)
        x = F.interpolate(
            x,
            size=[self.image_height, self.image_width],
            mode='bilinear',
            align_corners=False)
        return x
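
# Usage sketch (illustrative): the decoder takes refine_layers feature maps
# concatenated along channels and upsamples class logits to full image size:
# >>> dec = SegDecoder(image_height=320, image_width=800, num_class=5,
# ...                  prior_feat_channels=64, refine_layers=3)
# >>> dec(paddle.randn([2, 64 * 3, 10, 25])).shape  # [2, 5, 320, 800]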

def accuracy(pred, target, topk=1, thresh=None):
    """Calculate accuracy according to the prediction and target.
    Args:
        pred (paddle.Tensor): The model prediction, shape (N, num_class)
        target (paddle.Tensor): The target of each prediction, shape (N, )
        topk (int | tuple[int], optional): If the predictions in ``topk``
            match the target, the predictions are regarded as
            correct ones. Defaults to 1.
        thresh (float, optional): If not None, predictions with scores under
            this threshold are considered incorrect. Defaults to None.
    Returns:
        float | tuple[float]: If the input ``topk`` is a single integer,
            the function will return a single float as accuracy. If
            ``topk`` is a tuple containing multiple integers, the
            function will return a tuple containing accuracies of
            each ``topk`` number.
    """
    assert isinstance(topk, (int, tuple))
    if isinstance(topk, int):
        topk = (topk, )
        return_single = True
    else:
        return_single = False

    maxk = max(topk)
    if pred.shape[0] == 0:
        # paddle.Tensor has no ``new_tensor``, so build the zero accuracies directly
        accu = [paddle.to_tensor(0., dtype='float32') for i in range(len(topk))]
        return accu[0] if return_single else accu
    assert pred.ndim == 2 and target.ndim == 1
    assert pred.shape[0] == target.shape[0]
    assert maxk <= pred.shape[1], \
        f'maxk {maxk} exceeds pred dimension {pred.shape[1]}'
    pred_value, pred_label = pred.topk(maxk, axis=1)
    pred_label = pred_label.t()  # transpose to shape (maxk, N)
    correct = pred_label.equal(target.reshape([1, -1]).expand_as(pred_label))
    if thresh is not None:
        # Only predictions with scores larger than thresh are counted as correct
        correct = correct & (pred_value > thresh).t()
    res = []
    for k in topk:
        correct_k = correct[:k].reshape([-1]).cast("float32").sum(0,
                                                                  keepdim=True)
        correct_k = correct_k * (100.0 / pred.shape[0])
        res.append(correct_k)
    return res[0] if return_single else res
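
# Worked example (illustrative): two samples, three classes. Only the first
# sample's argmax matches its target, so top-1 accuracy is 50%; the second
# sample's target is also outside its top-2, so top-2 accuracy is 50% too:
# >>> pred = paddle.to_tensor([[0.1, 0.7, 0.2], [0.6, 0.3, 0.1]])
# >>> target = paddle.to_tensor([1, 2])
# >>> accuracy(pred, target, topk=1)       # Tensor([50.])
# >>> accuracy(pred, target, topk=(1, 2))  # [Tensor([50.]), Tensor([50.])]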

class Accuracy(nn.Layer):
    def __init__(self, topk=(1, ), thresh=None):
        """Module to calculate the accuracy.
        Args:
            topk (tuple, optional): The criterion used to calculate the
                accuracy. Defaults to (1,).
            thresh (float, optional): If not None, predictions with scores
                under this threshold are considered incorrect. Defaults to None.
        """
        super().__init__()
        self.topk = topk
        self.thresh = thresh

    def forward(self, pred, target):
        """Forward function to calculate accuracy.
        Args:
            pred (paddle.Tensor): Prediction of models.
            target (paddle.Tensor): Target for each prediction.
        Returns:
            tuple[float]: The accuracies under different topk criterions.
        """
        return accuracy(pred, target, self.topk, self.thresh)
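
if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original file):
    # Accuracy simply wraps the functional ``accuracy`` with stored settings.
    metric = Accuracy(topk=(1, ))
    pred = paddle.to_tensor([[0.2, 0.8], [0.9, 0.1]])
    target = paddle.to_tensor([1, 0])
    print(metric(pred, target))  # [Tensor([100.])] - both predictions correct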