更换文档检测模型
This commit is contained in:
309
paddle_detection/ppdet/modeling/clrnet_utils.py
Normal file
309
paddle_detection/ppdet/modeling/clrnet_utils.py
Normal file
@@ -0,0 +1,309 @@
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.modeling.initializer import constant_
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
|
||||
|
||||
class ConvModule(nn.Layer):
    """Conv2D + optional normalization + optional ReLU activation.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int|tuple): convolution kernel size. Default: 1.
        stride (int|tuple): convolution stride. Default: 1.
        padding (int|tuple): convolution padding. Default: 0.
        dilation (int|tuple): convolution dilation. Default: 1.
        groups (int): number of blocked connections. Default: 1.
        bias (bool): whether the conv layer learns a bias. Default: False.
        norm_type (str|None): one of 'bn', 'sync_bn', 'gn' or None (no norm).
            Default: 'bn'.
        wtih_act (bool): whether to append a ReLU after the norm.
            NOTE: the name is a typo for ``with_act`` but is kept unchanged
            for backward compatibility with existing callers and configs.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=False,
                 norm_type='bn',
                 wtih_act=True):
        super(ConvModule, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn', None]
        self.with_norm = norm_type is not None
        self.wtih_act = wtih_act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias,
            weight_attr=KaimingNormal())
        if self.with_norm:
            if norm_type == 'bn':
                self.bn = nn.BatchNorm2D(out_channels)
            elif norm_type == 'sync_bn':
                # Bug fix: 'sync_bn' passed the assert above but previously
                # built no norm layer, so forward() raised AttributeError.
                self.bn = nn.SyncBatchNorm(out_channels)
            elif norm_type == 'gn':
                # One group per channel (num_groups == num_channels).
                self.bn = nn.GroupNorm(out_channels, out_channels)

        if self.wtih_act:
            self.act = nn.ReLU()

    def forward(self, inputs):
        """Apply conv -> (norm) -> (ReLU) and return the result."""
        x = self.conv(inputs)
        if self.with_norm:
            x = self.bn(x)
        if self.wtih_act:
            x = self.act(x)
        return x
|
||||
|
||||
def LinearModule(hidden_dim):
    """Return a LayerList of [Linear(hidden_dim -> hidden_dim), ReLU]."""
    layers = [
        nn.Linear(
            hidden_dim, hidden_dim, bias_attr=True),
        nn.ReLU(),
    ]
    return nn.LayerList(layers)
|
||||
|
||||
class FeatureResize(nn.Layer):
    """Resize a feature map to a fixed spatial size, then flatten H*W.

    Args:
        size (tuple): target (height, width) of the interpolation.
    """

    def __init__(self, size=(10, 25)):
        super(FeatureResize, self).__init__()
        self.size = size

    def forward(self, x):
        """(B, C, H, W) -> (B, C, size[0]*size[1])."""
        resized = F.interpolate(x, self.size)
        return resized.flatten(2)
||||
|
||||
|
||||
class ROIGather(nn.Layer):
    '''
    ROIGather module: enriches per-prior ROI features with global context
    gathered from the feature map via a single attention step.

    Args:
        in_channels: prior feature channels
        num_priors: number of predefined priors
        sample_points: number of points sampled when extracting features
            along a line prior
        fc_hidden_dim: output channel of the fc projection
        refine_layers: total number of refine layers
        mid_channels: channels of the per-layer transform convs. Default: 48.
    '''

    def __init__(self,
                 in_channels,
                 num_priors,
                 sample_points,
                 fc_hidden_dim,
                 refine_layers,
                 mid_channels=48):
        super(ROIGather, self).__init__()
        self.in_channels = in_channels
        self.num_priors = num_priors
        # Key projection over the feature map (with BN).
        self.f_key = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            norm_type='bn')

        # Query projection over the prior ROI features; grouped so each
        # prior is transformed independently.
        self.f_query = nn.Sequential(
            nn.Conv1D(
                in_channels=num_priors,
                out_channels=num_priors,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=num_priors),
            nn.ReLU(), )
        # Value projection over the feature map.
        self.f_value = nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        # Output projection of the attention context, grouped per prior.
        self.W = nn.Conv1D(
            in_channels=num_priors,
            out_channels=num_priors,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=num_priors)

        self.resize = FeatureResize()
        # Zero-init W so the attention branch starts as an identity
        # (roi + 0) and learns a residual.
        constant_(self.W.weight, 0)
        constant_(self.W.bias, 0)

        # Per-refine-layer transform convs and the concat-fusion convs.
        self.convs = nn.LayerList()
        self.catconv = nn.LayerList()
        for i in range(refine_layers):
            self.convs.append(
                ConvModule(
                    in_channels,
                    mid_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))

            self.catconv.append(
                ConvModule(
                    mid_channels * (i + 1),
                    in_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))

        self.fc = nn.Linear(
            sample_points * fc_hidden_dim, fc_hidden_dim, bias_attr=True)

        self.fc_norm = nn.LayerNorm(fc_hidden_dim)

    def roi_fea(self, x, layer_index):
        """Transform each layer's ROI features, concat them along the
        channel axis and fuse with the layer's catconv."""
        feats = []
        for i, feature in enumerate(x):
            feat_trans = self.convs[i](feature)
            feats.append(feat_trans)
        cat_feat = paddle.concat(feats, axis=1)
        cat_feat = self.catconv[layer_index](cat_feat)
        return cat_feat

    def forward(self, roi_features, x, layer_index):
        '''
        Args:
            roi_features: prior feature, shape: (Batch * num_priors, prior_feat_channel, sample_point, 1)
            x: feature map
            layer_index: currently on which layer to refine
        Return:
            roi: prior features with gathered global information, shape: (Batch, num_priors, fc_hidden_dim)
        '''
        roi = self.roi_fea(roi_features, layer_index)
        bs = x.shape[0]
        # (B * num_priors, C, S, 1) -> (B * num_priors, C * S)
        roi = roi.reshape([bs * self.num_priors, -1])
        roi = self.fc(roi)
        roi = F.relu(self.fc_norm(roi))
        # (B * num_priors, fc_hidden_dim) -> (B, num_priors, fc_hidden_dim)
        roi = roi.reshape([bs, self.num_priors, -1])
        query = roi

        value = self.resize(self.f_value(x))  # (B, C, N) global feature
        query = self.f_query(
            query)  # (B, N, 1) sample context feature from prior roi
        key = self.f_key(x)
        value = value.transpose(perm=[0, 2, 1])
        key = self.resize(key)  # (B, C, N) global feature
        # Scaled dot-product attention over the global feature positions.
        sim_map = paddle.matmul(query, key)
        sim_map = (self.in_channels**-.5) * sim_map
        sim_map = F.softmax(sim_map, axis=-1)

        context = paddle.matmul(sim_map, value)
        context = self.W(context)

        # Residual add with dropout on the context branch.
        roi = roi + F.dropout(context, p=0.1, training=self.training)

        return roi
|
||||
|
||||
class SegDecoder(nn.Layer):
    """Optional segmentation decoder head.

    Applies spatial dropout and a 1x1 conv over the concatenated
    refine-layer features, then upsamples the class logits back to the
    input image resolution.
    """

    def __init__(self,
                 image_height,
                 image_width,
                 num_class,
                 prior_feat_channels=64,
                 refine_layers=3):
        super().__init__()
        self.dropout = nn.Dropout2D(0.1)
        self.conv = nn.Conv2D(prior_feat_channels * refine_layers, num_class,
                              1)
        self.image_height = image_height
        self.image_width = image_width

    def forward(self, x):
        """(B, C*refine_layers, h, w) -> (B, num_class, image_h, image_w)."""
        logits = self.conv(self.dropout(x))
        return F.interpolate(
            logits,
            size=[self.image_height, self.image_width],
            mode='bilinear',
            align_corners=False)
|
||||
|
||||
import paddle.nn as nn
|
||||
|
||||
|
||||
def accuracy(pred, target, topk=1, thresh=None):
    """Calculate accuracy according to the prediction and target.

    Args:
        pred (paddle.Tensor): The model prediction, shape (N, num_class)
        target (paddle.Tensor): The target of each prediction, shape (N, )
        topk (int | tuple[int], optional): If the predictions in ``topk``
            matches the target, the predictions will be regarded as
            correct ones. Defaults to 1.
        thresh (float, optional): If not None, predictions with scores under
            this threshold are considered incorrect. Default to None.

    Returns:
        float | tuple[float]: If the input ``topk`` is a single integer,
            the function will return a single float as accuracy. If
            ``topk`` is a tuple containing multiple integers, the
            function will return a tuple containing accuracies of
            each ``topk`` number.
    """
    assert isinstance(topk, (int, tuple))
    if isinstance(topk, int):
        topk = (topk, )
        return_single = True
    else:
        return_single = False

    maxk = max(topk)
    if pred.shape[0] == 0:
        # Bug fix: ``pred.new_tensor`` is a torch-only API that does not
        # exist on paddle Tensors; build zero accuracies with paddle ops.
        accu = [paddle.zeros([1], dtype='float32') for i in range(len(topk))]
        return accu[0] if return_single else accu
    assert pred.ndim == 2 and target.ndim == 1
    assert pred.shape[0] == target.shape[0]
    assert maxk <= pred.shape[1], \
        f'maxk {maxk} exceeds pred dimension {pred.shape[1]}'
    pred_value, pred_label = pred.topk(maxk, axis=1)
    pred_label = pred_label.t()  # transpose to shape (maxk, N)
    correct = pred_label.equal(target.reshape([1, -1]).expand_as(pred_label))
    if thresh is not None:
        # Only prediction values larger than thresh are counted as correct
        correct = correct & (pred_value > thresh).t()
    res = []
    for k in topk:
        correct_k = correct[:k].reshape([-1]).cast("float32").sum(0,
                                                                  keepdim=True)
        correct_k = correct_k * (100.0 / pred.shape[0])
        res.append(correct_k)
    return res[0] if return_single else res
||||
|
||||
|
||||
class Accuracy(nn.Layer):
    """Layer wrapper around :func:`accuracy`.

    Args:
        topk (tuple, optional): The criterion used to calculate the
            accuracy. Defaults to (1,).
        thresh (float, optional): If not None, predictions with scores
            under this threshold are considered incorrect. Default to None.
    """

    def __init__(self, topk=(1, ), thresh=None):
        super().__init__()
        self.topk = topk
        self.thresh = thresh

    def forward(self, pred, target):
        """Compute the accuracies of *pred* against *target*.

        Args:
            pred (paddle.Tensor): Prediction of models.
            target (paddle.Tensor): Target for each prediction.

        Returns:
            tuple[float]: The accuracies under different topk criterions.
        """
        result = accuracy(pred, target, self.topk, self.thresh)
        return result
|
||||
Reference in New Issue
Block a user