更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,399 @@
import math
import paddle
import numpy as np
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.initializer import normal_
from ppdet.modeling.lane_utils import Lane
from ppdet.modeling.losses import line_iou
from ppdet.modeling.clrnet_utils import ROIGather, LinearModule, SegDecoder
__all__ = ['CLRHead']
# CLRHead is registered with the ppdet workspace through @register and derives
# from paddle's nn.Layer. Its methods build lane priors from a learned
# embedding, refine them stage by stage in forward(), and turn the raw
# predictions into lanes in get_lanes().
@register
class CLRHead(nn.Layer):
# 'loss' is declared for injection: __init__ takes it as the `loss` argument
# and forward() calls it as self.loss(output, inputs) in training mode.
__inject__ = ['loss']
# Constructor arguments declared as shared. Presumably the workspace fills
# these from the global config -- confirm against ppdet.core.workspace.
__shared__ = [
'img_w', 'img_h', 'ori_img_h', 'num_classes', 'cut_height',
'num_points', "max_lanes"
]
def __init__(self,
             num_points=72,
             prior_feat_channels=64,
             fc_hidden_dim=64,
             num_priors=192,
             img_w=800,
             img_h=320,
             ori_img_h=590,
             cut_height=270,
             num_classes=5,
             num_fc=2,
             refine_layers=3,
             sample_points=36,
             conf_threshold=0.4,
             nms_thres=0.5,
             max_lanes=4,
             loss='CLRNetLoss'):
    """Build the lane head: buffers, priors and prediction layers.

    Args:
        num_points (int): Points per lane; stored as ``n_offsets`` with
            ``n_strips = num_points - 1``.
        prior_feat_channels (int): Channel count handed to ``SegDecoder``
            and ``ROIGather`` and used when reshaping pooled features.
        fc_hidden_dim (int): Width given to ``LinearModule`` and
            ``ROIGather``; also the input width of the two output layers.
        num_priors (int): Number of lane priors (rows of the embedding).
        img_w (int): Image width used when computing lane x coordinates.
        img_h (int): Image height used when computing lane x coordinates.
        ori_img_h (int): Used by ``predictions_to_pred`` to rescale y.
        cut_height (int): Used by ``predictions_to_pred`` to rescale y.
        num_classes (int): Passed to ``SegDecoder``.
        num_fc (int): How many times ``LinearModule`` is instantiated for
            each of the regression and classification branches.
        refine_layers (int): Number of refinement stages run by
            ``forward``.
        sample_points (int): Number of positions sampled along a prior.
        conf_threshold (float): Score threshold applied in ``get_lanes``.
        nms_thres (float): Threshold forwarded to ``lane_nms``.
        max_lanes (int): ``top_k`` forwarded to ``lane_nms``.
        loss: Loss callable; ``forward`` invokes it in training mode.
    """
    super(CLRHead, self).__init__()

    # Plain configuration values.
    self.img_w = img_w
    self.img_h = img_h
    self.ori_img_h = ori_img_h
    self.cut_height = cut_height
    self.n_offsets = num_points
    self.n_strips = num_points - 1
    self.num_priors = num_priors
    self.num_classes = num_classes
    self.sample_points = sample_points
    self.refine_layers = refine_layers
    self.fc_hidden_dim = fc_hidden_dim
    self.prior_feat_channels = prior_feat_channels
    self.conf_threshold = conf_threshold
    self.nms_thres = nms_thres
    self.max_lanes = max_lanes
    self.loss = loss

    # Integer positions (0 .. n_strips) of the points sampled on each prior.
    sample_fractions = paddle.linspace(
        start=0, stop=1, num=self.sample_points, dtype=paddle.float32)
    self.register_buffer(
        name='sample_x_indexs',
        tensor=(sample_fractions * self.n_strips).astype(dtype='int64'))

    # Matching y values, flipped along the last axis.
    sample_ys = 1 - self.sample_x_indexs.astype('float32') / self.n_strips
    self.register_buffer(
        name='prior_feat_ys', tensor=paddle.flip(x=sample_ys, axis=[-1]))

    # y of every lane point, running from 1 down to 0.
    all_ys = paddle.linspace(start=1, stop=0, num=self.n_offsets)
    self.register_buffer(name='prior_ys', tensor=all_ys.astype('float32'))

    # Learnable priors and their expanded form.
    self._init_prior_embeddings()
    init_priors, priors_on_featmap = self.generate_priors_from_embeddings()
    self.register_buffer(name='priors', tensor=init_priors)
    self.register_buffer(name='priors_on_featmap', tensor=priors_on_featmap)

    self.seg_decoder = SegDecoder(self.img_h, self.img_w, self.num_classes,
                                  self.prior_feat_channels,
                                  self.refine_layers)

    # Shared-width FC stacks; LinearModule is instantiated for the
    # regression branch first, then the classification branch.
    reg_branch, cls_branch = [], []
    for _ in range(num_fc):
        reg_branch.extend(LinearModule(self.fc_hidden_dim))
        cls_branch.extend(LinearModule(self.fc_hidden_dim))
    self.reg_modules = nn.LayerList(sublayers=reg_branch)
    self.cls_modules = nn.LayerList(sublayers=cls_branch)

    self.roi_gather = ROIGather(self.prior_feat_channels, self.num_priors,
                                self.sample_points, self.fc_hidden_dim,
                                self.refine_layers)

    # forward() consumes the regression output as: 3 values added to
    # columns 2:5, 1 value written to column 5, n_offsets x offsets.
    self.reg_layers = nn.Linear(
        in_features=self.fc_hidden_dim,
        out_features=self.n_offsets + 1 + 2 + 1,
        bias_attr=True)
    # Two logits per prior, written to columns 0:2 in forward().
    self.cls_layers = nn.Linear(
        in_features=self.fc_hidden_dim, out_features=2, bias_attr=True)

    self.init_weights()
def init_weights(self):
    """Re-initialise the two output layers.

    Every parameter of ``cls_layers`` and then of ``reg_layers`` is passed
    to ``normal_`` with mean 0.0 and std 0.001.
    """
    for output_layer in (self.cls_layers, self.reg_layers):
        for param in output_layer.parameters():
            normal_(param, mean=0.0, std=0.001)
def pool_prior_features(self, batch_features, num_priors, prior_xs):
    """
    Pool prior features from a feature map with ``F.grid_sample``.

    Args:
        batch_features (Tensor): Input feature maps, shape: (B, C, H, W)
        num_priors (int): Number of priors per image.
        prior_xs (Tensor): x positions of the sample points; reshaped
            here to (B, num_priors, -1, 1) and rescaled with
            ``x * 2 - 1`` before being used as grid coordinates.

    Returns:
        Tensor: Sampled features reshaped to
            (B * num_priors, prior_feat_channels, sample_points, 1).
    """
    n_batch = batch_features.shape[0]
    grid_shape = [n_batch, num_priors, -1, 1]

    xs = prior_xs.reshape(grid_shape)
    # The same y positions are used for every prior of every image.
    ys = self.prior_feat_ys.tile(
        repeat_times=[n_batch * num_priors]).reshape(grid_shape)

    xs = xs * 2.0 - 1.0
    ys = ys * 2.0 - 1.0
    sampling_grid = paddle.concat(x=(xs, ys), axis=-1)

    sampled = F.grid_sample(
        x=batch_features, grid=sampling_grid, align_corners=True)
    sampled = sampled.transpose(perm=[0, 2, 1, 3])
    return sampled.reshape([
        n_batch * num_priors, self.prior_feat_channels, self.sample_points,
        1
    ])
def generate_priors_from_embeddings(self):
    """Expand the prior embedding into full prior rows.

    Each row has ``2 + 2 + 2 + n_offsets`` columns: 2 scores, start_y,
    start_x, theta, length, then one x coordinate per ``prior_ys`` entry.
    Columns 2:5 are copied from the embedding weight; the score and length
    columns stay zero here; columns 6: are the x coordinates of the line
    defined by (start_y, start_x, theta), divided by ``img_w - 1``.

    Returns:
        tuple(Tensor, Tensor): ``priors`` of shape
            (num_priors, 6 + n_offsets) and ``priors_on_featmap``, the x
            columns of ``priors`` selected by ``sample_x_indexs``.
    """
    embedding = self.prior_embeddings.weight
    row_width = 2 + 2 + 2 + self.n_offsets
    priors = paddle.zeros(
        (self.num_priors, row_width), dtype=embedding.dtype)
    priors[:, 2:5] = embedding.clone()

    def per_offset(column):
        # (num_priors,) -> (num_priors, n_offsets): repeat for every point.
        return priors[:, column].unsqueeze(1).clone().tile(
            [1, self.n_offsets])

    start_x = per_offset(3)
    ys = self.prior_ys.tile([self.num_priors, 1])
    start_y = per_offset(2)
    # theta is stored as a fraction of pi; 1e-05 is added to the angle
    # before taking the tangent.
    tan_theta = paddle.tan(x=per_offset(4) * math.pi + 1e-05)

    priors[:, 6:] = (
        start_x * (self.img_w - 1) +
        (1 - ys - start_y) * self.img_h / tan_theta) / (self.img_w - 1)

    priors_on_featmap = paddle.index_select(
        priors, 6 + self.sample_x_indexs, axis=-1)
    return priors, priors_on_featmap
def _init_prior_embeddings(self):
    """Create ``prior_embeddings`` and fill it with hand-placed priors.

    The embedding has ``num_priors`` rows of three values that
    ``generate_priors_from_embeddings`` copies into the start_y, start_x
    and theta columns. Rows are laid out in three consecutive groups:

    * the first ``num_priors // 8`` rows start at x = 0.0 ("left"),
    * the next ``num_priors * 3 // 4`` rows start at y = 0.0 ("bottom"),
    * the remaining rows start at x = 1.0.
    """
    self.prior_embeddings = nn.Embedding(self.num_priors, 3)

    n_left = self.num_priors // 8
    n_bottom = self.num_priors * 3 // 4
    bottom_end = n_left + n_bottom

    # NOTE: this divides by zero when n_left // 2 == 1
    # (i.e. 16 <= num_priors < 32).
    side_step = 0.5 / (n_left // 2 - 1)
    bottom_step = 1 / (n_bottom // 4 + 1)

    weight = self.prior_embeddings.weight
    with paddle.no_grad():
        for i in range(self.num_priors):
            if i < n_left:
                row = (i // 2 * side_step,
                       0.0,
                       0.16 if i % 2 == 0 else 0.32)
            elif i < bottom_end:
                row = (0.0,
                       ((i - n_left) // 4 + 1) * bottom_step,
                       0.2 * (i % 4 + 1))
            else:
                row = ((i - n_left - n_bottom) // 2 * side_step,
                       1.0,
                       0.68 if i % 2 == 0 else 0.84)
            for col, value in enumerate(row):
                weight[i, col] = value
def forward(self, x, inputs=None):
    """
    Take pyramid features as input to perform Cross Layer Refinement and
    finally output the prediction lanes. Each feature is a 4D tensor.

    Args:
        x (list[Tensor]): Input features; only the last ``refine_layers``
            entries are used, in reversed order.
        inputs: Handed unchanged to ``self.loss`` in training mode;
            not used otherwise.

    Returns:
        In training mode, the value of
        ``self.loss({'predictions_lists': ..., 'seg': ...}, inputs)``
        where ``predictions_lists`` holds the predictions of every stage
        and ``seg`` is the ``seg_decoder`` output (auxiliary
        segmentation). Otherwise the predictions of the last stage.
    """
    batch_features = list(reversed(x[len(x) - self.refine_layers:]))
    batch_size = batch_features[-1].shape[0]

    if self.training:
        # Rebuild the priors from the (learnable) embedding on each call.
        self.priors, self.priors_on_featmap = (
            self.generate_priors_from_embeddings())

    priors = self.priors.tile([batch_size, 1, 1])
    priors_on_featmap = self.priors_on_featmap.tile([batch_size, 1, 1])

    def spread(column):
        # (B, P) -> (B, P, n_offsets): repeat a value for every lane point.
        return column.unsqueeze(axis=2).clone().tile([1, 1, self.n_offsets])

    final_stage = self.refine_layers - 1
    predictions_lists = []
    pooled_per_stage = []
    for stage in range(self.refine_layers):
        stage_features = batch_features[stage]
        num_priors = priors_on_featmap.shape[1]

        pooled_per_stage.append(
            self.pool_prior_features(
                stage_features, num_priors,
                paddle.flip(x=priors_on_featmap, axis=[2])))

        fc_features = self.roi_gather(pooled_per_stage, stage_features,
                                      stage)
        fc_features = fc_features.reshape(
            [num_priors, batch_size, -1]).reshape(
                [batch_size * num_priors, self.fc_hidden_dim])

        # Separate copies feed the classification and regression branches.
        cls_features = fc_features.clone()
        reg_features = fc_features.clone()
        for layer in self.cls_modules:
            cls_features = layer(cls_features)
        for layer in self.reg_modules:
            reg_features = layer(reg_features)

        cls_logits = self.cls_layers(cls_features)
        reg = self.reg_layers(reg_features)
        cls_logits = cls_logits.reshape(
            [batch_size, -1, cls_logits.shape[1]])
        reg = reg.reshape([batch_size, -1, reg.shape[1]])

        # Start from the current priors and overwrite/update each field.
        predictions = priors.clone()
        predictions[:, :, :2] = cls_logits
        predictions[:, :, 2:5] += reg[:, :, :3]
        predictions[:, :, 5] = reg[:, :, 3]

        # Recompute the line x coordinates from the updated
        # (start_y, start_x, theta) columns.
        start_x = spread(predictions[..., 3])
        ys = self.prior_ys.tile([batch_size, num_priors, 1])
        start_y = spread(predictions[..., 2])
        tan_theta = paddle.tan(
            spread(predictions[..., 4]) * math.pi + 1e-05)
        predictions[..., 6:] = (
            start_x * (self.img_w - 1) +
            (1 - ys - start_y) * self.img_h / tan_theta) / (self.img_w - 1)

        # Snapshot taken before the per-point offsets are added; it
        # becomes the prior of the next stage.
        prediction_lines = predictions.clone()
        predictions[..., 6:] += reg[..., 4:]
        predictions_lists.append(predictions)

        if stage != final_stage:
            priors = prediction_lines.detach().clone()
            priors_on_featmap = priors.index_select(
                6 + self.sample_x_indexs, axis=-1)

    if not self.training:
        return predictions_lists[-1]

    # Resize every used feature map to the spatial size of the last one
    # and concatenate along channels for the segmentation decoder.
    reference = batch_features[-1]
    out_h, out_w = reference.shape[2], reference.shape[3]
    seg_features = paddle.concat(
        [
            F.interpolate(
                feature,
                size=[out_h, out_w],
                mode='bilinear',
                align_corners=False) for feature in batch_features
        ],
        axis=1)
    seg = self.seg_decoder(seg_features)
    output = {'predictions_lists': predictions_lists, 'seg': seg}
    return self.loss(output, inputs)
def predictions_to_pred(self, predictions):
    """
    Convert predictions to internal Lane structure for evaluation.

    Args:
        predictions (Tensor): One row per lane. Column 2 is the start y
            as a fraction of ``n_strips``, column 5 the lane length in
            strips, and columns 6: the x coordinate at every ``prior_ys``
            position.

    Returns:
        list[Lane]: One ``Lane`` per row that keeps more than one point.
            ``points`` is an (N, 2) float64 array of (x, y) pairs in
            reversed ``prior_ys`` order, with y rescaled by
            ``(y * (ori_img_h - cut_height) + cut_height) / ori_img_h``.
            The metadata stores columns 3, 2 and 1 of the row, unchanged,
            as 'start_x', 'start_y' and 'conf'.
    """
    # Work on a local float64 copy. The registered ``prior_ys`` buffer is
    # also read by forward(), so it must not be overwritten here.
    prior_ys = self.prior_ys
    if not isinstance(prior_ys, paddle.Tensor):
        prior_ys = paddle.to_tensor(prior_ys)
    prior_ys = prior_ys.astype('float64')
    last_index = len(prior_ys) - 1

    lanes = []
    for row in predictions:
        lane_xs = row[6:].clone()
        start = min(
            max(0, int(round(row[2].item() * self.n_strips))),
            self.n_strips)
        length = int(round(row[5].item()))
        end = min(start + length - 1, last_index)

        if start > 0:
            # Before the start point, keep only the unbroken run of
            # in-range x values that ends right at ``start``.
            head = lane_xs[:start]
            inside = ((head >= 0.) & (head <= 1.)).cpu().detach().numpy()
            # ``bool`` instead of the removed ``np.bool`` alias.
            drop = ~(inside[::-1].cumprod()[::-1].astype(bool))
            # Assign back through one slice assignment instead of the
            # chained ``lane_xs[:start][mask] = -2``, which only has an
            # effect when slicing returns a view of ``lane_xs``.
            lane_xs[:start] = paddle.where(
                paddle.to_tensor(drop), paddle.full_like(head, -2.), head)
        if end < last_index:
            lane_xs[end + 1:] = -2

        # Negative x marks a discarded point.
        visible = lane_xs >= 0
        lane_ys = prior_ys[visible].clone()
        lane_xs = lane_xs[visible]
        if len(lane_xs) <= 1:
            continue

        lane_xs = lane_xs.flip(axis=0).astype('float64')
        lane_ys = lane_ys.flip(axis=0)
        lane_ys = (lane_ys * (self.ori_img_h - self.cut_height) +
                   self.cut_height) / self.ori_img_h

        points = paddle.stack([lane_xs, lane_ys], axis=1)
        lanes.append(
            Lane(
                points=points.cpu().numpy(),
                metadata={
                    'start_x': row[3],
                    'start_y': row[2],
                    'conf': row[1]
                }))
    return lanes
def lane_nms(self, predictions, scores, nms_overlap_thresh, top_k):
    """
    Greedy NMS for lane detection.

    Args:
        predictions (paddle.Tensor): One row per lane; rows are compared
            pairwise with ``line_iou``. ``get_lanes`` passes only the x
            coordinate columns, scaled by ``img_w - 1``.
        scores (paddle.Tensor): Score of every lane, shape [num_lanes].
        nms_overlap_thresh (float): A lower-ranked lane is dropped when
            ``1 - line_iou(lane, best) <= nms_overlap_thresh``.
        top_k (int): Maximum number of lanes to keep.

    Returns:
        paddle.Tensor: Stacked indices (into the input rows) of the kept
            lanes, best score first.
    """
    # Rank lanes by score, best first.
    order = scores.argsort(descending=True)
    candidates = predictions.clone().index_select(order)

    kept = []
    while len(candidates) > 0:
        kept.append(order[0])
        if len(kept) >= top_k or len(candidates) == 1:
            break

        best = candidates[0].unsqueeze(0)
        distances = paddle.to_tensor([
            1 - line_iou(
                candidates[i].unsqueeze(0), best, img_w=self.img_w,
                length=15) for i in range(1, len(candidates))
        ])

        suppressed = distances <= nms_overlap_thresh
        survivors = paddle.where(paddle.logical_not(suppressed))[0]
        if survivors.shape[0] == 0:
            break

        # Positions in ``survivors`` are relative to the rows after the
        # best one.
        candidates = candidates[1:].index_select(survivors)
        order = order[1:].index_select(survivors)

    return paddle.stack(kept)
def get_lanes(self, output, as_lanes=True):
    """
    Convert model output to lanes.

    For every image: softmax the two score columns, keep rows whose
    second-class probability reaches ``conf_threshold``, run ``lane_nms``
    on the x coordinates (scaled by ``img_w - 1``), then scale column 5
    (length) by ``n_strips`` and round it.

    Args:
        output: Iterable of per-image prediction tensors with
            ``6 + n_offsets`` columns, e.g. the value ``forward`` returns
            outside training mode.
        as_lanes (bool): If True, convert the surviving rows with
            ``predictions_to_pred``; otherwise return them as a tensor.

    Returns:
        list: One entry per image; ``[]`` when no row survives.
    """
    decoded = []
    for predictions in output:
        scores = F.softmax(predictions[:, :2], axis=1)[:, 1]
        confident = scores >= self.conf_threshold
        predictions = predictions[confident]
        scores = scores[confident]
        if predictions.shape[0] == 0:
            decoded.append([])
            continue

        # lane_nms only looks at the x coordinates, so scale just those
        # columns on a detached copy.
        lane_xs = predictions.detach().clone()[..., 6:] * (self.img_w - 1)
        keep = self.lane_nms(
            lane_xs,
            scores,
            nms_overlap_thresh=self.nms_thres,
            top_k=self.max_lanes)

        predictions = predictions.index_select(keep)
        if predictions.shape[0] == 0:
            decoded.append([])
            continue

        # Length: fraction of the lane -> whole number of strips.
        predictions[:, 5] = paddle.round(predictions[:, 5] * self.n_strips)
        if as_lanes:
            decoded.append(self.predictions_to_pred(predictions))
        else:
            decoded.append(predictions)
    return decoded