更换文档检测模型
This commit is contained in:
309
paddle_detection/ppdet/modeling/clrnet_utils.py
Normal file
309
paddle_detection/ppdet/modeling/clrnet_utils.py
Normal file
@@ -0,0 +1,309 @@
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.modeling.initializer import constant_
|
||||
from paddle.nn.initializer import KaimingNormal
|
||||
|
||||
|
||||
class ConvModule(nn.Layer):
    """Conv2D + optional normalization + optional ReLU activation.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        kernel_size (int|tuple): convolution kernel size. Default: 1.
        stride (int|tuple): convolution stride. Default: 1.
        padding (int|tuple): convolution padding. Default: 0.
        dilation (int|tuple): convolution dilation. Default: 1.
        groups (int): number of blocked connections. Default: 1.
        bias (bool): whether the conv layer learns a bias. Default: False.
        norm_type (str|None): one of 'bn', 'sync_bn', 'gn' or None (no norm).
            Default: 'bn'.
        wtih_act (bool): whether to append a ReLU after the norm.
            NOTE: the name is a typo for ``with_act`` but is kept unchanged
            for backward compatibility with existing callers and configs.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=False,
                 norm_type='bn',
                 wtih_act=True):
        super(ConvModule, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn', None]
        self.with_norm = norm_type is not None
        self.wtih_act = wtih_act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias,
            weight_attr=KaimingNormal())
        if self.with_norm:
            if norm_type == 'bn':
                self.bn = nn.BatchNorm2D(out_channels)
            elif norm_type == 'sync_bn':
                # Bug fix: 'sync_bn' passed the assert above but previously
                # built no norm layer, so forward() raised AttributeError.
                self.bn = nn.SyncBatchNorm(out_channels)
            elif norm_type == 'gn':
                # One group per channel (num_groups == num_channels).
                self.bn = nn.GroupNorm(out_channels, out_channels)

        if self.wtih_act:
            self.act = nn.ReLU()

    def forward(self, inputs):
        """Apply conv -> (norm) -> (ReLU) and return the result."""
        x = self.conv(inputs)
        if self.with_norm:
            x = self.bn(x)
        if self.wtih_act:
            x = self.act(x)
        return x
|
||||
|
||||
def LinearModule(hidden_dim):
    """Return a LayerList of [Linear(hidden_dim -> hidden_dim), ReLU]."""
    layers = [
        nn.Linear(
            hidden_dim, hidden_dim, bias_attr=True),
        nn.ReLU(),
    ]
    return nn.LayerList(layers)
|
||||
|
||||
class FeatureResize(nn.Layer):
    """Resize a feature map to a fixed spatial size, then flatten H*W.

    Args:
        size (tuple): target (height, width) of the interpolation.
    """

    def __init__(self, size=(10, 25)):
        super(FeatureResize, self).__init__()
        self.size = size

    def forward(self, x):
        """(B, C, H, W) -> (B, C, size[0]*size[1])."""
        resized = F.interpolate(x, self.size)
        return resized.flatten(2)
||||
|
||||
|
||||
class ROIGather(nn.Layer):
    '''
    ROIGather module: enriches per-prior ROI features with global context
    gathered from the feature map via a single attention step.

    Args:
        in_channels: prior feature channels
        num_priors: number of predefined priors
        sample_points: number of points sampled when extracting features
            along a line prior
        fc_hidden_dim: output channel of the fc projection
        refine_layers: total number of refine layers
        mid_channels: channels of the per-layer transform convs. Default: 48.
    '''

    def __init__(self,
                 in_channels,
                 num_priors,
                 sample_points,
                 fc_hidden_dim,
                 refine_layers,
                 mid_channels=48):
        super(ROIGather, self).__init__()
        self.in_channels = in_channels
        self.num_priors = num_priors
        # Key projection over the feature map (with BN).
        self.f_key = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            norm_type='bn')

        # Query projection over the prior ROI features; grouped so each
        # prior is transformed independently.
        self.f_query = nn.Sequential(
            nn.Conv1D(
                in_channels=num_priors,
                out_channels=num_priors,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=num_priors),
            nn.ReLU(), )
        # Value projection over the feature map.
        self.f_value = nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=self.in_channels,
            kernel_size=1,
            stride=1,
            padding=0)
        # Output projection of the attention context, grouped per prior.
        self.W = nn.Conv1D(
            in_channels=num_priors,
            out_channels=num_priors,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=num_priors)

        self.resize = FeatureResize()
        # Zero-init W so the attention branch starts as an identity
        # (roi + 0) and learns a residual.
        constant_(self.W.weight, 0)
        constant_(self.W.bias, 0)

        # Per-refine-layer transform convs and the concat-fusion convs.
        self.convs = nn.LayerList()
        self.catconv = nn.LayerList()
        for i in range(refine_layers):
            self.convs.append(
                ConvModule(
                    in_channels,
                    mid_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))

            self.catconv.append(
                ConvModule(
                    mid_channels * (i + 1),
                    in_channels, (9, 1),
                    padding=(4, 0),
                    bias=False,
                    norm_type='bn'))

        self.fc = nn.Linear(
            sample_points * fc_hidden_dim, fc_hidden_dim, bias_attr=True)

        self.fc_norm = nn.LayerNorm(fc_hidden_dim)

    def roi_fea(self, x, layer_index):
        """Transform each layer's ROI features, concat them along the
        channel axis and fuse with the layer's catconv."""
        feats = []
        for i, feature in enumerate(x):
            feat_trans = self.convs[i](feature)
            feats.append(feat_trans)
        cat_feat = paddle.concat(feats, axis=1)
        cat_feat = self.catconv[layer_index](cat_feat)
        return cat_feat

    def forward(self, roi_features, x, layer_index):
        '''
        Args:
            roi_features: prior feature, shape: (Batch * num_priors, prior_feat_channel, sample_point, 1)
            x: feature map
            layer_index: currently on which layer to refine
        Return:
            roi: prior features with gathered global information, shape: (Batch, num_priors, fc_hidden_dim)
        '''
        roi = self.roi_fea(roi_features, layer_index)
        bs = x.shape[0]
        # (B * num_priors, C, S, 1) -> (B * num_priors, C * S)
        roi = roi.reshape([bs * self.num_priors, -1])
        roi = self.fc(roi)
        roi = F.relu(self.fc_norm(roi))
        # (B * num_priors, fc_hidden_dim) -> (B, num_priors, fc_hidden_dim)
        roi = roi.reshape([bs, self.num_priors, -1])
        query = roi

        value = self.resize(self.f_value(x))  # (B, C, N) global feature
        query = self.f_query(
            query)  # (B, N, 1) sample context feature from prior roi
        key = self.f_key(x)
        value = value.transpose(perm=[0, 2, 1])
        key = self.resize(key)  # (B, C, N) global feature
        # Scaled dot-product attention over the global feature positions.
        sim_map = paddle.matmul(query, key)
        sim_map = (self.in_channels**-.5) * sim_map
        sim_map = F.softmax(sim_map, axis=-1)

        context = paddle.matmul(sim_map, value)
        context = self.W(context)

        # Residual add with dropout on the context branch.
        roi = roi + F.dropout(context, p=0.1, training=self.training)

        return roi
|
||||
|
||||
class SegDecoder(nn.Layer):
    """Optional segmentation decoder head.

    Applies spatial dropout and a 1x1 conv over the concatenated
    refine-layer features, then upsamples the class logits back to the
    input image resolution.
    """

    def __init__(self,
                 image_height,
                 image_width,
                 num_class,
                 prior_feat_channels=64,
                 refine_layers=3):
        super().__init__()
        self.dropout = nn.Dropout2D(0.1)
        self.conv = nn.Conv2D(prior_feat_channels * refine_layers, num_class,
                              1)
        self.image_height = image_height
        self.image_width = image_width

    def forward(self, x):
        """(B, C*refine_layers, h, w) -> (B, num_class, image_h, image_w)."""
        logits = self.conv(self.dropout(x))
        return F.interpolate(
            logits,
            size=[self.image_height, self.image_width],
            mode='bilinear',
            align_corners=False)
|
||||
|
||||
import paddle.nn as nn
|
||||
|
||||
|
||||
def accuracy(pred, target, topk=1, thresh=None):
    """Calculate accuracy according to the prediction and target.

    Args:
        pred (paddle.Tensor): The model prediction, shape (N, num_class)
        target (paddle.Tensor): The target of each prediction, shape (N, )
        topk (int | tuple[int], optional): If the predictions in ``topk``
            matches the target, the predictions will be regarded as
            correct ones. Defaults to 1.
        thresh (float, optional): If not None, predictions with scores under
            this threshold are considered incorrect. Default to None.

    Returns:
        float | tuple[float]: If the input ``topk`` is a single integer,
            the function will return a single float as accuracy. If
            ``topk`` is a tuple containing multiple integers, the
            function will return a tuple containing accuracies of
            each ``topk`` number.
    """
    assert isinstance(topk, (int, tuple))
    if isinstance(topk, int):
        topk = (topk, )
        return_single = True
    else:
        return_single = False

    maxk = max(topk)
    if pred.shape[0] == 0:
        # Bug fix: ``pred.new_tensor`` is a torch-only API that does not
        # exist on paddle Tensors; build zero accuracies with paddle ops.
        accu = [paddle.zeros([1], dtype='float32') for i in range(len(topk))]
        return accu[0] if return_single else accu
    assert pred.ndim == 2 and target.ndim == 1
    assert pred.shape[0] == target.shape[0]
    assert maxk <= pred.shape[1], \
        f'maxk {maxk} exceeds pred dimension {pred.shape[1]}'
    pred_value, pred_label = pred.topk(maxk, axis=1)
    pred_label = pred_label.t()  # transpose to shape (maxk, N)
    correct = pred_label.equal(target.reshape([1, -1]).expand_as(pred_label))
    if thresh is not None:
        # Only prediction values larger than thresh are counted as correct
        correct = correct & (pred_value > thresh).t()
    res = []
    for k in topk:
        correct_k = correct[:k].reshape([-1]).cast("float32").sum(0,
                                                                  keepdim=True)
        correct_k = correct_k * (100.0 / pred.shape[0])
        res.append(correct_k)
    return res[0] if return_single else res
||||
|
||||
|
||||
class Accuracy(nn.Layer):
    """Layer wrapper around :func:`accuracy`.

    Args:
        topk (tuple, optional): The criterion used to calculate the
            accuracy. Defaults to (1,).
        thresh (float, optional): If not None, predictions with scores
            under this threshold are considered incorrect. Default to None.
    """

    def __init__(self, topk=(1, ), thresh=None):
        super().__init__()
        self.topk = topk
        self.thresh = thresh

    def forward(self, pred, target):
        """Compute the accuracies of *pred* against *target*.

        Args:
            pred (paddle.Tensor): Prediction of models.
            target (paddle.Tensor): Target for each prediction.

        Returns:
            tuple[float]: The accuracies under different topk criterions.
        """
        result = accuracy(pred, target, self.topk, self.thresh)
        return result
|
||||
Reference in New Issue
Block a user