更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/solov2/README.md
+++ b/paddle_detection/configs/solov2/README.md
@@ -0,0 +1,52 @@
+# SOLOv2 for instance segmentation
+
+## Introduction
+
+SOLOv2 (Segmenting Objects by Locations) is a fast instance segmentation framework with strong performance. We reproduced the model from the paper and further improved its accuracy and speed.
+
+**Highlights:**
+
+- Training time: training the `solov2_r50_fpn_1x` model on 8 Tesla V100 GPUs takes only 10 hours.
+
+## Model Zoo
+
+| Detector  | Backbone                | Multi-scale training  | Lr schd |  Mask AP<sup>val</sup> |  V100 FP32(FPS) |    GPU  |    Download                  | Configs |
+| :-------: | :---------------------: | :-------------------: | :-----: | :--------------------: | :-------------: | :-----: | :---------: | :------------------------: |
+| YOLACT++  |  R50-FPN    | False      |  800k iter     |   34.1 (test-dev) |  33.5  | Xp |  -  |  -   |
+| CenterMask | R50-FPN | True        |   2x    |     36.4        |  13.9  | Xp |   -  |  -  |
+| CenterMask | V2-99-FPN | True        |   3x    |     40.2       |  8.9  | Xp |   -  |  -  |
+| PolarMask | R50-FPN | True        |   2x    |     30.5        |  9.4  | V100 |   -  |  -  |
+| BlendMask | R50-FPN | True        |   3x    |     37.8        |  13.5  | V100 |   -  |  -  |
+| SOLOv2 (Paper) | R50-FPN | False        |   1x    |     34.8        |  18.5  | V100 |   -  |  -  |
+| SOLOv2 (Paper) | X101-DCN-FPN | True        |   3x    |     42.4        |  5.9  | V100 |   -  |  -  |
+| SOLOv2 | R50-FPN                 |  False                |   1x    |    35.5         |  21.9     | V100 |  [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_fpn_1x_coco.yml) |
+| SOLOv2 | R50-FPN                 |  True                |   3x    |     38.0         |   21.9    | V100 |  [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_3x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_fpn_3x_coco.yml) |
+| SOLOv2 | R101vd-FPN                 |  True                |   3x    |     42.7         |   12.1    | V100 |  [model](https://paddledet.bj.bcebos.com/models/solov2_r101_vd_fpn_3x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r101_vd_fpn_3x_coco.yml) |
+
+**Notes:**
+
+- SOLOv2 is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
+
+## Enhanced model
+| Backbone                | Input size  | Lr schd | V100 FP32(FPS) | Mask AP<sup>val</sup> |         Download                  | Configs |
+| :---------------------: | :-------------------: | :-----: | :------------: | :-----: | :---------: | :------------------------: |
+| Light-R50-VD-DCN-FPN          |  512     |   3x    |     38.6          |  39.0   | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_enhance_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_enhance_coco.yml) |
+
+**Optimization methods used in the enhanced model:**
+- Better backbone network: ResNet50vd-DCN
+- A better pretrained backbone model obtained through knowledge distillation
+- [Exponential Moving Average](https://www.investopedia.com/terms/e/ema.asp)
+- Synchronized Batch Normalization
+- Multi-scale training
+- More data augmentation methods
+- DropBlock
+
+## Citations
+```
+@article{wang2020solov2,
+  title={SOLOv2: Dynamic, Faster and Stronger},
+  author={Wang, Xinlong and Zhang, Rufeng and  Kong, Tao and Li, Lei and Shen, Chunhua},
+  journal={arXiv preprint arXiv:2003.10152},
+  year={2020}
+}
+```
--- a/paddle_detection/configs/solov2/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/solov2/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12  # number of training epochs; both decay milestones below (8 and 11) are smaller than this value
+
+LearningRate:  # learning-rate schedule: one base value plus a list of tagged scheduler entries
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # presumably the factor applied to the learning rate at each milestone -- confirm in the scheduler implementation
+    milestones: [8, 11]  # presumably epoch indices, since both are below `epoch: 12` -- confirm
+  - !LinearWarmup
+    start_factor: 0.  # float zero; presumably warmup starts from 0 * base_lr -- confirm
+    steps: 1000  # presumably the warmup length in iterations rather than epochs -- confirm
+
+OptimizerBuilder:  # optimizer type with its hyper-parameter, and the weight regularizer
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
--- a/paddle_detection/configs/solov2/_base_/solov2_light_reader.yml
+++ b/paddle_detection/configs/solov2/_base_/solov2_light_reader.yml
@@ -0,0 +1,47 @@
+worker_num: 2  # reader settings pulled in by solov2_r50_enhance_coco.yml through its _BASE_ list
+TrainReader:
+  sample_transforms:  # per-sample steps; `{}` passes no arguments, so presumably each transform's defaults apply -- confirm
+  - Decode: {}
+  - Poly2Mask: {}
+  - RandomDistort: {}
+  - RandomCrop: {}
+  - RandomResize: {interp: 1,
+                     target_size: [[352, 852], [384, 852], [416, 852], [448, 852], [480, 852], [512, 852]],
+                     keep_ratio: True}  # six candidate sizes: first value 352..512 in steps of 32, second value always 852
+  - RandomFlip: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:  # steps listed separately from sample_transforms; presumably run on an assembled batch -- confirm
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+                        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+                        coord_sigma: 0.2}  # five num_grids entries and five scale_ranges; num_grids repeats SOLOv2Head.num_grids from _base_/solov2_r50_fpn.yml -- presumably the two must stay in sync
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:  # no random transforms and no target generation, unlike TrainReader
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [512, 852], keep_ratio: True}  # fixed size, equal to the largest RandomResize candidate in TrainReader
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:  # same transforms and settings as EvalReader above
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [512, 852], keep_ratio: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/solov2/_base_/solov2_r50_fpn.yml
+++ b/paddle_detection/configs/solov2/_base_/solov2_r50_fpn.yml
@@ -0,0 +1,40 @@
+architecture: SOLOv2  # same name as the `SOLOv2:` section below; presumably selects it as the model entry point -- confirm
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+SOLOv2:  # each value below is the name of another top-level section in this file
+  backbone: ResNet
+  neck: FPN
+  solov2_head: SOLOv2Head
+  mask_head: SOLOv2MaskHead
+
+ResNet:
+  depth: 50
+  freeze_at: 0
+  return_idx: [0,1,2,3]  # four indices, the same count as num_stages below
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+SOLOv2Head:
+  seg_feat_channels: 512
+  stacked_convs: 4
+  num_grids: [40, 36, 24, 16, 12]  # same list as Gt2Solov2Target.num_grids in the reader configs -- presumably the two must stay in sync
+  kernel_out_channels: 256  # same value as SOLOv2MaskHead.out_channels below
+  solov2_loss: SOLOv2Loss  # refers to the SOLOv2Loss section below
+  mask_nms: MaskMatrixNMS  # refers to the MaskMatrixNMS section below
+
+SOLOv2MaskHead:
+  mid_channels: 128
+  out_channels: 256
+  start_level: 0
+  end_level: 3
+
+SOLOv2Loss:
+  ins_loss_weight: 3.0
+  focal_loss_gamma: 2.0
+  focal_loss_alpha: 0.25
+
+MaskMatrixNMS:
+  pre_nms_top_n: 500
+  post_nms_top_n: 100
--- a/paddle_detection/configs/solov2/_base_/solov2_reader.yml
+++ b/paddle_detection/configs/solov2/_base_/solov2_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 8  # reader settings listed in _BASE_ by the 1x, 3x and r101_vd configs in this directory
+TrainReader:
+  sample_transforms:  # per-sample steps; `{}` passes no arguments, so presumably each transform's defaults apply -- confirm
+  - Decode: {}
+  - Poly2Mask: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}  # one fixed size; the 3x and r101_vd configs replace this step with a RandomResize
+  - RandomFlip: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:  # steps listed separately from sample_transforms; presumably run on an assembled batch -- confirm
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+                        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+                        coord_sigma: 0.2}  # five num_grids entries and five scale_ranges; num_grids repeats SOLOv2Head.num_grids from _base_/solov2_r50_fpn.yml -- presumably the two must stay in sync
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:  # no flip and no target generation, unlike TrainReader
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}  # same target size as the TrainReader Resize
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:  # same transforms and settings as EvalReader above
+  sample_transforms:
+  - Decode: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/solov2/solov2_r101_vd_fpn_3x_coco.yml
+++ b/paddle_detection/configs/solov2/solov2_r101_vd_fpn_3x_coco.yml
@@ -0,0 +1,66 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/solov2_r50_fpn.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/solov2_reader.yml',
+]  # base files; keys repeated below presumably override the values loaded from them -- confirm the merge rules in the config loader
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams  # replaces the ResNet50 URL set in _base_/solov2_r50_fpn.yml
+weights: output/solov2_r101_vd_fpn_3x_coco/model_final  # directory name matches this config's file name
+epoch: 36  # _base_/optimizer_1x.yml sets 12
+use_ema: true
+ema_decay: 0.9998
+
+ResNet:
+  depth: 101  # _base_/solov2_r50_fpn.yml sets 50
+  variant: d
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  dcn_v2_stages: [1,2,3]  # key not present in the base ResNet section
+  num_stages: 4
+
+SOLOv2Head:  # first six keys repeat the base values; dcn_v2_stages is the addition
+  seg_feat_channels: 512
+  stacked_convs: 4
+  num_grids: [40, 36, 24, 16, 12]
+  kernel_out_channels: 256
+  solov2_loss: SOLOv2Loss
+  mask_nms: MaskMatrixNMS
+  dcn_v2_stages: [0, 1, 2, 3]
+
+SOLOv2MaskHead:  # first four keys repeat the base values; use_dcn_in_tower is the addition
+  mid_channels: 128
+  out_channels: 256
+  start_level: 0
+  end_level: 3
+  use_dcn_in_tower: True
+
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [24, 33]  # _base_/optimizer_1x.yml uses [8, 11]; both values here are below `epoch: 36`
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 2000  # _base_/optimizer_1x.yml uses 1000
+
+TrainReader:  # differs from TrainReader in _base_/solov2_reader.yml only in using RandomResize instead of the fixed Resize
+  sample_transforms:
+  - Decode: {}
+  - Poly2Mask: {}
+  - RandomResize: {interp: 1,
+                     target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]],
+                     keep_ratio: True}  # six candidate sizes: first value 640..800 in steps of 32, second value always 1333
+  - RandomFlip: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+                        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+                        coord_sigma: 0.2}  # num_grids repeats SOLOv2Head.num_grids above
+  batch_size: 2
+  shuffle: true
+  drop_last: true
--- a/paddle_detection/configs/solov2/solov2_r50_enhance_coco.yml
+++ b/paddle_detection/configs/solov2/solov2_r50_enhance_coco.yml
@@ -0,0 +1,50 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/solov2_r50_fpn.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/solov2_light_reader.yml',
+]  # uses the light reader (sizes up to [512, 852]); keys repeated below presumably override the base values -- confirm the merge rules in the config loader
+pretrain_weights:  https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/solov2_r50_enhance_coco/model_final  # named after this config file, like the `weights` paths of the other solov2 configs
+epoch: 36  # _base_/optimizer_1x.yml sets 12
+use_ema: true
+ema_decay: 0.9998
+
+ResNet:
+  depth: 50
+  variant: d
+  freeze_at: 0
+  freeze_norm: false
+  norm_type: sync_bn
+  return_idx: [0,1,2,3]
+  dcn_v2_stages: [1,2,3]
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]  # four values, the same count as num_stages; presumably one learning-rate multiplier per stage -- confirm
+  num_stages: 4
+
+SOLOv2Head:  # smaller than the base head: base uses seg_feat_channels 512, stacked_convs 4, kernel_out_channels 256
+  seg_feat_channels: 256
+  stacked_convs: 3
+  num_grids: [40, 36, 24, 16, 12]  # same list as Gt2Solov2Target.num_grids in _base_/solov2_light_reader.yml
+  kernel_out_channels: 128  # same value as SOLOv2MaskHead.out_channels below
+  solov2_loss: SOLOv2Loss
+  mask_nms: MaskMatrixNMS
+  dcn_v2_stages: [2]
+  drop_block: True
+
+SOLOv2MaskHead:
+  mid_channels: 128
+  out_channels: 128  # base uses 256
+  start_level: 0
+  end_level: 3
+  use_dcn_in_tower: True
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [24, 33]  # _base_/optimizer_1x.yml uses [8, 11]; both values here are below `epoch: 36`
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
--- a/paddle_detection/configs/solov2/solov2_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/solov2/solov2_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/solov2_r50_fpn.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/solov2_reader.yml',
+]  # every setting except `weights` comes from these base files
+weights: output/solov2_r50_fpn_1x_coco/model_final  # directory name matches this config's file name
--- a/paddle_detection/configs/solov2/solov2_r50_fpn_3x_coco.yml
+++ b/paddle_detection/configs/solov2/solov2_r50_fpn_3x_coco.yml
@@ -0,0 +1,38 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/solov2_r50_fpn.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/solov2_reader.yml',
+]  # base files; keys repeated below presumably override the values loaded from them -- confirm the merge rules in the config loader
+weights: output/solov2_r50_fpn_3x_coco/model_final  # directory name matches this config's file name
+epoch: 36  # _base_/optimizer_1x.yml sets 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [24, 33]  # _base_/optimizer_1x.yml uses [8, 11]; both values here are below `epoch: 36`
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
+
+TrainReader:  # differs from TrainReader in _base_/solov2_reader.yml only in using RandomResize instead of the fixed Resize
+  sample_transforms:
+  - Decode: {}
+  - Poly2Mask: {}
+  - RandomResize: {interp: 1,
+                     target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]],
+                     keep_ratio: True}  # six candidate sizes: first value 640..800 in steps of 32, second value always 1333
+  - RandomFlip: {}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
+                        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
+                        coord_sigma: 0.2}  # num_grids repeats SOLOv2Head.num_grids from _base_/solov2_r50_fpn.yml
+  batch_size: 2
+  shuffle: true
+  drop_last: true