更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,52 @@
# SOLOv2 for instance segmentation
## Introduction
SOLOv2 (Segmenting Objects by Locations) is a fast instance segmentation framework with strong performance. We reproduced the model from the paper and further improved its accuracy and speed.
**Highlights:**
- Training Time: Training the `solov2_r50_fpn_1x` model on 8 Tesla V100 GPUs takes only 10 hours.
## Model Zoo
| Detector | Backbone | Multi-scale training | Lr schd | Mask AP<sup>val</sup> | V100 FP32(FPS) | GPU | Download | Configs |
| :-------: | :---------------------: | :-------------------: | :-----: | :--------------------: | :-------------: | :-----: | :---------: | :------------------------: |
| YOLACT++ | R50-FPN | False | 800k iter | 34.1 (test-dev) | 33.5 | Xp | - | - |
| CenterMask | R50-FPN | True | 2x | 36.4 | 13.9 | Xp | - | - |
| CenterMask | V2-99-FPN | True | 3x | 40.2 | 8.9 | Xp | - | - |
| PolarMask | R50-FPN | True | 2x | 30.5 | 9.4 | V100 | - | - |
| BlendMask | R50-FPN | True | 3x | 37.8 | 13.5 | V100 | - | - |
| SOLOv2 (Paper) | R50-FPN | False | 1x | 34.8 | 18.5 | V100 | - | - |
| SOLOv2 (Paper) | X101-DCN-FPN | True | 3x | 42.4 | 5.9 | V100 | - | - |
| SOLOv2 | R50-FPN | False | 1x | 35.5 | 21.9 | V100 | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_fpn_1x_coco.yml) |
| SOLOv2 | R50-FPN | True | 3x | 38.0 | 21.9 | V100 | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_fpn_3x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_fpn_3x_coco.yml) |
| SOLOv2 | R101vd-FPN | True | 3x | 42.7 | 12.1 | V100 | [model](https://paddledet.bj.bcebos.com/models/solov2_r101_vd_fpn_3x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r101_vd_fpn_3x_coco.yml) |
**Notes:**
- SOLOv2 is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
## Enhanced model
| Backbone | Input size | Lr schd | V100 FP32(FPS) | Mask AP<sup>val</sup> | Download | Configs |
| :---------------------: | :-------------------: | :-----: | :------------: | :-----: | :---------: | :------------------------: |
| Light-R50-VD-DCN-FPN | 512 | 3x | 38.6 | 39.0 | [model](https://paddledet.bj.bcebos.com/models/solov2_r50_enhance_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/solov2/solov2_r50_enhance_coco.yml) |
**Optimization methods used in the enhanced model:**
- Better backbone network: ResNet50vd-DCN
- A better pre-trained model obtained through knowledge distillation
- [Exponential Moving Average](https://www.investopedia.com/terms/e/ema.asp) (EMA), enabled through `use_ema` / `ema_decay` in the config
- Synchronized Batch Normalization
- Multi-scale training
- More data augmentation methods
- DropBlock
## Citations
```
@article{wang2020solov2,
title={SOLOv2: Dynamic, Faster and Stronger},
author={Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua},
journal={arXiv preprint arXiv:2003.10152},
year={2020}
}
```

View File

@@ -0,0 +1,19 @@
# Training length and learning-rate schedule.
epoch: 12

LearningRate:
  base_lr: 0.01
  # Each scheduler entry is a mapping carrying a consumer-defined YAML tag;
  # the loader has to know !PiecewiseDecay and !LinearWarmup.
  schedulers:
    - !PiecewiseDecay
      gamma: 0.1
      # NOTE(review): both values are below `epoch`, so these look like epoch
      # indices -- confirm the unit against the scheduler implementation.
      milestones: [8, 11]
    - !LinearWarmup
      start_factor: 0.0
      steps: 1000
# Optimizer settings. `optimizer` and `regularizer` are separate sub-mappings,
# each with its own `type` key.
OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2

View File

@@ -0,0 +1,47 @@
worker_num: 2

# Training input pipeline: `sample_transforms` entries are listed first,
# followed by `batch_transforms` entries.
TrainReader:
  sample_transforms:
    - Decode: {}
    - Poly2Mask: {}
    - RandomDistort: {}
    - RandomCrop: {}
    - RandomResize:
        interp: 1
        # Candidate sizes; the first value varies from 352 to 512 while the
        # second stays at 852.
        target_size: [[352, 852], [384, 852], [416, 852], [448, 852], [480, 852], [512, 852]]
        keep_ratio: true
    - RandomFlip: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
    - Gt2Solov2Target:
        # Same values as SOLOv2Head.num_grids in the model config;
        # presumably the two must stay in sync -- confirm.
        num_grids: [40, 36, 24, 16, 12]
        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]]
        coord_sigma: 0.2
  batch_size: 2
  shuffle: true
  drop_last: true
# Evaluation input pipeline: fixed-size resize, no augmentation transforms,
# one image per batch.
EvalReader:
  sample_transforms:
    - Decode: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Resize: {interp: 1, target_size: [512, 852], keep_ratio: true}
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false
# Test/inference input pipeline; the settings are identical to EvalReader in
# this file.
TestReader:
  sample_transforms:
    - Decode: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Resize: {interp: 1, target_size: [512, 852], keep_ratio: true}
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

View File

@@ -0,0 +1,40 @@
# Top-level architecture selection. Each value under `SOLOv2` names another
# top-level section of this config (ResNet, FPN, SOLOv2Head, SOLOv2MaskHead).
architecture: SOLOv2
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams

SOLOv2:
  backbone: ResNet
  neck: FPN
  solov2_head: SOLOv2Head
  mask_head: SOLOv2MaskHead
# Backbone and neck sections referenced by `SOLOv2.backbone` / `SOLOv2.neck`.
ResNet:
  depth: 50
  freeze_at: 0
  return_idx: [0, 1, 2, 3]
  num_stages: 4

FPN:
  out_channel: 256
# Head sections, plus the loss and NMS sections that SOLOv2Head refers to by
# name through `solov2_loss` and `mask_nms`.
SOLOv2Head:
  seg_feat_channels: 512
  stacked_convs: 4
  num_grids: [40, 36, 24, 16, 12]
  kernel_out_channels: 256
  solov2_loss: SOLOv2Loss
  mask_nms: MaskMatrixNMS

SOLOv2MaskHead:
  mid_channels: 128
  out_channels: 256
  start_level: 0
  end_level: 3

SOLOv2Loss:
  ins_loss_weight: 3.0
  focal_loss_gamma: 2.0
  focal_loss_alpha: 0.25

MaskMatrixNMS:
  pre_nms_top_n: 500
  post_nms_top_n: 100

View File

@@ -0,0 +1,43 @@
worker_num: 8

# Training input pipeline: `sample_transforms` entries are listed first,
# followed by `batch_transforms` entries. A single fixed resize target is used.
TrainReader:
  sample_transforms:
    - Decode: {}
    - Poly2Mask: {}
    - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: true}
    - RandomFlip: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
    - Gt2Solov2Target:
        # Same values as SOLOv2Head.num_grids in the model config;
        # presumably the two must stay in sync -- confirm.
        num_grids: [40, 36, 24, 16, 12]
        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]]
        coord_sigma: 0.2
  batch_size: 2
  shuffle: true
  drop_last: true
# Evaluation input pipeline: fixed-size resize, no augmentation transforms,
# one image per batch.
EvalReader:
  sample_transforms:
    - Decode: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: true}
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false
# Test/inference input pipeline; the settings are identical to EvalReader in
# this file.
TestReader:
  sample_transforms:
    - Decode: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: true}
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

View File

@@ -0,0 +1,66 @@
# Base files this config builds on, followed by top-level values set here.
# NOTE(review): how `_BASE_` entries are merged is decided by the config
# loader, which is not visible here.
_BASE_:
  - '../datasets/coco_instance.yml'
  - '../runtime.yml'
  - '_base_/solov2_r50_fpn.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/solov2_reader.yml'

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/solov2_r101_vd_fpn_3x_coco/model_final
epoch: 36
use_ema: true
ema_decay: 0.9998
# Model sections set by this config. Compared with `_base_/solov2_r50_fpn.yml`
# the backbone depth is 101 instead of 50, and `variant`, `dcn_v2_stages` and
# `use_dcn_in_tower` are additional keys.
ResNet:
  depth: 101
  variant: d
  freeze_at: 0
  return_idx: [0, 1, 2, 3]
  dcn_v2_stages: [1, 2, 3]
  num_stages: 4

SOLOv2Head:
  seg_feat_channels: 512
  stacked_convs: 4
  num_grids: [40, 36, 24, 16, 12]
  kernel_out_channels: 256
  solov2_loss: SOLOv2Loss
  mask_nms: MaskMatrixNMS
  dcn_v2_stages: [0, 1, 2, 3]

SOLOv2MaskHead:
  mid_channels: 128
  out_channels: 256
  start_level: 0
  end_level: 3
  use_dcn_in_tower: true
# Learning-rate schedule for the 36-epoch run set by `epoch` in this config.
LearningRate:
  base_lr: 0.01
  # Each scheduler entry is a mapping carrying a consumer-defined YAML tag.
  schedulers:
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [24, 33]
    - !LinearWarmup
      start_factor: 0.0
      steps: 2000
# Training input pipeline with RandomResize over several candidate sizes.
TrainReader:
  sample_transforms:
    - Decode: {}
    - Poly2Mask: {}
    - RandomResize:
        interp: 1
        # Candidate sizes; the first value varies from 640 to 800 while the
        # second stays at 1333.
        target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]]
        keep_ratio: true
    - RandomFlip: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
    - Gt2Solov2Target:
        # Same values as SOLOv2Head.num_grids in this config.
        num_grids: [40, 36, 24, 16, 12]
        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]]
        coord_sigma: 0.2
  batch_size: 2
  shuffle: true
  drop_last: true

View File

@@ -0,0 +1,50 @@
# Base files this config builds on, followed by top-level values set here.
# This is the only config in the set that uses `_base_/solov2_light_reader.yml`.
_BASE_:
  - '../datasets/coco_instance.yml'
  - '../runtime.yml'
  - '_base_/solov2_r50_fpn.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/solov2_light_reader.yml'

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
# Was `output/solov2_r50_fpn_3x_coco/model_final`, the same path the plain
# R50-FPN 3x config uses. The sibling configs follow
# `output/<config name>/model_final`, so this one gets its own directory.
# NOTE(review): assumes this file is `solov2_r50_enhance_coco.yml`, as named
# in the README -- confirm.
weights: output/solov2_r50_enhance_coco/model_final
epoch: 36
use_ema: true
ema_decay: 0.9998
# Model sections set by this config. Compared with `_base_/solov2_r50_fpn.yml`
# the head is narrower (256 / 128 channels, 3 stacked convs) and the backbone
# adds `variant`, `norm_type`, `dcn_v2_stages` and `lr_mult_list`.
ResNet:
  depth: 50
  variant: d
  freeze_at: 0
  freeze_norm: false
  norm_type: sync_bn
  return_idx: [0, 1, 2, 3]
  dcn_v2_stages: [1, 2, 3]
  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
  num_stages: 4

SOLOv2Head:
  seg_feat_channels: 256
  stacked_convs: 3
  num_grids: [40, 36, 24, 16, 12]
  kernel_out_channels: 128
  solov2_loss: SOLOv2Loss
  mask_nms: MaskMatrixNMS
  dcn_v2_stages: [2]
  drop_block: true

SOLOv2MaskHead:
  mid_channels: 128
  out_channels: 128
  start_level: 0
  end_level: 3
  use_dcn_in_tower: true
# Learning-rate schedule for the 36-epoch run set by `epoch` in this config.
LearningRate:
  base_lr: 0.01
  # Each scheduler entry is a mapping carrying a consumer-defined YAML tag.
  schedulers:
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [24, 33]
    - !LinearWarmup
      start_factor: 0.0
      steps: 1000

View File

@@ -0,0 +1,8 @@
# Base files this config builds on; only `weights` is set here.
_BASE_:
  - '../datasets/coco_instance.yml'
  - '../runtime.yml'
  - '_base_/solov2_r50_fpn.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/solov2_reader.yml'

weights: output/solov2_r50_fpn_1x_coco/model_final

View File

@@ -0,0 +1,38 @@
# Base files this config builds on, followed by top-level values set here.
_BASE_:
  - '../datasets/coco_instance.yml'
  - '../runtime.yml'
  - '_base_/solov2_r50_fpn.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/solov2_reader.yml'

weights: output/solov2_r50_fpn_3x_coco/model_final
epoch: 36
# Learning-rate schedule for the 36-epoch run set by `epoch` in this config.
LearningRate:
  base_lr: 0.01
  # Each scheduler entry is a mapping carrying a consumer-defined YAML tag.
  schedulers:
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [24, 33]
    - !LinearWarmup
      start_factor: 0.0
      steps: 1000
# Training input pipeline with RandomResize over several candidate sizes.
TrainReader:
  sample_transforms:
    - Decode: {}
    - Poly2Mask: {}
    - RandomResize:
        interp: 1
        # Candidate sizes; the first value varies from 640 to 800 while the
        # second stays at 1333.
        target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]]
        keep_ratio: true
    - RandomFlip: {}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch: {pad_to_stride: 32}
    - Gt2Solov2Target:
        # Same values as SOLOv2Head.num_grids in `_base_/solov2_r50_fpn.yml`.
        num_grids: [40, 36, 24, 16, 12]
        scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]]
        coord_sigma: 0.2
  batch_size: 2
  shuffle: true
  drop_last: true