更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,28 @@
# RetinaNet (Focal Loss for Dense Object Detection)
## Model Zoo
| Backbone | Model | imgs/GPU | lr schedule | FPS | Box AP | download | config |
| ------------ | --------- | -------- | ----------- | --- | ------ | ---------- | ----------- |
| ResNet50-FPN | RetinaNet | 2 | 1x | --- | 37.5 | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r50_fpn_1x_coco.pdparams) | [config](./retinanet_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | RetinaNet | 2 | 2x | --- | 39.1 | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r50_fpn_2x_coco.pdparams) | [config](./retinanet_r50_fpn_2x_coco.yml) |
| ResNet101-FPN| RetinaNet | 2 | 2x | --- | 40.6 | [model](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | [config](./retinanet_r101_fpn_2x_coco.yml) |
| ResNet50-FPN | RetinaNet + [FGD](../slim/distill/README.md) | 2 | 2x | --- | 40.8 | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r101_distill_r50_2x_coco.pdparams) | [config](./retinanet_r50_fpn_2x_coco.yml)/[slim_config](../slim/distill/retinanet_resnet101_coco_distill.yml) |
**Notes:**
- The ResNet50-FPN are trained on COCO train2017 with 8 GPUs. Both ResNet101-FPN and ResNet50-FPN with [FGD](../slim/distill/README.md) are trained on COCO train2017 with 4 GPUs.
- All above models are evaluated on val2017. Box AP=`mAP(IoU=0.5:0.95)`.
## Citation
```latex
@inproceedings{lin2017focal,
title={Focal loss for dense object detection},
author={Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr},
booktitle={Proceedings of the IEEE international conference on computer vision},
year={2017}
}
```

View File

@@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.001
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@@ -0,0 +1,19 @@
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.001
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@@ -0,0 +1,57 @@
architecture: RetinaNet
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
RetinaNet:
backbone: ResNet
neck: FPN
head: RetinaHead
ResNet:
depth: 50
variant: b
norm_type: bn
freeze_at: 0
return_idx: [1,2,3]
num_stages: 4
FPN:
out_channel: 256
spatial_scales: [0.125, 0.0625, 0.03125]
extra_stage: 2
has_extra_convs: true
use_c5: false
RetinaHead:
conv_feat:
name: RetinaFeat
feat_in: 256
feat_out: 256
num_convs: 4
norm_type: null
use_dcn: false
anchor_generator:
name: RetinaAnchorGenerator
octave_base_scale: 4
scales_per_octave: 3
aspect_ratios: [0.5, 1.0, 2.0]
strides: [8.0, 16.0, 32.0, 64.0, 128.0]
bbox_assigner:
name: MaxIoUAssigner
positive_overlap: 0.5
negative_overlap: 0.4
allow_low_quality: true
loss_class:
name: FocalLoss
gamma: 2.0
alpha: 0.25
loss_weight: 1.0
loss_bbox:
name: SmoothL1Loss
beta: 0.0
loss_weight: 1.0
nms:
name: MultiClassNMS
nms_top_k: 1000
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@@ -0,0 +1,36 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1}
- RandomFlip: {}
- NormalizeImage: {is_scale: True, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 2
shuffle: True
drop_last: True
collate_batch: False
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
- NormalizeImage: {is_scale: True, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 8
TestReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1}
- NormalizeImage: {is_scale: True, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1

View File

@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/retinanet_r50_fpn.yml',
'_base_/optimizer_2x.yml',
'_base_/retinanet_reader.yml'
]
weights: https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams

View File

@@ -0,0 +1,18 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/retinanet_r50_fpn.yml',
'_base_/optimizer_2x.yml',
'_base_/retinanet_reader.yml'
]
weights: output/retinanet_r101_fpn_2x_coco/model_final
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
ResNet:
depth: 101
variant: b
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
num_stages: 4

View File

@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/retinanet_r50_fpn.yml',
'_base_/optimizer_1x.yml',
'_base_/retinanet_reader.yml'
]
weights: output/retinanet_r50_fpn_1x_coco/model_final

View File

@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/retinanet_r50_fpn.yml',
'_base_/optimizer_2x.yml',
'_base_/retinanet_reader.yml'
]
weights: output/retinanet_r50_fpn_2x_coco/model_final