更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/retinanet/README.md
+++ b/paddle_detection/configs/retinanet/README.md
@@ -0,0 +1,28 @@
+# RetinaNet (Focal Loss for Dense Object Detection)
+
+## Model Zoo
+
+| Backbone     | Model     | imgs/GPU | lr schedule | FPS | Box AP | download   | config      |
+| ------------ | --------- | -------- | ----------- | --- | ------ | ---------- | ----------- |
+| ResNet50-FPN | RetinaNet | 2        | 1x          | --- | 37.5   | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r50_fpn_1x_coco.pdparams) | [config](./retinanet_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN | RetinaNet | 2        | 2x          | --- | 39.1   | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r50_fpn_2x_coco.pdparams) | [config](./retinanet_r50_fpn_2x_coco.yml) |
+| ResNet101-FPN| RetinaNet | 2        | 2x          | --- | 40.6   | [model](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | [config](./retinanet_r101_fpn_2x_coco.yml)  |
+| ResNet50-FPN | RetinaNet + [FGD](../slim/distill/README.md) | 2        | 2x          | --- | 40.8    | [model](https://bj.bcebos.com/v1/paddledet/models/retinanet_r101_distill_r50_2x_coco.pdparams) | [config](./retinanet_r50_fpn_2x_coco.yml)/[slim_config](../slim/distill/retinanet_resnet101_coco_distill.yml) |
+
+
+**Notes:**
+
+- The ResNet50-FPN models are trained on COCO train2017 with 8 GPUs. Both ResNet101-FPN and ResNet50-FPN with [FGD](../slim/distill/README.md) are trained on COCO train2017 with 4 GPUs.
+- All models above are evaluated on val2017. Box AP=`mAP(IoU=0.5:0.95)`.
+
+
+## Citation
+
+```latex
+@inproceedings{lin2017focal,
+  title={Focal loss for dense object detection},
+  author={Lin, Tsung-Yi and Goyal, Priya and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr},
+  booktitle={Proceedings of the IEEE international conference on computer vision},
+  year={2017}
+}
+```
--- a/paddle_detection/configs/retinanet/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/retinanet/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12  # 1x schedule length
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+    - !PiecewiseDecay
+      milestones: [8, 11]  # both fall inside the 12 epochs set above
+      gamma: 0.1
+    - !LinearWarmup
+      steps: 500
+      start_factor: 0.001
+
+OptimizerBuilder:
+  optimizer:
+    type: Momentum
+    momentum: 0.9
+  regularizer:
+    type: L2
+    factor: 0.0001
--- a/paddle_detection/configs/retinanet/_base_/optimizer_2x.yml
+++ b/paddle_detection/configs/retinanet/_base_/optimizer_2x.yml
@@ -0,0 +1,19 @@
+epoch: 24  # 2x schedule length
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+    - !PiecewiseDecay
+      milestones: [16, 22]  # both fall inside the 24 epochs set above
+      gamma: 0.1
+    - !LinearWarmup
+      steps: 500
+      start_factor: 0.001
+
+OptimizerBuilder:
+  optimizer:
+    type: Momentum
+    momentum: 0.9
+  regularizer:
+    type: L2
+    factor: 0.0001
--- a/paddle_detection/configs/retinanet/_base_/retinanet_r50_fpn.yml
+++ b/paddle_detection/configs/retinanet/_base_/retinanet_r50_fpn.yml
@@ -0,0 +1,57 @@
+architecture: RetinaNet  # same name as the `RetinaNet:` section below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+RetinaNet:
+  backbone: ResNet  # each value here names a top-level section of this file
+  neck: FPN
+  head: RetinaHead
+
+ResNet:
+  depth: 50
+  variant: b
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256  # same value as RetinaHead.conv_feat.feat_in
+  spatial_scales: [0.125, 0.0625, 0.03125]  # i.e. 1/8, 1/16, 1/32
+  extra_stage: 2
+  has_extra_convs: true
+  use_c5: false
+
+RetinaHead:
+  conv_feat:
+    name: RetinaFeat
+    feat_in: 256
+    feat_out: 256
+    num_convs: 4
+    norm_type: null
+    use_dcn: false
+  anchor_generator:
+    name: RetinaAnchorGenerator
+    octave_base_scale: 4
+    scales_per_octave: 3
+    aspect_ratios: [0.5, 1.0, 2.0]
+    # 8/16/32 are the reciprocals of FPN.spatial_scales; 64/128 presumably map to the 2 extra stages - confirm
+    strides: [8.0, 16.0, 32.0, 64.0, 128.0]
+  bbox_assigner:
+    name: MaxIoUAssigner
+    positive_overlap: 0.5
+    negative_overlap: 0.4
+    allow_low_quality: true
+  loss_class:
+    name: FocalLoss
+    gamma: 2.0
+    alpha: 0.25
+    loss_weight: 1.0
+  loss_bbox:
+    name: SmoothL1Loss
+    beta: 0.0  # NOTE(review): presumably degenerates to plain L1 - confirm in SmoothL1Loss
+    loss_weight: 1.0
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 1000
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
--- a/paddle_detection/configs/retinanet/_base_/retinanet_reader.yml
+++ b/paddle_detection/configs/retinanet/_base_/retinanet_reader.yml
@@ -0,0 +1,36 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}
+    - RandomFlip: {}
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - Permute: {}
+  batch_transforms:
+    - PadBatch: {pad_to_stride: 32}
+  batch_size: 2  # same value as the imgs/GPU column in this directory's README
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}  # largest size used by TrainReader's RandomResize
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - Permute: {}
+  batch_transforms:
+    - PadBatch: {pad_to_stride: 32}
+  batch_size: 8
+
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}  # identical transforms to EvalReader
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - Permute: {}
+  batch_transforms:
+    - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
--- a/paddle_detection/configs/retinanet/retinanet_r101_distill_r50_2x_coco.yml
+++ b/paddle_detection/configs/retinanet/retinanet_r101_distill_r50_2x_coco.yml
@@ -0,0 +1,9 @@
+# R50-FPN model with the 2x optimizer; `weights` is a remote checkpoint URL rather than a local output path.
+_BASE_:
+  - '../datasets/coco_detection.yml'
+  - '../runtime.yml'
+  - '_base_/retinanet_r50_fpn.yml'
+  - '_base_/optimizer_2x.yml'
+  - '_base_/retinanet_reader.yml'
+
+weights: https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams
--- a/paddle_detection/configs/retinanet/retinanet_r101_fpn_2x_coco.yml
+++ b/paddle_detection/configs/retinanet/retinanet_r101_fpn_2x_coco.yml
@@ -0,0 +1,18 @@
+# Reuses the R50-FPN base model file and overrides the backbone section and pretrained weights for ResNet101.
+_BASE_:
+  - '../datasets/coco_detection.yml'
+  - '../runtime.yml'
+  - '_base_/retinanet_r50_fpn.yml'
+  - '_base_/optimizer_2x.yml'
+  - '_base_/retinanet_reader.yml'
+
+weights: output/retinanet_r101_fpn_2x_coco/model_final
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+
+ResNet:
+  depth: 101  # the only value that differs from the ResNet section in _base_/retinanet_r50_fpn.yml
+  variant: b
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  num_stages: 4
--- a/paddle_detection/configs/retinanet/retinanet_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/retinanet/retinanet_r50_fpn_1x_coco.yml
@@ -0,0 +1,9 @@
+# R50-FPN model combined with the 1x optimizer file (_base_/optimizer_1x.yml).
+_BASE_:
+  - '../datasets/coco_detection.yml'
+  - '../runtime.yml'
+  - '_base_/retinanet_r50_fpn.yml'
+  - '_base_/optimizer_1x.yml'
+  - '_base_/retinanet_reader.yml'
+
+weights: output/retinanet_r50_fpn_1x_coco/model_final
--- a/paddle_detection/configs/retinanet/retinanet_r50_fpn_2x_coco.yml
+++ b/paddle_detection/configs/retinanet/retinanet_r50_fpn_2x_coco.yml
@@ -0,0 +1,9 @@
+# R50-FPN model combined with the 2x optimizer file (_base_/optimizer_2x.yml).
+_BASE_:
+  - '../datasets/coco_detection.yml'
+  - '../runtime.yml'
+  - '_base_/retinanet_r50_fpn.yml'
+  - '_base_/optimizer_2x.yml'
+  - '_base_/retinanet_reader.yml'
+
+weights: output/retinanet_r50_fpn_2x_coco/model_final