更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/sparse_rcnn/README.md
+++ b/paddle_detection/configs/sparse_rcnn/README.md
@@ -0,0 +1,25 @@
+# Sparse R-CNN: End-to-End Object Detection with Learnable Proposals
+
+
+## Introduction
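+Sparse R-CNN is a purely sparse method for object detection in images: it works from a small, fixed set of learnable proposals (100 or 300 in the configs below) rather than dense candidate boxes.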
+
+
+## Model Zoo
+
+| Backbone        | Proposals | lr schedule | Box AP | download   | config |
+| :-------------- | :-----: | :------------: | :-----: | :-----: | :-----: |
+| ResNet50-FPN | 100 | 3x |  43.0  | [download](https://paddledet.bj.bcebos.com/models/sparse_rcnn_r50_fpn_3x_pro100_coco.pdparams) | [config](./sparse_rcnn_r50_fpn_3x_pro100_coco.yml) |
+| ResNet50-FPN | 300 | 3x |  44.6  | [download](https://paddledet.bj.bcebos.com/models/sparse_rcnn_r50_fpn_3x_pro300_coco.pdparams) | [config](./sparse_rcnn_r50_fpn_3x_pro300_coco.yml) |
+
+## Citations
+```
+@misc{sun2021sparse,
+      title={Sparse R-CNN: End-to-End Object Detection with Learnable Proposals},
+      author={Peize Sun and Rufeng Zhang and Yi Jiang and Tao Kong and Chenfeng Xu and Wei Zhan and Masayoshi Tomizuka and Lei Li and Zehuan Yuan and Changhu Wang and Ping Luo},
+      year={2021},
+      eprint={2011.12450},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
--- a/paddle_detection/configs/sparse_rcnn/_base_/optimizer_3x.yml
+++ b/paddle_detection/configs/sparse_rcnn/_base_/optimizer_3x.yml
@@ -0,0 +1,17 @@
+epoch: 36  # total training epochs for the "3x" schedule this file is named after
+
+LearningRate:
+  base_lr: 0.000025
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # presumably the LR multiplier applied at each milestone; confirm against PiecewiseDecay
+    milestones: [28, 34]  # both fall inside the 36-epoch run; presumably epoch indices
+  - !LinearWarmup
+    start_factor: 0.01  # presumably the fraction of base_lr used at the first warmup step
+    steps: 1000  # warmup length; presumably counted in iterations, not epochs (TODO confirm)
+
+OptimizerBuilder:
+  clip_grad_by_norm: 1.0  # gradient-norm clipping threshold
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/sparse_rcnn/_base_/sparse_rcnn_r50_fpn.yml
+++ b/paddle_detection/configs/sparse_rcnn/_base_/sparse_rcnn_r50_fpn.yml
@@ -0,0 +1,44 @@
+architecture: SparseRCNN  # names the top-level component configured in the SparseRCNN section below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+SparseRCNN:  # each value names a component configured in its own section of this file
+  backbone: ResNet
+  neck: FPN
+  head: SparseRCNNHead
+  postprocess: SparsePostProcess
+
+ResNet:
+  # Stage indices (return_idx, and presumably freeze_at) are zero-based: index 0 stands for res2.
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]  # four stage outputs; res2..res5 under the index-0-is-res2 convention
+  num_stages: 4
+
+FPN:
+  out_channel: 256  # same value as SparseRCNNHead.head_hidden_dim below
+
+SparseRCNNHead:
+  head_hidden_dim: 256
+  head_dim_feedforward: 2048
+  nhead: 8
+  head_dropout: 0.0
+  head_cls: 1
+  head_reg: 3
+  head_dim_dynamic: 64
+  head_num_dynamic: 2
+  head_num_heads: 6
+  deep_supervision: true
+  num_proposals: 100  # SparsePostProcess.num_proposals below carries the same value; presumably they must match
+  loss_func: SparseRCNNLoss  # configured in the SparseRCNNLoss section below
+
+SparseRCNNLoss:
+  losses: ["labels", "boxes"]
+  focal_loss_alpha: 0.25
+  focal_loss_gamma: 2.0
+  class_weight: 2.0  # class/l1/giou weights: presumably relative weights of the three loss terms (verify in SparseRCNNLoss)
+  l1_weight: 5.0
+  giou_weight: 2.0
+
+SparsePostProcess:
+  num_proposals: 100  # same value as SparseRCNNHead.num_proposals above
--- a/paddle_detection/configs/sparse_rcnn/_base_/sparse_rcnn_reader.yml
+++ b/paddle_detection/configs/sparse_rcnn/_base_/sparse_rcnn_reader.yml
@@ -0,0 +1,41 @@
+worker_num: 4
+
+TrainReader:  # training pipeline: multi-scale resize + random flip, batches of 4
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}  # eleven candidate sizes: 480-800 in steps of 32, each paired with 1333
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}  # identical constants in all three readers
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}  # presumably pads each batch so H and W are multiples of 32 (TODO confirm)
+  - Gt2SparseTarget: {use_padding_shape: true}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false  # only set for TrainReader; Eval/Test readers leave it at its default
+
+EvalReader:  # evaluation pipeline: single fixed resize target, batch size 1, no shuffling
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: true}  # largest of the training resize targets
+  - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {use_padding_shape: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+TestReader:  # same transforms and batch size as EvalReader; drop_last is not set here
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: true}
+  - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {use_padding_shape: true}
+  batch_size: 1
+  shuffle: false
--- a/paddle_detection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro100_coco.yml
+++ b/paddle_detection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro100_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [  # base configs this file builds on; presumably resolved relative to this file's directory
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/sparse_rcnn_r50_fpn.yml',  # model definition; num_proposals is 100 there
+  '_base_/optimizer_3x.yml',  # 36-epoch AdamW schedule
+  '_base_/sparse_rcnn_reader.yml',  # Train/Eval/Test reader pipelines
+]
+
+num_classes: 80  # presumably the category count of the COCO dataset config referenced above
+weights: output/sparse_rcnn_r50_fpn_3x_pro100_coco/model_final  # directory name matches this config's file name
--- a/paddle_detection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro300_coco.yml
+++ b/paddle_detection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_3x_pro300_coco.yml
@@ -0,0 +1,19 @@
+_BASE_: [  # base configs this file builds on; presumably resolved relative to this file's directory
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/sparse_rcnn_r50_fpn.yml',  # model definition; sets num_proposals to 100 (changed to 300 below)
+  '_base_/optimizer_3x.yml',  # 36-epoch AdamW schedule
+  '_base_/sparse_rcnn_reader.yml',  # Train/Eval/Test reader pipelines
+]
+
+num_classes: 80  # presumably the category count of the COCO dataset config referenced above
+weights: output/sparse_rcnn_r50_fpn_3x_pro300_coco/model_final  # directory name matches this config's file name
+
+snapshot_epoch: 1  # presumably the checkpoint-saving interval in epochs (TODO confirm against runtime.yml)
+
+
+SparseRCNNHead:
+  num_proposals: 300  # the base model file sets 100; this is the 300-proposal variant
+
+SparsePostProcess:
+  num_proposals: 300  # set to the same value as SparseRCNNHead.num_proposals above