Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,76 @@
# Co-tuning for Transfer Learning <br />Supervised Contrastive Learning
## Data preparation
Taking the [Kaggle dataset](https://www.kaggle.com/andrewmvd/road-sign-detection) competition data as an example, this section describes how to prepare custom few-shot data.
The [road-sign-detection](https://www.kaggle.com/andrewmvd/road-sign-detection) competition data on Kaggle contains 877 images covering 4 classes: crosswalk, speedlimit, stop and trafficlight.
It can be downloaded from Kaggle, or from this [download link](https://fsdet-dataset.bj.bcebos.com/roadsign_coco.tar.gz).
From the original dataset, select the same number of samples for each class, e.g. 10 shots, i.e. ten training images per class, and train on that subset (a sketch of building such a split is shown below).<br />
For the industrial case we use PKU-Market-PCB, a dataset for defect detection on printed circuit boards (PCB) that covers 6 common PCB defect types: [download link](https://fsdet-dataset.bj.bcebos.com/pcb.tar.gz)
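
The `train_shots10.json` / `train_shots30.json` annotation files used by the configs below are simply COCO-format annotations restricted to K images per class. A minimal sketch of how such a split could be built (the paths and the `make_kshot_split` helper are illustrative, not part of PaddleDetection):
```
import json
import random
from collections import defaultdict

def make_kshot_split(src_json, dst_json, k=10, seed=0):
    """Keep up to k images per category from a COCO-format annotation file.

    Note: an image kept for one class may also contain boxes of other classes,
    so some classes can end up with slightly more than k images.
    """
    with open(src_json) as f:
        data = json.load(f)

    imgs_per_cat = defaultdict(set)
    for ann in data["annotations"]:
        imgs_per_cat[ann["category_id"]].add(ann["image_id"])

    random.seed(seed)
    keep = set()
    for img_ids in imgs_per_cat.values():
        ids = sorted(img_ids)
        keep.update(random.sample(ids, min(k, len(ids))))

    data["images"] = [im for im in data["images"] if im["id"] in keep]
    data["annotations"] = [a for a in data["annotations"] if a["image_id"] in keep]
    with open(dst_json, "w") as f:
        json.dump(data, f)

# e.g. make_kshot_split("annotations/train.json", "annotations/train_shots10.json", k=10)
```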
## Model Zoo
| Backbone | Architecture | Images/GPU | Shots per class | Box AP | Download | Config |
| :------------------- | :------------- | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-vd | Faster | 1 | 10 | 60.1 | [download](https://bj.bcebos.com/v1/paddledet/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml) |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | 17.8 | [download](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_s_80e_contrast_pcb.pdparams) | [config](./ppyoloe_plus_crn_s_80e_contrast_pcb.yml) |
## Co-tuning comparison
| Backbone | Architecture | Images/GPU | Shots per class | Co-tuning | Box AP |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: |
| ResNet50-vd | Faster | 1 | 10 | False | 56.7 |
| ResNet50-vd | Faster | 1 | 10 | True | 60.1 |
## Contrastive learning comparison
| Backbone | Architecture | Images/GPU | Shots per class | Contrast | Box AP |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | False | 15.4 |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | True | 17.8 |
## Training & Evaluation & Inference
### 1. Training
```
# -c specifies which config file to use
# --eval evaluates while training; the checkpoint with the best validation result is saved
python tools/train.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml --eval
```
### 2. Evaluation
```
# -c specifies which config file to use
# -o overrides global variables defined in the config file
python tools/eval.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml \
-o weights=output/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign/best_model
```
### 3. Inference
```
# -c specifies which config file to use
# --infer_img specifies the path of the image to predict
python tools/infer.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml \
--infer_img=demo/road554.png
```
## Citations
```
@article{you2020co,
title={Co-tuning for transfer learning},
author={You, Kaichao and Kou, Zhi and Long, Mingsheng and Wang, Jianmin},
journal={Advances in Neural Information Processing Systems},
volume={33},
pages={17236--17246},
year={2020}
}
@article{khosla2020supervised,
title={Supervised contrastive learning},
author={Khosla, Prannay and Teterwak, Piotr and Wang, Chen and Sarna, Aaron and Tian, Yonglong and Isola, Phillip and Maschinot, Aaron and Liu, Ce and Krishnan, Dilip},
journal={Advances in Neural Information Processing Systems},
volume={33},
pages={18661--18673},
year={2020}
}
```


@@ -0,0 +1,40 @@
worker_num: 2

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: true
  drop_last: true
  collate_batch: false

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false
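
For intuition, the Eval/Test pipeline above (Decode → keep-ratio Resize → NormalizeImage → Permute → PadBatch) corresponds roughly to the following standalone preprocessing; this is a plain NumPy/Pillow sketch for illustration, not the PaddleDetection transforms themselves:
```
import numpy as np
from PIL import Image

def preprocess(path, target=(800, 1333), pad_to_stride=32,
               mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Decode -> keep-ratio Resize -> NormalizeImage -> Permute -> PadBatch (single image)."""
    img = Image.open(path).convert("RGB")                      # Decode
    w, h = img.size
    # keep_ratio: short side -> 800, long side capped at 1333
    scale = min(min(target) / min(h, w), max(target) / max(h, w))
    img = img.resize((int(round(w * scale)), int(round(h * scale))), Image.BICUBIC)  # interp: 2 ~ cubic
    x = np.asarray(img, dtype=np.float32) / 255.0              # is_scale: true
    x = (x - np.array(mean)) / np.array(std)                   # NormalizeImage
    x = x.transpose(2, 0, 1)                                   # Permute -> CHW
    ph, pw = [(d + pad_to_stride - 1) // pad_to_stride * pad_to_stride for d in x.shape[1:]]
    out = np.zeros((3, ph, pw), dtype=np.float32)              # PadBatch to a stride multiple
    out[:, : x.shape[1], : x.shape[2]] = x
    return out
```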


@@ -0,0 +1,66 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams

FasterRCNN:
  backbone: ResNet
  rpn_head: RPNHead
  bbox_head: BBoxHead
  # post process
  bbox_post_process: BBoxPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [2]
  num_stages: 3

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [32, 64, 128, 256, 512]
    strides: [16]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 12000
    post_nms_top_n: 2000
    topk_after_collect: False
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 6000
    post_nms_top_n: 1000

BBoxHead:
  head: Res5Head
  roi_extractor:
    resolution: 14
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner
  with_pool: true

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5


@@ -0,0 +1,73 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams

FasterRCNN:
  backbone: ResNet
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  # post process
  bbox_post_process: BBoxPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: True
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000

BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

TwoFCHead:
  out_channel: 1024

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5
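
As a sanity check on the `anchor_generator` settings above, the base anchor shapes per FPN level can be derived directly; the snippet below is an illustrative derivation (one anchor size per stride, aspect ratio interpreted as height/width), not the RPN code itself:
```
import math

def base_anchors(size, ratios=(0.5, 1.0, 2.0)):
    """(width, height) of the base anchors for one level, all with area size**2."""
    out = []
    for r in ratios:                      # r is interpreted as height / width
        w = math.sqrt(size * size / r)
        out.append((w, w * r))
    return out

for size, stride in zip([32, 64, 128, 256, 512], [4, 8, 16, 32, 64]):
    shapes = [(round(w, 1), round(h, 1)) for w, h in base_anchors(size)]
    print(f"stride {stride:>2}: {shapes}")
```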


@@ -0,0 +1,40 @@
worker_num: 2

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: true
  drop_last: true
  collate_batch: false

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: false
  drop_last: false


@@ -0,0 +1,19 @@
epoch: 12

LearningRate:
  base_lr: 0.01
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2
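
The schedule defined above is a linear warmup over the first 1000 iterations followed by step decay (×0.1) at epochs 8 and 11. A small sketch of the resulting learning rate, assuming a user-supplied `iters_per_epoch` (illustrative, not the scheduler implementation):
```
def lr_at(it, iters_per_epoch, base_lr=0.01, warmup_steps=1000,
          start_factor=0.1, milestones=(8, 11), gamma=0.1):
    """Learning rate at iteration `it` under LinearWarmup + PiecewiseDecay."""
    lr = base_lr
    for m in milestones:                       # decay once for each passed milestone epoch
        if it / iters_per_epoch >= m:
            lr *= gamma
    if it < warmup_steps:                      # linear ramp from start_factor * lr
        lr *= start_factor + (1.0 - start_factor) * it / warmup_steps
    return lr

# e.g. with 500 iterations per epoch:
# lr_at(0, 500) -> 0.001, lr_at(1000, 500) -> 0.01, lr_at(9 * 500, 500) -> 0.001
```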


@@ -0,0 +1,18 @@
epoch: 80

LearningRate:
  base_lr: 0.001
  schedulers:
  - !CosineDecay
    max_epochs: 96
  - !LinearWarmup
    start_factor: 0.
    epochs: 5

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2


@@ -0,0 +1,49 @@
architecture: YOLOv3
norm_type: sync_bn
use_ema: true
use_cot: False
ema_decay: 0.9998
ema_black_list: ['proj_conv.weight']
custom_black_list: ['reduce_mean']

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  post_process: ~

CSPResNet:
  layers: [3, 6, 6, 3]
  channels: [64, 128, 256, 512, 1024]
  return_idx: [1, 2, 3]
  use_large_stem: True
  use_alpha: True

CustomCSPPAN:
  out_channels: [768, 384, 192]
  stage_num: 1
  block_num: 3
  act: 'swish'
  spp: true

PPYOLOEHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 30
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7
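
For reference, PPYOLOE-style heads predict at feature-map cell centers; with `grid_cell_offset: 0.5` and `fpn_strides: [32, 16, 8]`, the anchor points in input-image coordinates can be sketched as below (illustrative only, not the PPYOLOEHead implementation):
```
import numpy as np

def anchor_points(feat_hw, strides=(32, 16, 8), offset=0.5):
    """Cell-center anchor points for each FPN level, in input-image pixel coordinates."""
    points = []
    for (h, w), s in zip(feat_hw, strides):
        ys, xs = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
        pts = np.stack([(xs + offset) * s, (ys + offset) * s], axis=-1)  # (x, y) per cell
        points.append(pts.reshape(-1, 2))
    return np.concatenate(points)

# For a 640x640 input, strides 32/16/8 give 20x20, 40x40 and 80x80 feature maps:
print(anchor_points([(20, 20), (40, 40), (80, 80)]).shape)  # (8400, 2)
```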


@@ -0,0 +1,40 @@
worker_num: 4
eval_height: &eval_height 640
eval_width: &eval_width 640
eval_size: &eval_size [*eval_height, *eval_width]

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomDistort: {}
  - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
  - RandomCrop: {}
  - RandomFlip: {}
  batch_transforms:
  - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  - PadGT: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  use_shared_memory: true
  collate_batch: true

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  batch_size: 2

TestReader:
  inputs_def:
    image_shape: [3, *eval_height, *eval_width]
  sample_transforms:
  - Decode: {}
  - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  batch_size: 1


@@ -0,0 +1,67 @@
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_1x.yml',
  '_base_/faster_rcnn_r50_fpn.yml',
  '_base_/faster_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams
weights: output/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign/model_final
snapshot_epoch: 5

ResNet:
  # index 0 stands for res2
  depth: 50
  variant: d
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

epoch: 30
LearningRate:
  base_lr: 0.001
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000

use_cot: True

BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner
  cot_classes: 80
  loss_cot:
    name: COTLoss
    cot_lambda: 1
    cot_scale: 1

num_classes: 4
metric: COCO
map_type: integral

TrainDataset:
  !COCODataSet
    image_dir: images
    anno_path: annotations/train_shots10.json
    dataset_dir: dataset/roadsign_coco
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  !COCODataSet
    image_dir: images
    anno_path: annotations/roadsign_valid.json
    dataset_dir: dataset/roadsign_coco

TestDataset:
  !ImageFolder
    anno_path: annotations/roadsign_valid.json
    dataset_dir: dataset/roadsign_coco
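
For intuition, co-tuning (You et al., 2020) keeps the pretrained source classifier in play during fine-tuning (here `cot_classes: 80`, the COCO categories): each target label is mapped to a soft distribution over source categories, and an extra cross-entropy term on the source logits is added, weighted by `cot_lambda`. The sketch below illustrates that idea only; it is not the `COTLoss` implementation in PaddleDetection, and the `label_relation` matrix is an assumed input:
```
import paddle
import paddle.nn.functional as F

def cotuning_loss(target_logits, source_logits, target_labels, label_relation,
                  cot_lambda=1.0):
    """Illustrative co-tuning objective, not PaddleDetection's COTLoss.

    target_logits:  [N, num_target_classes] logits of the new detection head
    source_logits:  [N, cot_classes]        logits of the retained pretrained head
    target_labels:  [N]                     ground-truth target class ids
    label_relation: [num_target_classes, cot_classes] assumed p(source | target),
                    e.g. estimated by averaging pretrained softmax outputs per class
    """
    ce = F.cross_entropy(target_logits, target_labels)                 # usual target-class loss
    soft = paddle.index_select(label_relation, target_labels, axis=0)  # [N, cot_classes]
    log_p = F.log_softmax(source_logits, axis=-1)
    cot = -(soft * log_p).sum(axis=-1).mean()                          # soft cross-entropy on source head
    return ce + cot_lambda * cot
```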


@@ -0,0 +1,81 @@
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  './_base_/optimizer_80e.yml',
  './_base_/ppyoloe_plus_crn.yml',
  './_base_/ppyoloe_plus_reader.yml',
]
log_iter: 100
snapshot_epoch: 10
weights: output/ppyoloe_plus_crn_s_80e_contrast_pcb/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams
depth_mult: 0.33
width_mult: 0.50

epoch: 80
LearningRate:
  base_lr: 0.0001
  schedulers:
  - !CosineDecay
    max_epochs: 96
  - !LinearWarmup
    start_factor: 0.
    epochs: 5

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEContrastHead
  post_process: ~

PPYOLOEContrastHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 100
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5, contrast: 0.2}
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  contrast_loss:
    name: SupContrast
    temperature: 100
    sample_num: 2048
    thresh: 0.75
  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7

num_classes: 6
metric: COCO
map_type: integral

TrainDataset:
  !COCODataSet
    image_dir: images
    anno_path: pcb_cocoanno/train_shots30.json
    dataset_dir: dataset/pcb
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  !COCODataSet
    image_dir: images
    anno_path: pcb_cocoanno/val.json
    dataset_dir: dataset/pcb

TestDataset:
  !ImageFolder
    anno_path: pcb_cocoanno/val.json
    dataset_dir: dataset/pcb
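
The `contrast_loss` entry above adds a supervised contrastive term (Khosla et al., 2020) over sampled head features, controlled by `temperature`, `sample_num` and `thresh`, and weighted by `contrast: 0.2` in `loss_weight`. A minimal illustrative sketch of the loss itself (one view per sample; the `sample_num` subsampling and `thresh` filtering are omitted, and this is not the `SupContrast` module in PaddleDetection):
```
import paddle
import paddle.nn.functional as F

def sup_contrast(features, labels, temperature=100.0):
    """Supervised contrastive loss over one view per sample (illustrative sketch).

    features: [N, D] embeddings sampled from the head, labels: [N] class ids.
    """
    feats = F.normalize(features, axis=1)
    sim = paddle.matmul(feats, feats, transpose_y=True) / temperature    # [N, N] similarities
    n = feats.shape[0]
    not_self = 1.0 - paddle.eye(n)                                       # exclude i == j pairs
    pos = (labels.unsqueeze(0) == labels.unsqueeze(1)).astype('float32') * not_self

    exp_sim = paddle.exp(sim) * not_self
    log_prob = sim - paddle.log(exp_sim.sum(axis=1, keepdim=True) + 1e-12)

    pos_cnt = paddle.clip(pos.sum(axis=1), min=1.0)
    loss = -(pos * log_prob).sum(axis=1) / pos_cnt                       # mean log-prob over positives
    return loss.mean()
```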