更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/ppyolo/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/ppyolo/_base_/optimizer_1x.yml
@@ -0,0 +1,22 @@
+epoch: 405  # schedule length; both decay milestones below are smaller than this
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # decay factor used by PiecewiseDecay
+    milestones:  # two decay points, both before `epoch: 405`
+    - 243
+    - 324
+  - !LinearWarmup
+    start_factor: 0.  # warm-up starts from a factor of 0
+    steps: 4000  # warm-up length; presumably iterations, not epochs -- TODO confirm
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.  # gradient clipping by norm, threshold 35
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005  # coefficient of the L2 regularizer named below
+    type: L2
--- a/paddle_detection/configs/ppyolo/_base_/optimizer_2x.yml
+++ b/paddle_detection/configs/ppyolo/_base_/optimizer_2x.yml
@@ -0,0 +1,22 @@
+epoch: 811  # schedule length; both decay milestones below are smaller than this
+
+LearningRate:
+  base_lr: 0.01  # same base rate as optimizer_1x.yml
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # decay factor used by PiecewiseDecay
+    milestones:  # two decay points, both before `epoch: 811`
+    - 649
+    - 730
+  - !LinearWarmup
+    start_factor: 0.  # warm-up starts from a factor of 0
+    steps: 4000  # warm-up length; presumably iterations, not epochs -- TODO confirm
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.  # gradient clipping by norm, threshold 35
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005  # coefficient of the L2 regularizer named below
+    type: L2
--- a/paddle_detection/configs/ppyolo/_base_/optimizer_365e.yml
+++ b/paddle_detection/configs/ppyolo/_base_/optimizer_365e.yml
@@ -0,0 +1,21 @@
+epoch: 365  # schedule length; the single decay milestone below is smaller than this
+
+LearningRate:
+  base_lr: 0.005  # half of the 0.01 used in optimizer_1x.yml / optimizer_2x.yml
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # decay factor used by PiecewiseDecay
+    milestones:  # one decay point, before `epoch: 365`
+    - 243
+  - !LinearWarmup
+    start_factor: 0.  # warm-up starts from a factor of 0
+    steps: 4000  # warm-up length; presumably iterations, not epochs -- TODO confirm
+
+OptimizerBuilder:
+  clip_grad_by_norm: 35.  # gradient clipping by norm, threshold 35
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005  # coefficient of the L2 regularizer named below
+    type: L2
--- a/paddle_detection/configs/ppyolo/_base_/optimizer_650e.yml
+++ b/paddle_detection/configs/ppyolo/_base_/optimizer_650e.yml
@@ -0,0 +1,22 @@
+epoch: 650  # schedule length; all three decay milestones below are smaller than this
+
+LearningRate:
+  base_lr: 0.005  # same base rate as optimizer_365e.yml
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # decay factor used by PiecewiseDecay
+    milestones:  # three decay points, all before `epoch: 650`
+    - 430
+    - 540
+    - 610
+  - !LinearWarmup
+    start_factor: 0.  # warm-up starts from a factor of 0
+    steps: 4000  # warm-up length; presumably iterations, not epochs -- TODO confirm
+
+OptimizerBuilder:  # NOTE(review): no clip_grad_by_norm here, unlike optimizer_1x/2x/365e.yml -- confirm intentional
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005  # coefficient of the L2 regularizer named below
+    type: L2
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
@@ -0,0 +1,56 @@
+architecture: YOLOv3  # PP-YOLO base model with a MobileNetV3 "large" backbone; see `YOLOv3:` below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: MobileNetV3
+  neck: PPYOLOFPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+MobileNetV3:
+  model_name: large  # pretrain_weights above points at the MobileNetV3_large_x1_0 checkpoint
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [13, 16]  # two entries, like in_channels / anchor_masks / downsample below
+
+PPYOLOFPN:
+  in_channels: [160, 368]  # one value per feature_maps entry
+  coord_conv: true
+  conv_block_num: 0
+  spp: true
+  drop_block: true
+
+YOLOv3Head:
+  anchors: [[11, 18], [34, 47], [51, 126],
+            [115, 71], [120, 195], [254, 235]]
+  anchor_masks: [[3, 4, 5], [0, 1, 2]]  # two groups of indices into the six anchors above
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+
+YOLOv3Loss:
+  ignore_thresh: 0.5
+  downsample: [32, 16]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.005  # same value as nms.score_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    score_threshold: 0.005
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
@@ -0,0 +1,56 @@
+architecture: YOLOv3  # PP-YOLO base model with a MobileNetV3 "small" backbone; see `YOLOv3:` below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: MobileNetV3
+  neck: PPYOLOFPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+MobileNetV3:
+  model_name: small  # pretrain_weights above points at the MobileNetV3_small_x1_0 checkpoint
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [9, 12]  # two entries, like in_channels / anchor_masks / downsample below
+
+PPYOLOFPN:
+  in_channels: [96, 304]  # one value per feature_maps entry
+  coord_conv: true
+  conv_block_num: 0
+  spp: true
+  drop_block: true
+
+YOLOv3Head:
+  anchors: [[11, 18], [34, 47], [51, 126],
+            [115, 71], [120, 195], [254, 235]]
+  anchor_masks: [[3, 4, 5], [0, 1, 2]]  # two groups of indices into the six anchors above
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+
+YOLOv3Loss:
+  ignore_thresh: 0.5
+  downsample: [32, 16]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.005  # same value as nms.score_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    score_threshold: 0.005
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_r18vd.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_r18vd.yml
@@ -0,0 +1,57 @@
+architecture: YOLOv3  # PP-YOLO base model with a depth-18, variant-d ResNet backbone; see `YOLOv3:` below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: ResNet
+  neck: PPYOLOFPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+ResNet:
+  depth: 18
+  variant: d
+  return_idx: [2, 3]  # two entries, like anchor_masks / downsample below
+  freeze_at: -1
+  freeze_norm: false
+  norm_decay: 0.
+
+PPYOLOFPN:  # unlike the mbv3 and r50vd base files in this change, coord_conv and spp are not set here
+  drop_block: true
+  block_size: 3
+  keep_prob: 0.9
+  conv_block_num: 0
+
+YOLOv3Head:
+  anchor_masks: [[3, 4, 5], [0, 1, 2]]  # two groups of indices into the six anchors below
+  anchors: [[10, 14], [23, 27], [37, 58],
+            [81, 82], [135, 169], [344, 319]]
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+
+YOLOv3Loss:
+  ignore_thresh: 0.7
+  downsample: [32, 16]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.01  # same value as nms.score_threshold and nms.post_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MatrixNMS  # the mbv3 and tiny base files in this change use MultiClassNMS instead
+    keep_top_k: 100
+    score_threshold: 0.01
+    post_threshold: 0.01
+    nms_top_k: -1
+    background_label: -1
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
@@ -0,0 +1,66 @@
+architecture: YOLOv3  # PP-YOLO base model with a depth-50, variant-d ResNet backbone; see `YOLOv3:` below
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: ResNet
+  neck: PPYOLOFPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  return_idx: [1, 2, 3]  # three entries, like anchor_masks / downsample below
+  dcn_v2_stages: [3]  # the "dcn" in this file's name; index 3 is also the last return_idx entry
+  freeze_at: -1
+  freeze_norm: false
+  norm_decay: 0.
+
+PPYOLOFPN:
+  coord_conv: true
+  drop_block: true
+  block_size: 3
+  keep_prob: 0.9
+  spp: true
+
+YOLOv3Head:
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # three groups of indices into the nine anchors above
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+  iou_aware: true
+  iou_aware_factor: 0.4  # ppyolov2_r50vd_dcn.yml in this change uses 0.5
+
+YOLOv3Loss:
+  ignore_thresh: 0.7
+  downsample: [32, 16, 8]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+  iou_aware_loss: IouAwareLoss  # names the `IouAwareLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+IouAwareLoss:
+  loss_weight: 1.0
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.01  # same value as nms.score_threshold and nms.post_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MatrixNMS
+    keep_top_k: 100
+    score_threshold: 0.01
+    post_threshold: 0.01
+    nms_top_k: -1
+    background_label: -1
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_reader.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 2  # reader settings: TrainReader / EvalReader / TestReader pipelines follow
+TrainReader:
+  inputs_def:
+    num_max_boxes: 50  # same value as PadBox.num_max_boxes below
+  sample_transforms:
+    - Decode: {}
+    - Mixup: {alpha: 1.5, beta: 1.5}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}  # = NormalizeImage mean x 255
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:  # resize targets below run from 320 to 608 in steps of 32
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: true, random_interp: true, keep_ratio: false}
+    - NormalizeBox: {}
+    - PadBox: {num_max_boxes: 50}
+    - BboxXYXY2XYWH: {}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+    - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
+  batch_size: 24
+  shuffle: true
+  drop_last: true
+  mixup_epoch: 25000  # larger than every `epoch` value in the optimizer_*.yml files of this change
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:  # same mean/std normalization as TrainReader, fixed 608x608 resize
+    - Decode: {}
+    - Resize: {target_size: [608, 608], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 8
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 608, 608]  # spatial size matches the Resize target below
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [608, 608], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 1
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_tiny.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_tiny.yml
@@ -0,0 +1,55 @@
+architecture: YOLOv3  # PP-YOLO tiny base model: MobileNetV3 backbone at scale .5 with PPYOLOTinyFPN neck
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: MobileNetV3
+  neck: PPYOLOTinyFPN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+MobileNetV3:
+  model_name: large
+  scale: .5  # pretrain_weights above points at the MobileNetV3_large_x0_5 checkpoint
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [7, 13, 16]  # three entries, like detection_block_channels / anchor_masks / downsample
+
+PPYOLOTinyFPN:
+  detection_block_channels: [160, 128, 96]
+  spp: true
+  drop_block: true
+
+YOLOv3Head:
+  anchors: [[10, 15], [24, 36], [72, 42],
+            [35, 87], [102, 96], [60, 170],
+            [220, 125], [128, 222], [264, 266]]
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # three groups of indices into the nine anchors above
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+
+YOLOv3Loss:
+  ignore_thresh: 0.5
+  downsample: [32, 16, 8]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.005  # same value as nms.score_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    score_threshold: 0.005
--- a/paddle_detection/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolo_tiny_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 4  # reader settings: TrainReader / EvalReader / TestReader pipelines follow
+TrainReader:
+  inputs_def:
+    num_max_boxes: 100  # same value as PadBox.num_max_boxes below
+  sample_transforms:
+    - Decode: {}
+    - Mixup: {alpha: 1.5, beta: 1.5}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}  # = NormalizeImage mean x 255
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:  # resize targets below run from 192 to 512 in steps of 32
+    - BatchRandomResize: {target_size: [192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512], random_size: true, random_interp: true, keep_ratio: false}
+    - NormalizeBox: {}
+    - PadBox: {num_max_boxes: 100}
+    - BboxXYXY2XYWH: {}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+    - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 15], [24, 36], [72, 42], [35, 87], [102, 96], [60, 170], [220, 125], [128, 222], [264, 266]], downsample_ratios: [32, 16, 8]}
+  batch_size: 32
+  shuffle: true
+  drop_last: true
+  mixup_epoch: 500  # presumably the epoch bound for applying Mixup -- TODO confirm against the loader
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:  # same mean/std normalization as TrainReader, fixed 320x320 resize
+    - Decode: {}
+    - Resize: {target_size: [320, 320], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 8
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 320, 320]  # spatial size matches the Resize target below
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [320, 320], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 1
--- a/paddle_detection/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml
@@ -0,0 +1,68 @@
+architecture: YOLOv3  # PP-YOLOv2 base model: depth-50, variant-d ResNet backbone with a PPYOLOPAN neck
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998  # presumably only used because use_ema is true -- TODO confirm
+
+# AMP training
+master_grad: true
+
+YOLOv3:  # each value names another top-level section of this file
+  backbone: ResNet
+  neck: PPYOLOPAN
+  yolo_head: YOLOv3Head
+  post_process: BBoxPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  return_idx: [1, 2, 3]  # three entries, like anchor_masks / downsample below
+  dcn_v2_stages: [3]  # the "dcn" in this file's name; index 3 is also the last return_idx entry
+  freeze_at: -1
+  freeze_norm: false
+  norm_decay: 0.
+
+PPYOLOPAN:  # no coord_conv key here, unlike PPYOLOFPN in ppyolo_r50vd_dcn.yml
+  drop_block: true
+  block_size: 3
+  keep_prob: 0.9
+  spp: true
+
+YOLOv3Head:
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # three groups of indices into the nine anchors above
+  loss: YOLOv3Loss  # names the `YOLOv3Loss:` section below
+  iou_aware: true
+  iou_aware_factor: 0.5  # ppyolo_r50vd_dcn.yml in this change uses 0.4
+
+YOLOv3Loss:
+  ignore_thresh: 0.7
+  downsample: [32, 16, 8]  # one entry per anchor_masks group
+  label_smooth: false
+  scale_x_y: 1.05  # same value as BBoxPostProcess.decode.scale_x_y
+  iou_loss: IouLoss  # names the `IouLoss:` section below
+  iou_aware_loss: IouAwareLoss  # names the `IouAwareLoss:` section below
+
+IouLoss:
+  loss_weight: 2.5
+  loss_square: true
+
+IouAwareLoss:
+  loss_weight: 1.0
+
+BBoxPostProcess:
+  decode:
+    name: YOLOBox
+    conf_thresh: 0.01  # same value as nms.score_threshold and nms.post_threshold
+    downsample_ratio: 32  # equals the first entry of YOLOv3Loss.downsample
+    clip_bbox: true
+    scale_x_y: 1.05
+  nms:
+    name: MatrixNMS
+    keep_top_k: 100
+    score_threshold: 0.01
+    post_threshold: 0.01
+    nms_top_k: -1
+    background_label: -1
--- a/paddle_detection/configs/ppyolo/_base_/ppyolov2_reader.yml
+++ b/paddle_detection/configs/ppyolo/_base_/ppyolov2_reader.yml
@@ -0,0 +1,42 @@
+worker_num: 2  # reader settings: TrainReader / EvalReader / TestReader pipelines follow
+TrainReader:
+  inputs_def:
+    num_max_boxes: 100  # same value as PadBox.num_max_boxes below
+  sample_transforms:
+    - Decode: {}
+    - Mixup: {alpha: 1.5, beta: 1.5}
+    - RandomDistort: {}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}  # = NormalizeImage mean x 255
+    - RandomCrop: {}
+    - RandomFlip: {}
+  batch_transforms:  # resize targets below run from 320 to 768 in steps of 32
+    - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: true, random_interp: true, keep_ratio: false}
+    - NormalizeBox: {}
+    - PadBox: {num_max_boxes: 100}
+    - BboxXYXY2XYWH: {}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+    - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
+  batch_size: 12
+  shuffle: true
+  drop_last: true
+  mixup_epoch: 25000  # larger than every `epoch` value in the optimizer_*.yml files of this change
+  use_shared_memory: true
+
+EvalReader:
+  sample_transforms:  # same mean/std normalization as TrainReader, fixed 640x640 resize
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 8
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 640, 640]  # spatial size matches the Resize target below
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
+    - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
+    - Permute: {}
+  batch_size: 1