Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,261 @@
# RCNN系列模型参数配置教程
标签: 模型参数配置
以`faster_rcnn_r50_fpn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
- 数据配置文件 `coco_detection.yml`
```yaml
# 数据评估类型
metric: COCO
# 数据集的类别数
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: train2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# 数据文件夹
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: val2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# 数据文件夹
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
- 优化器配置文件 `optimizer_1x.yml`
```yaml
# 总训练轮数
epoch: 12
# 学习率设置
LearningRate:
# 默认为8卡训练的学习率
base_lr: 0.01
# 学习率调整策略
schedulers:
- !PiecewiseDecay
gamma: 0.1
# 学习率变化位置(轮数)
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
# 优化器
OptimizerBuilder:
# 优化器
optimizer:
momentum: 0.9
type: Momentum
# 正则化
regularizer:
factor: 0.0001
type: L2
```
- 数据读取配置文件 `faster_fpn_reader.yml`
```yaml
# 每张GPU reader进程个数
worker_num: 2
# 训练数据
TrainReader:
# 训练数据transforms
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 训练时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: true
# 是否丢弃最后不能完整组成batch的数据
drop_last: true
# 表示reader是否对gt进行组batch的操作,在rcnn系列算法中设置为false,得到的gt格式为list[Tensor]
collate_batch: false
# 评估数据
EvalReader:
# 评估数据transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 评估时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: false
# 是否丢弃最后不能完整组成batch的数据
drop_last: false
# 测试数据
TestReader:
# 测试数据transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 测试时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: false
# 是否丢弃最后不能完整组成batch的数据
drop_last: false
```
- 模型配置文件 `faster_rcnn_r50_fpn.yml`
```yaml
# 模型结构类型
architecture: FasterRCNN
# 预训练模型地址
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
# FasterRCNN
FasterRCNN:
# backbone
backbone: ResNet
# neck
neck: FPN
# rpn_head
rpn_head: RPNHead
# bbox_head
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
# backbone
ResNet:
# index 0 stands for res2
depth: 50
# norm_type可设置参数bn 或 sync_bn
norm_type: bn
# freeze_at index, 0 represent res2
freeze_at: 0
# return_idx
return_idx: [0,1,2,3]
# num_stages
num_stages: 4
# FPN
FPN:
# channel of FPN
out_channel: 256
# RPNHead
RPNHead:
# anchor generator
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
# rpn_target_assign
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
# 训练时生成proposal的参数
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
# 评估时生成proposal的参数
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# BBoxHead
BBoxHead:
# TwoFCHead as BBoxHead
head: TwoFCHead
# roi align
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
# bbox_assigner
bbox_assigner: BBoxAssigner
# BBoxAssigner
BBoxAssigner:
# batch_size_per_im
batch_size_per_im: 512
# 背景阈值
bg_thresh: 0.5
# 前景阈值
fg_thresh: 0.5
# 前景比例
fg_fraction: 0.25
# 是否随机采样
use_random: True
# TwoFCHead
TwoFCHead:
# TwoFCHead特征维度
out_channel: 1024
# BBoxPostProcess
BBoxPostProcess:
# 解码
decode: RCNNBox
# nms
nms:
# 使用MultiClassNMS
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
```
- 运行时配置文件 `runtime.yml`
```yaml
# 是否使用gpu
use_gpu: true
# 日志打印间隔
log_iter: 20
# save_dir
save_dir: output
# 模型保存间隔时间
snapshot_epoch: 1
```


@@ -0,0 +1,261 @@
# RCNN series model parameter configuration tutorial
Tag: Model parameter configuration
Take `faster_rcnn_r50_fpn_1x_coco.yml` as an example. The model is composed of five sub configuration files:
- Data configuration file `coco_detection.yml`
```yaml
# Data evaluation type
metric: COCO
# The number of categories in the dataset
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: train2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# Dataset directory
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: val2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# Dataset directory
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
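As the comments note, `image_dir` and `anno_path` are resolved relative to `dataset_dir`. A minimal Python sketch of that resolution, using only the values from the config above:

```python
import os

# Values taken from the dataset configuration above.
dataset_dir = "dataset/coco"
image_dir = "train2017"
anno_path = "annotations/instances_train2017.json"

print(os.path.join(dataset_dir, image_dir))  # dataset/coco/train2017
print(os.path.join(dataset_dir, anno_path))  # dataset/coco/annotations/instances_train2017.json
```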
- Optimizer configuration file `optimizer_1x.yml`
```yaml
# Total training epochs
epoch: 12
# learning rate setting
LearningRate:
# Default learning rate for 8-GPU training
base_lr: 0.01
# Learning rate adjustment strategy
schedulers:
- !PiecewiseDecay
gamma: 0.1
# Epochs at which the learning rate changes
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
# Optimizer
OptimizerBuilder:
# Optimizer
optimizer:
momentum: 0.9
type: Momentum
# Regularization
regularizer:
factor: 0.0001
type: L2
```
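The schedule above combines `LinearWarmup` over the first 1000 iterations with `PiecewiseDecay` at epochs 8 and 11. A rough sketch of the resulting learning-rate curve; the `steps_per_epoch` value and the linear interpolation used for warmup are assumptions for illustration, not values taken from the config:

```python
def lr_at(iteration, steps_per_epoch, base_lr=0.01, warmup_steps=1000,
          start_factor=0.1, milestones=(8, 11), gamma=0.1):
    """Sketch of PiecewiseDecay combined with LinearWarmup as configured above."""
    # Piecewise decay: multiply the base rate by gamma after each milestone epoch.
    lr = base_lr
    for m in milestones:
        if iteration >= m * steps_per_epoch:
            lr *= gamma
    # Linear warmup: ramp the factor from start_factor to 1 over warmup_steps iterations.
    if iteration < warmup_steps:
        alpha = iteration / warmup_steps
        lr *= start_factor * (1 - alpha) + alpha
    return lr

steps_per_epoch = 15000  # illustrative value only; depends on dataset size and batch size
print(lr_at(0, steps_per_epoch))                     # 0.001 at the start of warmup
print(lr_at(5000, steps_per_epoch))                  # 0.01 after warmup
print(lr_at(9 * steps_per_epoch, steps_per_epoch))   # 0.001 after the decay at epoch 8
```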
- Data reader configuration file `faster_fpn_reader.yml`
```yaml
# Number of reader processes per GPU
worker_num: 2
# training data
TrainReader:
# Training data transforms
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# Batch size during training
batch_size: 1
# Whether to shuffle the data
shuffle: true
# Whether to discard the last incomplete batch
drop_last: true
# Whether the reader collates the ground truths into a batch; for RCNN series models it is set to false, so the gt is returned as list[Tensor]
collate_batch: false
collate_batch: false
# Evaluate data
EvalReader:
# Evaluate data transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# batch_size of evaluation
batch_size: 1
# Whether to shuffle the data
shuffle: false
# Whether to discard the last incomplete batch
drop_last: false
# test data
TestReader:
# test data transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# batch_size of test
batch_size: 1
# Whether to shuffle the data
shuffle: false
# Whether to discard the last incomplete batch
drop_last: false
```
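`PadBatch: {pad_to_stride: 32}` rounds each image's height and width up to a multiple of 32, which the comment above notes is required by the FPN structure. A small sketch of that computation:

```python
import math

def padded_hw(height, width, stride=32):
    """Round spatial dimensions up to the nearest multiple of `stride`."""
    return (math.ceil(height / stride) * stride,
            math.ceil(width / stride) * stride)

# An 800x1333 image (the eval Resize target above) is padded to 800x1344.
print(padded_hw(800, 1333))  # (800, 1344)
```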
- Model configuration file `faster_rcnn_r50_fpn.yml`
```yaml
# Model structure type
architecture: FasterRCNN
# Pretrain model address
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
# FasterRCNN
FasterRCNN:
# backbone
backbone: ResNet
# neck
neck: FPN
# rpn_head
rpn_head: RPNHead
# bbox_head
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
# backbone
ResNet:
# index 0 stands for res2
depth: 50
# norm_type, Configurable parameter: bn or sync_bn
norm_type: bn
# freeze_at index, 0 represent res2
freeze_at: 0
# return_idx
return_idx: [0,1,2,3]
# num_stages
num_stages: 4
# FPN
FPN:
# channel of FPN
out_channel: 256
# RPNHead
RPNHead:
# anchor generator
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
# rpn_target_assign
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
# Parameters for proposal generation during training
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
# Parameters for proposal generation during evaluation
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# BBoxHead
BBoxHead:
# TwoFCHead as BBoxHead
head: TwoFCHead
# roi align
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
# bbox_assigner
bbox_assigner: BBoxAssigner
# BBoxAssigner
BBoxAssigner:
# batch_size_per_im
batch_size_per_im: 512
# Background threshold
bg_thresh: 0.5
# Foreground threshold
fg_thresh: 0.5
# Foreground fraction
fg_fraction: 0.25
# Whether to sample randomly
use_random: True
# TwoFCHead
TwoFCHead:
# TwoFCHead feature dimension
out_channel: 1024
# BBoxPostProcess
BBoxPostProcess:
# decode
decode: RCNNBox
# nms
nms:
# use MultiClassNMS
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
```
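The `anchor_generator` pairs one entry of `anchor_sizes` and one stride with each FPN level and places one anchor per aspect ratio at every location. A sketch of the anchor widths and heights this produces; keeping the area at size² while setting height/width to the aspect ratio is the usual anchor-generator convention and is an assumption here:

```python
aspect_ratios = [0.5, 1.0, 2.0]
anchor_sizes = [[32], [64], [128], [256], [512]]
strides = [4, 8, 16, 32, 64]

for stride, sizes in zip(strides, anchor_sizes):
    for size in sizes:
        shapes = []
        for ratio in aspect_ratios:
            # Keep the area at size*size and set height/width to the aspect ratio.
            w = size / ratio ** 0.5
            h = size * ratio ** 0.5
            shapes.append((round(w, 1), round(h, 1)))
        print(f"stride {stride}: anchor (w, h) = {shapes}")
```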
- Runtime configuration file `runtime.yml`
```yaml
# Whether to use gpu
use_gpu: true
# Log printing interval
log_iter: 20
# save_dir
save_dir: output
# Model save interval
snapshot_epoch: 1
```
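`log_iter` and `snapshot_epoch` control how often the trainer prints a log line and saves a checkpoint. A toy loop that only illustrates the two intervals; it is not PaddleDetection code, and `steps_per_epoch` is an illustrative value:

```python
log_iter = 20          # from runtime.yml
snapshot_epoch = 1     # from runtime.yml
steps_per_epoch = 100  # illustrative value only

for epoch in range(2):
    for step in range(steps_per_epoch):
        iteration = epoch * steps_per_epoch + step
        if iteration % log_iter == 0:
            print(f"iter {iteration}: print one training log line")
    if (epoch + 1) % snapshot_epoch == 0:
        print(f"epoch {epoch}: save a checkpoint under save_dir (output/)")
```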


@@ -0,0 +1,45 @@
# Multi Scale Test Configuration
Tags: Configuration
---
```yaml
##################################### Multi scale test configuration #####################################
EvalReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
TestReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
```
---
Multi Scale Test is a TTA (Test Time Augmentation) method that can improve object detection performance.
The input image is scaled to several different sizes, the model produces predictions (bboxes) at each scale, and all predictions are then combined into the final result (**NMS** is used to aggregate them).
## _MultiscaleTestResize_ option
The `MultiscaleTestResize` option enables multi scale test prediction.
`origin_target_size: [800, 1333]` means the input image is first resized so that its short edge is 800 pixels and its long edge is at most 1333 pixels.
The `target_size: [700 , 900]` property specifies the additional test scales.
Multi scale testing can be plugged into the evaluation or test (inference) process by adding a `MultiscaleTestResize` entry to `EvalReader.sample_transforms` or `TestReader.sample_transforms`.
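A rough sketch of the aggregation step described above: predictions from each scale are first mapped back to the original image coordinates, then merged with a plain greedy NMS. This illustrates the idea only and is not the library's implementation:

```python
def iou(a, b):
    """IoU of two boxes in (x1, y1, x2, y2) form."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def merge_multiscale(preds_per_scale, iou_thresh=0.5):
    """preds_per_scale: list of (boxes_in_original_coords, scores), one entry per scale."""
    flat = [(b, s) for boxes, scores in preds_per_scale
            for b, s in zip(boxes, scores)]
    flat.sort(key=lambda x: x[1], reverse=True)  # highest score first
    kept = []
    for box, score in flat:
        if all(iou(box, k) < iou_thresh for k, _ in kept):
            kept.append((box, score))
    return kept

# Two scales predicting roughly the same object are merged into a single box.
scale_a = ([[10, 10, 50, 50]], [0.9])
scale_b = ([[12, 11, 49, 52]], [0.8])
print(merge_multiscale([scale_a, scale_b]))  # keeps only the 0.9 box
```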
---
### Note
Currently only CascadeRCNN, FasterRCNN and MaskRCNN support multi scale testing, and the batch size must be 1.


@@ -0,0 +1,45 @@
# 多尺度测试的配置
标签: 配置
---
```yaml
##################################### 多尺度测试的配置 #####################################
EvalReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
TestReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
```
---
多尺度测试是一种TTA方法(测试时增强),可以用于提高目标检测的准确率。
输入图像首先被缩放为不同尺度的图像,然后模型对这些不同尺度的图像进行预测,最后将这些不同尺度上的预测结果整合为最终预测结果。(这里使用了**NMS**来整合不同尺度的预测结果)
## _MultiscaleTestResize_ 选项
`MultiscaleTestResize` 选项用于开启多尺度测试。
`origin_target_size: [800, 1333]` 项代表输入图像首先被缩放为短边为800、最长边不超过1333的图像。
`target_size: [700 , 900]` 项设置不同的预测尺度。
通过在`EvalReader.sample_transforms`或`TestReader.sample_transforms`中设置`MultiscaleTestResize`项,可以在评估过程或预测过程中开启多尺度测试。
---
### 注意
目前多尺度测试只支持CascadeRCNN、FasterRCNN和MaskRCNN网络,并且batch size需要是1。


@@ -0,0 +1,264 @@
# YOLO系列模型参数配置教程
标签: 模型参数配置
以`ppyolo_r50vd_dcn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
- 数据配置文件 `coco_detection.yml`
```yaml
# 数据评估类型
metric: COCO
# 数据集的类别数
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: train2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# 数据文件夹
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: val2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# 数据文件夹
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# 标注文件路径,相对 dataset_dir 路径
anno_path: annotations/instances_val2017.json
```
- 优化器配置文件 `optimizer_1x.yml`
```yaml
# 总训练轮数
epoch: 405
# 学习率设置
LearningRate:
# 默认为8卡训练的学习率
base_lr: 0.01
# 学习率调整策略
schedulers:
- !PiecewiseDecay
gamma: 0.1
# 学习率变化位置(轮数)
milestones:
- 243
- 324
# Warmup
- !LinearWarmup
start_factor: 0.
steps: 4000
# 优化器
OptimizerBuilder:
# 优化器
optimizer:
momentum: 0.9
type: Momentum
# 正则化
regularizer:
factor: 0.0005
type: L2
```
- 数据读取配置文件 `ppyolo_reader.yml`
```yaml
# 每张GPU reader进程个数
worker_num: 2
# 训练数据
TrainReader:
inputs_def:
num_max_boxes: 50
# 训练数据transforms
sample_transforms:
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
# batch_transforms
batch_transforms:
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
# 训练时batch_size
batch_size: 24
# 读取数据是否乱序
shuffle: true
# 是否丢弃最后不能完整组成batch的数据
drop_last: true
# mixup_epoch大于最大epoch,表示训练过程一直使用mixup数据增广
mixup_epoch: 25000
# 是否通过共享内存进行数据读取加速,需要保证共享内存大小(如/dev/shm)满足大于1G
use_shared_memory: true
# 评估数据
EvalReader:
# 评估数据transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# 评估时batch_size
batch_size: 8
# 测试数据
TestReader:
inputs_def:
image_shape: [3, 608, 608]
# 测试数据transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# 测试时batch_size
batch_size: 1
```
- 模型配置文件 `ppyolo_r50vd_dcn.yml`
```yaml
# 模型结构类型
architecture: YOLOv3
# 预训练模型地址
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
# norm_type
norm_type: sync_bn
# 是否使用ema
use_ema: true
# ema_decay
ema_decay: 0.9998
# YOLOv3
YOLOv3:
# backbone
backbone: ResNet
# neck
neck: PPYOLOFPN
# yolo_head
yolo_head: YOLOv3Head
# post_process
post_process: BBoxPostProcess
# backbone
ResNet:
# depth
depth: 50
# variant
variant: d
# return_idx, 0 represent res2
return_idx: [1, 2, 3]
# dcn_v2_stages
dcn_v2_stages: [3]
# freeze_at
freeze_at: -1
# freeze_norm
freeze_norm: false
# norm_decay
norm_decay: 0.
# PPYOLOFPN
PPYOLOFPN:
# 是否coord_conv
coord_conv: true
# 是否drop_block
drop_block: true
# block_size
block_size: 3
# keep_prob
keep_prob: 0.9
# 是否spp
spp: true
# YOLOv3Head
YOLOv3Head:
# anchors
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
# anchor_masks
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
# loss
loss: YOLOv3Loss
# 是否使用iou_aware
iou_aware: true
# iou_aware_factor
iou_aware_factor: 0.4
# YOLOv3Loss
YOLOv3Loss:
# ignore_thresh
ignore_thresh: 0.7
# downsample
downsample: [32, 16, 8]
# 是否label_smooth
label_smooth: false
# scale_x_y
scale_x_y: 1.05
# iou_loss
iou_loss: IouLoss
# iou_aware_loss
iou_aware_loss: IouAwareLoss
# IouLoss
IouLoss:
loss_weight: 2.5
loss_square: true
# IouAwareLoss
IouAwareLoss:
loss_weight: 1.0
# BBoxPostProcess
BBoxPostProcess:
decode:
name: YOLOBox
conf_thresh: 0.01
downsample_ratio: 32
clip_bbox: true
scale_x_y: 1.05
# nms 配置
nms:
name: MatrixNMS
keep_top_k: 100
score_threshold: 0.01
post_threshold: 0.01
nms_top_k: -1
background_label: -1
```
- 运行时配置文件 `runtime.yml`
```yaml
# 是否使用gpu
use_gpu: true
# 日志打印间隔
log_iter: 20
# save_dir
save_dir: output
# 模型保存间隔时间
snapshot_epoch: 1
```


@@ -0,0 +1,264 @@
# YOLO series model parameter configuration tutorial
Tag: Model parameter configuration
Take `ppyolo_r50vd_dcn_1x_coco.yml` as an example. The model is composed of five sub configuration files:
- Data configuration file `coco_detection.yml`
```yaml
# Data evaluation type
metric: COCO
# The number of categories in the dataset
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: train2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# Dataset directory
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: val2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# Dataset directory
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
- Optimizer configuration file `optimizer_1x.yml`
```yaml
# Total training epochs
epoch: 405
# learning rate setting
LearningRate:
# Default learning rate for 8-GPU training
base_lr: 0.01
# Learning rate adjustment strategy
schedulers:
- !PiecewiseDecay
gamma: 0.1
# Epochs at which the learning rate changes
milestones:
- 243
- 324
# Warmup
- !LinearWarmup
start_factor: 0.
steps: 4000
# Optimizer
OptimizerBuilder:
# Optimizer
optimizer:
momentum: 0.9
type: Momentum
# Regularization
regularizer:
factor: 0.0005
type: L2
```
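The comment above notes that `base_lr: 0.01` assumes 8-GPU training. A common (but not config-mandated) convention is to scale the learning rate linearly with the total batch size; a sketch of that rule, where the reference total batch of 192 comes from `batch_size: 24` in the reader configuration below:

```python
def scaled_lr(base_lr, base_total_batch, new_total_batch):
    """Linear scaling rule: the learning rate scales with the total batch size."""
    return base_lr * new_total_batch / base_total_batch

# Reference setup for this config: 8 GPUs x batch_size 24 = 192 images per step.
print(scaled_lr(0.01, 192, 192))  # 0.01, unchanged for the default 8-GPU setup
print(scaled_lr(0.01, 192, 24))   # 0.00125 for a single GPU with batch_size 24
```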
- Data reader configuration file `ppyolo_reader.yml`
```yaml
# Number of reader processes per GPU
worker_num: 2
# training data
TrainReader:
inputs_def:
num_max_boxes: 50
# Training data transforms
sample_transforms:
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
# batch_transforms
batch_transforms:
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
# Batch size during training
batch_size: 24
# Whether to shuffle the data
shuffle: true
# Whether to discard the last incomplete batch
drop_last: true
# mixup_epoch is larger than the total number of epochs, which means mixup augmentation is used throughout training
mixup_epoch: 25000
# Whether to use shared memory to accelerate data reading; make sure the shared memory size (e.g. /dev/shm) is greater than 1 GB
use_shared_memory: true
# Evaluate data
EvalReader:
# Evaluating data transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# Batch_size during evaluation
batch_size: 8
# test data
TestReader:
inputs_def:
image_shape: [3, 608, 608]
# test data transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# Batch size during inference
batch_size: 1
```
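`Mixup: {alpha: 1.5, beta: 1.5}` blends two samples with a weight drawn from a Beta distribution, and `mixup_epoch: 25000` (larger than the 405 training epochs) keeps it enabled for the whole run. A small sketch of the blending step, as an illustration of the transform rather than the exact implementation:

```python
import random

def mixup(img_a, img_b, alpha=1.5, beta=1.5):
    """Blend two images (nested lists of floats) with a Beta-distributed weight."""
    lam = random.betavariate(alpha, beta)
    blended = [[lam * pa + (1 - lam) * pb for pa, pb in zip(ra, rb)]
               for ra, rb in zip(img_a, img_b)]
    # Ground-truth boxes of both images are kept; lam and (1 - lam) act as their
    # relative weights in the loss (sketch only).
    return blended, lam

img_a = [[0.0, 0.0], [0.0, 0.0]]
img_b = [[1.0, 1.0], [1.0, 1.0]]
blended, lam = mixup(img_a, img_b)
print(lam, blended)
```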
- Model configuration file `ppyolo_r50vd_dcn.yml`
```yaml
# Model structure type
architecture: YOLOv3
# Pretrain model address
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
# norm_type
norm_type: sync_bn
# Whether to use EMA
use_ema: true
# ema_decay
ema_decay: 0.9998
# YOLOv3
YOLOv3:
# backbone
backbone: ResNet
# neck
neck: PPYOLOFPN
# yolo_head
yolo_head: YOLOv3Head
# post_process
post_process: BBoxPostProcess
# backbone
ResNet:
# depth
depth: 50
# variant
variant: d
# return_idx, 0 represent res2
return_idx: [1, 2, 3]
# dcn_v2_stages
dcn_v2_stages: [3]
# freeze_at
freeze_at: -1
# freeze_norm
freeze_norm: false
# norm_decay
norm_decay: 0.
# PPYOLOFPN
PPYOLOFPN:
# Whether to use coord_conv
coord_conv: true
# Whether to use drop_block
drop_block: true
# block_size
block_size: 3
# keep_prob
keep_prob: 0.9
# Whether to use spp
spp: true
# YOLOv3Head
YOLOv3Head:
# anchors
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
# anchor_masks
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
# loss
loss: YOLOv3Loss
# whether to use iou_aware
iou_aware: true
# iou_aware_factor
iou_aware_factor: 0.4
# YOLOv3Loss
YOLOv3Loss:
# ignore_thresh
ignore_thresh: 0.7
# downsample
downsample: [32, 16, 8]
# Whether to use label_smooth
label_smooth: false
# scale_x_y
scale_x_y: 1.05
# iou_loss
iou_loss: IouLoss
# iou_aware_loss
iou_aware_loss: IouAwareLoss
# IouLoss
IouLoss:
loss_weight: 2.5
loss_square: true
# IouAwareLoss
IouAwareLoss:
loss_weight: 1.0
# BBoxPostProcess
BBoxPostProcess:
decode:
name: YOLOBox
conf_thresh: 0.01
downsample_ratio: 32
clip_bbox: true
scale_x_y: 1.05
# nms setting
nms:
name: MatrixNMS
keep_top_k: 100
score_threshold: 0.01
post_threshold: 0.01
nms_top_k: -1
background_label: -1
```
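`anchor_masks` selects which of the nine anchors each detection head uses, and `downsample: [32, 16, 8]` gives the stride of the corresponding feature map. A short sketch that prints the mapping encoded by this config:

```python
anchors = [[10, 13], [16, 30], [33, 23],
           [30, 61], [62, 45], [59, 119],
           [116, 90], [156, 198], [373, 326]]
anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
downsample = [32, 16, 8]

for stride, mask in zip(downsample, anchor_masks):
    picked = [anchors[i] for i in mask]
    print(f"stride {stride}: anchors {picked}")
# The coarsest map (stride 32) gets the largest anchors, stride 8 the smallest.
```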
- Runtime configuration file `runtime.yml`
```yaml
# Whether to use gpu
use_gpu: true
# Log printing interval
log_iter: 20
# save_dir
save_dir: output
# Model save interval
snapshot_epoch: 1
```
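The model configuration file above also enables weight averaging with `use_ema: true` and `ema_decay: 0.9998`. A minimal sketch of what an exponential moving average of a single parameter looks like under that decay (illustration only, not the library's implementation):

```python
def ema_update(ema_value, new_value, decay=0.9998):
    """One exponential-moving-average step for a single parameter (sketch)."""
    return decay * ema_value + (1 - decay) * new_value

ema = 0.0
for step in range(10000):
    ema = ema_update(ema, 1.0)  # parameter held at 1.0 for simplicity
print(round(ema, 3))            # slowly approaches 1.0 (about 0.865 here)
```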