Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,261 @@
# RCNN系列模型参数配置教程
标签: 模型参数配置
以`faster_rcnn_r50_fpn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
- 数据配置文件 `coco_detection.yml`
```yaml
# 数据评估类型
metric: COCO
# 数据集的类别数
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: train2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# 数据文件夹
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: val2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# 数据文件夹
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
- 优化器配置文件 `optimizer_1x.yml`
```yaml
# 总训练轮数
epoch: 12
# 学习率设置
LearningRate:
# 默认为8卡训练的学习率
base_lr: 0.01
# 学习率调整策略
schedulers:
- !PiecewiseDecay
gamma: 0.1
# 学习率变化位置(轮数)
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
# 优化器
OptimizerBuilder:
# 优化器
optimizer:
momentum: 0.9
type: Momentum
# 正则化
regularizer:
factor: 0.0001
type: L2
```
- 数据读取配置文件 `faster_fpn_reader.yml`
```yaml
# 每张GPU reader进程个数
worker_num: 2
# 训练数据
TrainReader:
# 训练数据transforms
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 训练时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: true
# 是否丢弃最后不能完整组成batch的数据
drop_last: true
# 表示reader是否对gt进行组batch的操作,在rcnn系列算法中设置为false,得到的gt格式为list[Tensor]
collate_batch: false
# 评估数据
EvalReader:
# 评估数据transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 评估时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: false
# 是否丢弃最后不能完整组成batch的数据
drop_last: false
# 测试数据
TestReader:
# 测试数据transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# 由于模型存在FPN结构输入图片需要padding为32的倍数
- PadBatch: {pad_to_stride: 32}
# 测试时batch_size
batch_size: 1
# 读取数据是否乱序
shuffle: false
# 是否丢弃最后不能完整组成batch的数据
drop_last: false
```
- 模型配置文件 `faster_rcnn_r50_fpn.yml`
```yaml
# 模型结构类型
architecture: FasterRCNN
# 预训练模型地址
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
# FasterRCNN
FasterRCNN:
# backbone
backbone: ResNet
# neck
neck: FPN
# rpn_head
rpn_head: RPNHead
# bbox_head
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
# backbone
ResNet:
# index 0 stands for res2
depth: 50
# norm_type可设置参数bn 或 sync_bn
norm_type: bn
# freeze_at index, 0 represent res2
freeze_at: 0
# return_idx
return_idx: [0,1,2,3]
# num_stages
num_stages: 4
# FPN
FPN:
# channel of FPN
out_channel: 256
# RPNHead
RPNHead:
# anchor generator
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
# rpn_target_assign
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
# 训练时生成proposal的参数
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
# 评估时生成proposal的参数
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# BBoxHead
BBoxHead:
# TwoFCHead as BBoxHead
head: TwoFCHead
# roi align
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
# bbox_assigner
bbox_assigner: BBoxAssigner
# BBoxAssigner
BBoxAssigner:
# batch_size_per_im
batch_size_per_im: 512
# 背景阈值
bg_thresh: 0.5
# 前景阈值
fg_thresh: 0.5
# 前景比例
fg_fraction: 0.25
# 是否随机采样
use_random: True
# TwoFCHead
TwoFCHead:
# TwoFCHead特征维度
out_channel: 1024
# BBoxPostProcess
BBoxPostProcess:
# 解码
decode: RCNNBox
# nms
nms:
# 使用MultiClassNMS
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
```
- 运行时配置文件 `runtime.yml`
```yaml
# 是否使用gpu
use_gpu: true
# 日志打印间隔
log_iter: 20
# save_dir
save_dir: output
# 模型保存间隔时间
snapshot_epoch: 1
```


@@ -0,0 +1,261 @@
# RCNN series model parameter configuration tutorial
Tag: Model parameter configuration
Take `faster_rcnn_r50_fpn_1x_coco.yml` as an example. The model is composed of five sub configuration files:
- Data configuration file `coco_detection.yml`
```yaml
# Data evaluation type
metric: COCO
# The number of categories in the dataset
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: train2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# Dataset directory
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: val2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# Dataset directory
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
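As the comments note, `image_dir` and `anno_path` are resolved relative to `dataset_dir`. A minimal Python sketch of that resolution, using only the values from the config above:

```python
import os

# Values taken from the dataset configuration above.
dataset_dir = "dataset/coco"
image_dir = "train2017"
anno_path = "annotations/instances_train2017.json"

print(os.path.join(dataset_dir, image_dir))  # dataset/coco/train2017
print(os.path.join(dataset_dir, anno_path))  # dataset/coco/annotations/instances_train2017.json
```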
- Optimizer configuration file `optimizer_1x.yml`
```yaml
# Total training epochs
epoch: 12
# learning rate setting
LearningRate:
# Default learning rate for 8-GPU training
base_lr: 0.01
# Learning rate adjustment strategy
schedulers:
- !PiecewiseDecay
gamma: 0.1
# Epochs at which the learning rate changes
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
# Optimizer
OptimizerBuilder:
# Optimizer
optimizer:
momentum: 0.9
type: Momentum
# Regularization
regularizer:
factor: 0.0001
type: L2
```
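The schedule above combines `LinearWarmup` over the first 1000 iterations with `PiecewiseDecay` at epochs 8 and 11. A rough sketch of the resulting learning-rate curve; the `steps_per_epoch` value and the linear interpolation used for warmup are assumptions for illustration, not values taken from the config:

```python
def lr_at(iteration, steps_per_epoch, base_lr=0.01, warmup_steps=1000,
          start_factor=0.1, milestones=(8, 11), gamma=0.1):
    """Sketch of PiecewiseDecay combined with LinearWarmup as configured above."""
    # Piecewise decay: multiply the base rate by gamma after each milestone epoch.
    lr = base_lr
    for m in milestones:
        if iteration >= m * steps_per_epoch:
            lr *= gamma
    # Linear warmup: ramp the factor from start_factor to 1 over warmup_steps iterations.
    if iteration < warmup_steps:
        alpha = iteration / warmup_steps
        lr *= start_factor * (1 - alpha) + alpha
    return lr

steps_per_epoch = 15000  # illustrative value only; depends on dataset size and batch size
print(lr_at(0, steps_per_epoch))                     # 0.001 at the start of warmup
print(lr_at(5000, steps_per_epoch))                  # 0.01 after warmup
print(lr_at(9 * steps_per_epoch, steps_per_epoch))   # 0.001 after the decay at epoch 8
```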
- Data reader configuration file `faster_fpn_reader.yml`
```yaml
# Number of reader processes per GPU
worker_num: 2
# training data
TrainReader:
# Training data transforms
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# Batch size during training
batch_size: 1
# Whether to shuffle the data
shuffle: true
# Whether to discard the last incomplete batch
drop_last: true
# Whether the reader collates the ground truths into a batch; for RCNN series models it is set to false, so the gt is returned as list[Tensor]
collate_batch: false
collate_batch: false
# Evaluate data
EvalReader:
# Evaluate data transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# batch_size of evaluation
batch_size: 1
# Whether to shuffle the data
shuffle: false
# Whether to discard the last incomplete batch
drop_last: false
# test data
TestReader:
# test data transforms
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
# Since the model has FPN structure, the input image needs a multiple of 32 padding
- PadBatch: {pad_to_stride: 32}
# batch_size of test
batch_size: 1
# Whether to shuffle the data
shuffle: false
# Whether to discard the last incomplete batch
drop_last: false
```
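`PadBatch: {pad_to_stride: 32}` rounds each image's height and width up to a multiple of 32, which the comment above notes is required by the FPN structure. A small sketch of that computation:

```python
import math

def padded_hw(height, width, stride=32):
    """Round spatial dimensions up to the nearest multiple of `stride`."""
    return (math.ceil(height / stride) * stride,
            math.ceil(width / stride) * stride)

# An 800x1333 image (the eval Resize target above) is padded to 800x1344.
print(padded_hw(800, 1333))  # (800, 1344)
```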
- Model configuration file `faster_rcnn_r50_fpn.yml`
```yaml
# Model structure type
architecture: FasterRCNN
# Pretrain model address
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
# FasterRCNN
FasterRCNN:
# backbone
backbone: ResNet
# neck
neck: FPN
# rpn_head
rpn_head: RPNHead
# bbox_head
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
# backbone
ResNet:
# index 0 stands for res2
depth: 50
# norm_type, Configurable parameter: bn or sync_bn
norm_type: bn
# freeze_at index, 0 represent res2
freeze_at: 0
# return_idx
return_idx: [0,1,2,3]
# num_stages
num_stages: 4
# FPN
FPN:
# channel of FPN
out_channel: 256
# RPNHead
RPNHead:
# anchor generator
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
# rpn_target_assign
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
# Parameters for proposal generation during training
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
# Parameters for proposal generation during evaluation
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# BBoxHead
BBoxHead:
# TwoFCHead as BBoxHead
head: TwoFCHead
# roi align
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
# bbox_assigner
bbox_assigner: BBoxAssigner
# BBoxAssigner
BBoxAssigner:
# batch_size_per_im
batch_size_per_im: 512
# Background threshold
bg_thresh: 0.5
# Foreground threshold
fg_thresh: 0.5
# Foreground fraction
fg_fraction: 0.25
# Whether to sample randomly
use_random: True
# TwoFCHead
TwoFCHead:
# TwoFCHead feature dimension
out_channel: 1024
# BBoxPostProcess
BBoxPostProcess:
# decode
decode: RCNNBox
# nms
nms:
# use MultiClassNMS
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
```
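The `anchor_generator` pairs one entry of `anchor_sizes` and one stride with each FPN level and places one anchor per aspect ratio at every location. A sketch of the anchor widths and heights this produces; keeping the area at size² while setting height/width to the aspect ratio is the usual anchor-generator convention and is an assumption here:

```python
aspect_ratios = [0.5, 1.0, 2.0]
anchor_sizes = [[32], [64], [128], [256], [512]]
strides = [4, 8, 16, 32, 64]

for stride, sizes in zip(strides, anchor_sizes):
    for size in sizes:
        shapes = []
        for ratio in aspect_ratios:
            # Keep the area at size*size and set height/width to the aspect ratio.
            w = size / ratio ** 0.5
            h = size * ratio ** 0.5
            shapes.append((round(w, 1), round(h, 1)))
        print(f"stride {stride}: anchor (w, h) = {shapes}")
```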
- Runtime configuration file `runtime.yml`
```yaml
# Whether to use gpu
use_gpu: true
# Log printing interval
log_iter: 20
# save_dir
save_dir: output
# Model save interval
snapshot_epoch: 1
```
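`log_iter` and `snapshot_epoch` control how often the trainer prints a log line and saves a checkpoint. A toy loop that only illustrates the two intervals; it is not PaddleDetection code, and `steps_per_epoch` is an illustrative value:

```python
log_iter = 20          # from runtime.yml
snapshot_epoch = 1     # from runtime.yml
steps_per_epoch = 100  # illustrative value only

for epoch in range(2):
    for step in range(steps_per_epoch):
        iteration = epoch * steps_per_epoch + step
        if iteration % log_iter == 0:
            print(f"iter {iteration}: print one training log line")
    if (epoch + 1) % snapshot_epoch == 0:
        print(f"epoch {epoch}: save a checkpoint under save_dir (output/)")
```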


@@ -0,0 +1,45 @@
# Multi Scale Test Configuration
Tags: Configuration
---
```yaml
##################################### Multi scale test configuration #####################################
EvalReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
TestReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
```
---
Multi Scale Test is a TTA (Test Time Augmentation) method that can improve object detection performance.
The input image is scaled to several different sizes, the model produces predictions (bboxes) at each scale, and all predictions are then combined into the final result (**NMS** is used to aggregate them).
## _MultiscaleTestResize_ option
The `MultiscaleTestResize` option enables multi scale test prediction.
`origin_target_size: [800, 1333]` means the input image is first resized so that its short edge is 800 pixels and its long edge is at most 1333 pixels.
The `target_size: [700 , 900]` property specifies the additional test scales.
Multi scale testing can be plugged into the evaluation or test (inference) process by adding a `MultiscaleTestResize` entry to `EvalReader.sample_transforms` or `TestReader.sample_transforms`.
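A rough sketch of the aggregation step described above: predictions from each scale are first mapped back to the original image coordinates, then merged with a plain greedy NMS. This illustrates the idea only and is not the library's implementation:

```python
def iou(a, b):
    """IoU of two boxes in (x1, y1, x2, y2) form."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def merge_multiscale(preds_per_scale, iou_thresh=0.5):
    """preds_per_scale: list of (boxes_in_original_coords, scores), one entry per scale."""
    flat = [(b, s) for boxes, scores in preds_per_scale
            for b, s in zip(boxes, scores)]
    flat.sort(key=lambda x: x[1], reverse=True)  # highest score first
    kept = []
    for box, score in flat:
        if all(iou(box, k) < iou_thresh for k, _ in kept):
            kept.append((box, score))
    return kept

# Two scales predicting roughly the same object are merged into a single box.
scale_a = ([[10, 10, 50, 50]], [0.9])
scale_b = ([[12, 11, 49, 52]], [0.8])
print(merge_multiscale([scale_a, scale_b]))  # keeps only the 0.9 box
```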
---
### Note
Currently only CascadeRCNN, FasterRCNN and MaskRCNN support multi scale testing, and the batch size must be 1.


@@ -0,0 +1,45 @@
# 多尺度测试的配置
标签: 配置
---
```yaml
##################################### 多尺度测试的配置 #####################################
EvalReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
TestReader:
sample_transforms:
- Decode: {}
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
```
---
多尺度测试是一种TTA方法(测试时增强),可以用于提高目标检测的准确率。
输入图像首先被缩放为不同尺度的图像,然后模型对这些不同尺度的图像进行预测,最后将这些不同尺度上的预测结果整合为最终预测结果。(这里使用了**NMS**来整合不同尺度的预测结果)
## _MultiscaleTestResize_ 选项
`MultiscaleTestResize` 选项用于开启多尺度测试。
`origin_target_size: [800, 1333]` 项代表输入图像首先被缩放为短边为800、最长边不超过1333的图像。
`target_size: [700 , 900]` 项设置不同的预测尺度。
通过在`EvalReader.sample_transforms`或`TestReader.sample_transforms`中设置`MultiscaleTestResize`项,可以在评估过程或预测过程中开启多尺度测试。
---
### 注意
目前多尺度测试只支持CascadeRCNN、FasterRCNN和MaskRCNN网络,并且batch size需要是1。


@@ -0,0 +1,264 @@
# YOLO系列模型参数配置教程
标签: 模型参数配置
以`ppyolo_r50vd_dcn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
- 数据配置文件 `coco_detection.yml`
```yaml
# 数据评估类型
metric: COCO
# 数据集的类别数
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: train2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# 数据文件夹
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# 图像数据路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, image_dir)
image_dir: val2017
# 标注文件路径,相对 dataset_dir 路径,即 os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# 数据文件夹
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# 标注文件路径,相对 dataset_dir 路径
anno_path: annotations/instances_val2017.json
```
- 优化器配置文件 `optimizer_1x.yml`
```yaml
# 总训练轮数
epoch: 405
# 学习率设置
LearningRate:
# 默认为8卡训练的学习率
base_lr: 0.01
# 学习率调整策略
schedulers:
- !PiecewiseDecay
gamma: 0.1
# 学习率变化位置(轮数)
milestones:
- 243
- 324
# Warmup
- !LinearWarmup
start_factor: 0.
steps: 4000
# 优化器
OptimizerBuilder:
# 优化器
optimizer:
momentum: 0.9
type: Momentum
# 正则化
regularizer:
factor: 0.0005
type: L2
```
- 数据读取配置文件 `ppyolo_reader.yml`
```yaml
# 每张GPU reader进程个数
worker_num: 2
# 训练数据
TrainReader:
inputs_def:
num_max_boxes: 50
# 训练数据transforms
sample_transforms:
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
# batch_transforms
batch_transforms:
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
# 训练时batch_size
batch_size: 24
# 读取数据是否乱序
shuffle: true
# 是否丢弃最后不能完整组成batch的数据
drop_last: true
# mixup_epoch大于最大epoch,表示训练过程一直使用mixup数据增广
mixup_epoch: 25000
# 是否通过共享内存进行数据读取加速,需要保证共享内存大小(如/dev/shm)满足大于1G
use_shared_memory: true
# 评估数据
EvalReader:
# 评估数据transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# 评估时batch_size
batch_size: 8
# 测试数据
TestReader:
inputs_def:
image_shape: [3, 608, 608]
# 测试数据transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# 测试时batch_size
batch_size: 1
```
- 模型配置文件 `ppyolo_r50vd_dcn.yml`
```yaml
# 模型结构类型
architecture: YOLOv3
# 预训练模型地址
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
# norm_type
norm_type: sync_bn
# 是否使用ema
use_ema: true
# ema_decay
ema_decay: 0.9998
# YOLOv3
YOLOv3:
# backbone
backbone: ResNet
# neck
neck: PPYOLOFPN
# yolo_head
yolo_head: YOLOv3Head
# post_process
post_process: BBoxPostProcess
# backbone
ResNet:
# depth
depth: 50
# variant
variant: d
# return_idx, 0 represent res2
return_idx: [1, 2, 3]
# dcn_v2_stages
dcn_v2_stages: [3]
# freeze_at
freeze_at: -1
# freeze_norm
freeze_norm: false
# norm_decay
norm_decay: 0.
# PPYOLOFPN
PPYOLOFPN:
# 是否coord_conv
coord_conv: true
# 是否drop_block
drop_block: true
# block_size
block_size: 3
# keep_prob
keep_prob: 0.9
# 是否spp
spp: true
# YOLOv3Head
YOLOv3Head:
# anchors
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
# anchor_masks
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
# loss
loss: YOLOv3Loss
# 是否使用iou_aware
iou_aware: true
# iou_aware_factor
iou_aware_factor: 0.4
# YOLOv3Loss
YOLOv3Loss:
# ignore_thresh
ignore_thresh: 0.7
# downsample
downsample: [32, 16, 8]
# 是否label_smooth
label_smooth: false
# scale_x_y
scale_x_y: 1.05
# iou_loss
iou_loss: IouLoss
# iou_aware_loss
iou_aware_loss: IouAwareLoss
# IouLoss
IouLoss:
loss_weight: 2.5
loss_square: true
# IouAwareLoss
IouAwareLoss:
loss_weight: 1.0
# BBoxPostProcess
BBoxPostProcess:
decode:
name: YOLOBox
conf_thresh: 0.01
downsample_ratio: 32
clip_bbox: true
scale_x_y: 1.05
# nms 配置
nms:
name: MatrixNMS
keep_top_k: 100
score_threshold: 0.01
post_threshold: 0.01
nms_top_k: -1
background_label: -1
```
- 运行时配置文件 `runtime.yml`
```yaml
# 是否使用gpu
use_gpu: true
# 日志打印间隔
log_iter: 20
# save_dir
save_dir: output
# 模型保存间隔时间
snapshot_epoch: 1
```


@@ -0,0 +1,264 @@
# YOLO series model parameter configuration tutorial
Tag: Model parameter configuration
Take `ppyolo_r50vd_dcn_1x_coco.yml` as an example. The model is composed of five sub configuration files:
- Data configuration file `coco_detection.yml`
```yaml
# Data evaluation type
metric: COCO
# The number of categories in the dataset
num_classes: 80
# TrainDataset
TrainDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: train2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_train2017.json
# Dataset directory
dataset_dir: dataset/coco
# data_fields
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
# Image data path, relative to dataset_dir: os.path.join(dataset_dir, image_dir)
image_dir: val2017
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
# Dataset directory
dataset_dir: dataset/coco
TestDataset:
!ImageFolder
# Annotation file path, relative to dataset_dir: os.path.join(dataset_dir, anno_path)
anno_path: annotations/instances_val2017.json
```
- Optimizer configuration file `optimizer_1x.yml`
```yaml
# Total training epochs
epoch: 405
# learning rate setting
LearningRate:
# Default learning rate for 8-GPU training
base_lr: 0.01
# Learning rate adjustment strategy
schedulers:
- !PiecewiseDecay
gamma: 0.1
# Epochs at which the learning rate changes
milestones:
- 243
- 324
# Warmup
- !LinearWarmup
start_factor: 0.
steps: 4000
# Optimizer
OptimizerBuilder:
# Optimizer
optimizer:
momentum: 0.9
type: Momentum
# Regularization
regularizer:
factor: 0.0005
type: L2
```
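The comment above notes that `base_lr: 0.01` assumes 8-GPU training. A common (but not config-mandated) convention is to scale the learning rate linearly with the total batch size; a sketch of that rule, where the reference total batch of 192 comes from `batch_size: 24` in the reader configuration below:

```python
def scaled_lr(base_lr, base_total_batch, new_total_batch):
    """Linear scaling rule: the learning rate scales with the total batch size."""
    return base_lr * new_total_batch / base_total_batch

# Reference setup for this config: 8 GPUs x batch_size 24 = 192 images per step.
print(scaled_lr(0.01, 192, 192))  # 0.01, unchanged for the default 8-GPU setup
print(scaled_lr(0.01, 192, 24))   # 0.00125 for a single GPU with batch_size 24
```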
- Data reader configuration file `ppyolo_reader.yml`
```yaml
# Number of reader processes per GPU
worker_num: 2
# training data
TrainReader:
inputs_def:
num_max_boxes: 50
# Training data transforms
sample_transforms:
- Decode: {}
- Mixup: {alpha: 1.5, beta: 1.5}
- RandomDistort: {}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomCrop: {}
- RandomFlip: {}
# batch_transforms
batch_transforms:
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 50}
- BboxXYXY2XYWH: {}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
# Batch size during training
batch_size: 24
# Whether to shuffle the data
shuffle: true
# Whether to discard the last incomplete batch
drop_last: true
# mixup_epoch is larger than the total number of epochs, which means mixup augmentation is used throughout training
mixup_epoch: 25000
# Whether to use shared memory to accelerate data reading; make sure the shared memory size (e.g. /dev/shm) is greater than 1 GB
use_shared_memory: true
# Evaluate data
EvalReader:
# Evaluating data transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# Batch_size during evaluation
batch_size: 8
# test data
TestReader:
inputs_def:
image_shape: [3, 608, 608]
# test data transforms
sample_transforms:
- Decode: {}
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
- Permute: {}
# Batch size during inference
batch_size: 1
```
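`Mixup: {alpha: 1.5, beta: 1.5}` blends two samples with a weight drawn from a Beta distribution, and `mixup_epoch: 25000` (larger than the 405 training epochs) keeps it enabled for the whole run. A small sketch of the blending step, as an illustration of the transform rather than the exact implementation:

```python
import random

def mixup(img_a, img_b, alpha=1.5, beta=1.5):
    """Blend two images (nested lists of floats) with a Beta-distributed weight."""
    lam = random.betavariate(alpha, beta)
    blended = [[lam * pa + (1 - lam) * pb for pa, pb in zip(ra, rb)]
               for ra, rb in zip(img_a, img_b)]
    # Ground-truth boxes of both images are kept; lam and (1 - lam) act as their
    # relative weights in the loss (sketch only).
    return blended, lam

img_a = [[0.0, 0.0], [0.0, 0.0]]
img_b = [[1.0, 1.0], [1.0, 1.0]]
blended, lam = mixup(img_a, img_b)
print(lam, blended)
```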
- Model configuration file `ppyolo_r50vd_dcn.yml`
```yaml
# Model structure type
architecture: YOLOv3
# Pretrain model address
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
# norm_type
norm_type: sync_bn
# Whether to use EMA
use_ema: true
# ema_decay
ema_decay: 0.9998
# YOLOv3
YOLOv3:
# backbone
backbone: ResNet
# neck
neck: PPYOLOFPN
# yolo_head
yolo_head: YOLOv3Head
# post_process
post_process: BBoxPostProcess
# backbone
ResNet:
# depth
depth: 50
# variant
variant: d
# return_idx, 0 represent res2
return_idx: [1, 2, 3]
# dcn_v2_stages
dcn_v2_stages: [3]
# freeze_at
freeze_at: -1
# freeze_norm
freeze_norm: false
# norm_decay
norm_decay: 0.
# PPYOLOFPN
PPYOLOFPN:
# Whether to use coord_conv
coord_conv: true
# Whether to use drop_block
drop_block: true
# block_size
block_size: 3
# keep_prob
keep_prob: 0.9
# Whether to use spp
spp: true
# YOLOv3Head
YOLOv3Head:
# anchors
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
# anchor_masks
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
# loss
loss: YOLOv3Loss
# whether to use iou_aware
iou_aware: true
# iou_aware_factor
iou_aware_factor: 0.4
# YOLOv3Loss
YOLOv3Loss:
# ignore_thresh
ignore_thresh: 0.7
# downsample
downsample: [32, 16, 8]
# Whether to use label_smooth
label_smooth: false
# scale_x_y
scale_x_y: 1.05
# iou_loss
iou_loss: IouLoss
# iou_aware_loss
iou_aware_loss: IouAwareLoss
# IouLoss
IouLoss:
loss_weight: 2.5
loss_square: true
# IouAwareLoss
IouAwareLoss:
loss_weight: 1.0
# BBoxPostProcess
BBoxPostProcess:
decode:
name: YOLOBox
conf_thresh: 0.01
downsample_ratio: 32
clip_bbox: true
scale_x_y: 1.05
# nms setting
nms:
name: MatrixNMS
keep_top_k: 100
score_threshold: 0.01
post_threshold: 0.01
nms_top_k: -1
background_label: -1
```
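`anchor_masks` selects which of the nine anchors each detection head uses, and `downsample: [32, 16, 8]` gives the stride of the corresponding feature map. A short sketch that prints the mapping encoded by this config:

```python
anchors = [[10, 13], [16, 30], [33, 23],
           [30, 61], [62, 45], [59, 119],
           [116, 90], [156, 198], [373, 326]]
anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
downsample = [32, 16, 8]

for stride, mask in zip(downsample, anchor_masks):
    picked = [anchors[i] for i in mask]
    print(f"stride {stride}: anchors {picked}")
# The coarsest map (stride 32) gets the largest anchors, stride 8 the smallest.
```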
- Runtime configuration file `runtime.yml`
```yaml
# Whether to use gpu
use_gpu: true
# Log printing interval
log_iter: 20
# save_dir
save_dir: output
# Model save interval
snapshot_epoch: 1
```
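The model configuration file above also enables weight averaging with `use_ema: true` and `ema_decay: 0.9998`. A minimal sketch of what an exponential moving average of a single parameter looks like under that decay (illustration only, not the library's implementation):

```python
def ema_update(ema_value, new_value, decay=0.9998):
    """One exponential-moving-average step for a single parameter (sketch)."""
    return decay * ema_value + (1 - decay) * new_value

ema = 0.0
for step in range(10000):
    ema = ema_update(ema, 1.0)  # parameter held at 1.0 for simplicity
print(round(ema, 3))            # slowly approaches 1.0 (about 0.865 here)
```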