更换文档检测模型
This commit is contained in:
@@ -0,0 +1,261 @@
|
||||
# RCNN系列模型参数配置教程
|
||||
|
||||
标签: 模型参数配置
|
||||
|
||||
以`faster_rcnn_r50_fpn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
|
||||
|
||||
- 数据配置文件 `coco_detection.yml`
|
||||
|
||||
```yaml
|
||||
# 数据评估类型
|
||||
metric: COCO
|
||||
# 数据集的类别数
|
||||
num_classes: 80
|
||||
|
||||
# TrainDataset
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
# 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir)
|
||||
image_dir: train2017
|
||||
# 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_train2017.json
|
||||
# 数据文件夹
|
||||
dataset_dir: dataset/coco
|
||||
# data_fields
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
# 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir)
|
||||
image_dir: val2017
|
||||
# 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
# 数据文件夹
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
# 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
```
|
||||
|
||||
- 优化器配置文件 `optimizer_1x.yml`
|
||||
|
||||
```yaml
|
||||
# 总训练轮数
|
||||
epoch: 12
|
||||
|
||||
# 学习率设置
|
||||
LearningRate:
|
||||
# 默认为8卡训学习率
|
||||
base_lr: 0.01
|
||||
# 学习率调整策略
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
# 学习率变化位置(轮数)
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
# 优化器
|
||||
OptimizerBuilder:
|
||||
# 优化器
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
# 正则化
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
```
|
||||
|
||||
- 数据读取配置文件 `faster_fpn_reader.yml`
|
||||
|
||||
```yaml
|
||||
# 每张GPU reader进程个数
|
||||
worker_num: 2
|
||||
# 训练数据
|
||||
TrainReader:
|
||||
# 训练数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# 由于模型存在FPN结构,输入图片需要padding为32的倍数
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# 训练时batch_size
|
||||
batch_size: 1
|
||||
# 读取数据是否乱序
|
||||
shuffle: true
|
||||
# 是否丢弃最后不能完整组成batch的数据
|
||||
drop_last: true
|
||||
# 表示reader是否对gt进行组batch的操作,在rcnn系列算法中设置为false,得到的gt格式为list[Tensor]
|
||||
collate_batch: false
|
||||
|
||||
# 评估数据
|
||||
EvalReader:
|
||||
# 评估数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# 由于模型存在FPN结构,输入图片需要padding为32的倍数
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# 评估时batch_size
|
||||
batch_size: 1
|
||||
# 读取数据是否乱序
|
||||
shuffle: false
|
||||
# 是否丢弃最后不能完整组成batch的数据
|
||||
drop_last: false
|
||||
|
||||
# 测试数据
|
||||
TestReader:
|
||||
# 测试数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# 由于模型存在FPN结构,输入图片需要padding为32的倍数
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# 测试时batch_size
|
||||
batch_size: 1
|
||||
# 读取数据是否乱序
|
||||
shuffle: false
|
||||
# 是否丢弃最后不能完整组成batch的数据
|
||||
drop_last: false
|
||||
```
|
||||
|
||||
- 模型配置文件 `faster_rcnn_r50_fpn.yml`
|
||||
|
||||
```yaml
|
||||
# 模型结构类型
|
||||
architecture: FasterRCNN
|
||||
# 预训练模型地址
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
# FasterRCNN
|
||||
FasterRCNN:
|
||||
# backbone
|
||||
backbone: ResNet
|
||||
# neck
|
||||
neck: FPN
|
||||
# rpn_head
|
||||
rpn_head: RPNHead
|
||||
# bbox_head
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
# backbone
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
# norm_type,可设置参数:bn 或 sync_bn
|
||||
norm_type: bn
|
||||
# freeze_at index, 0 represent res2
|
||||
freeze_at: 0
|
||||
# return_idx
|
||||
return_idx: [0,1,2,3]
|
||||
# num_stages
|
||||
num_stages: 4
|
||||
|
||||
# FPN
|
||||
FPN:
|
||||
# channel of FPN
|
||||
out_channel: 256
|
||||
|
||||
# RPNHead
|
||||
RPNHead:
|
||||
# anchor generator
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
# rpn_target_assign
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
# 训练时生成proposal的参数
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
# 评估时生成proposal的参数
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
# BBoxHead
|
||||
BBoxHead:
|
||||
# TwoFCHead as BBoxHead
|
||||
head: TwoFCHead
|
||||
# roi align
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
# bbox_assigner
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
# BBoxAssigner
|
||||
BBoxAssigner:
|
||||
# batch_size_per_im
|
||||
batch_size_per_im: 512
|
||||
# 背景阈值
|
||||
bg_thresh: 0.5
|
||||
# 前景阈值
|
||||
fg_thresh: 0.5
|
||||
# 前景比例
|
||||
fg_fraction: 0.25
|
||||
# 是否随机采样
|
||||
use_random: True
|
||||
|
||||
# TwoFCHead
|
||||
TwoFCHead:
|
||||
# TwoFCHead特征维度
|
||||
out_channel: 1024
|
||||
|
||||
|
||||
# BBoxPostProcess
|
||||
BBoxPostProcess:
|
||||
# 解码
|
||||
decode: RCNNBox
|
||||
# nms
|
||||
nms:
|
||||
# 使用MultiClassNMS
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
|
||||
```
|
||||
|
||||
- 运行时配置文件 `runtime.yml`
|
||||
|
||||
```yaml
|
||||
# 是否使用gpu
|
||||
use_gpu: true
|
||||
# 日志打印间隔
|
||||
log_iter: 20
|
||||
# save_dir
|
||||
save_dir: output
|
||||
# 模型保存间隔时间
|
||||
snapshot_epoch: 1
|
||||
```
|
||||
@@ -0,0 +1,261 @@
|
||||
# RCNN series model parameter configuration tutorial
|
||||
|
||||
Tag: Model parameter configuration
|
||||
|
||||
Take `faster_rcnn_r50_fpn_1x_coco.yml` as an example. The model consists of five sub-profiles:
|
||||
|
||||
- Data profile `coco_detection.yml`
|
||||
|
||||
```yaml
|
||||
# Data evaluation type
|
||||
metric: COCO
|
||||
# The number of categories in the dataset
|
||||
num_classes: 80
|
||||
|
||||
# TrainDataset
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
# Image data path, Relative path of dataset_dir, os.path.join(dataset_dir, image_dir)
|
||||
image_dir: train2017
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_train2017.json
|
||||
# Dataset directory
|
||||
dataset_dir: dataset/coco
|
||||
# data_fields
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
# Image data path, Relative path of dataset_dir, os.path.join(dataset_dir, image_dir)
|
||||
image_dir: val2017
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
# Dataset directory
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
```
|
||||
|
||||
- Optimizer configuration file `optimizer_1x.yml`
|
||||
|
||||
```yaml
|
||||
# Total number of training epochs
|
||||
epoch: 12
|
||||
|
||||
# learning rate setting
|
||||
LearningRate:
|
||||
# Default learning rate for 8-GPU training
|
||||
base_lr: 0.01
|
||||
# Learning rate adjustment strategy
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
# Epochs at which the learning rate changes
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
# Optimizer
|
||||
OptimizerBuilder:
|
||||
# Optimizer
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
# Regularization
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
```
|
||||
|
||||
- Data reader configuration file `faster_fpn_reader.yml`
|
||||
|
||||
```yaml
|
||||
# Number of reader processes per GPU
|
||||
worker_num: 2
|
||||
# training data
|
||||
TrainReader:
|
||||
# Training data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# Since the model has an FPN structure, the input image must be padded to a multiple of 32
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# Batch_size during training
|
||||
batch_size: 1
|
||||
# Whether to shuffle the data when reading
|
||||
shuffle: true
|
||||
# Whether to discard data that does not complete the batch
|
||||
drop_last: true
|
||||
# Whether the reader collates the GT into a batch; set to false for RCNN-series algorithms, so the GT is returned as list[Tensor]
|
||||
collate_batch: false
|
||||
|
||||
# Evaluate data
|
||||
EvalReader:
|
||||
# Evaluate data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# Since the model has an FPN structure, the input image must be padded to a multiple of 32
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# batch_size of evaluation
|
||||
batch_size: 1
|
||||
# Whether to shuffle the data when reading
|
||||
shuffle: false
|
||||
# Whether to discard data that does not complete the batch
|
||||
drop_last: false
|
||||
|
||||
# test data
|
||||
TestReader:
|
||||
# test data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
# Since the model has an FPN structure, the input image must be padded to a multiple of 32
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
# batch_size of test
|
||||
batch_size: 1
|
||||
# Whether to shuffle the data when reading
|
||||
shuffle: false
|
||||
# Whether to discard data that does not complete the batch
|
||||
drop_last: false
|
||||
```
|
||||
|
||||
- Model profile `faster_rcnn_r50_fpn.yml`
|
||||
|
||||
```yaml
|
||||
# Model structure type
|
||||
architecture: FasterRCNN
|
||||
# Pretrain model address
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
# FasterRCNN
|
||||
FasterRCNN:
|
||||
# backbone
|
||||
backbone: ResNet
|
||||
# neck
|
||||
neck: FPN
|
||||
# rpn_head
|
||||
rpn_head: RPNHead
|
||||
# bbox_head
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
# backbone
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
# norm_type, Configurable parameter: bn or sync_bn
|
||||
norm_type: bn
|
||||
# freeze_at index, 0 represent res2
|
||||
freeze_at: 0
|
||||
# return_idx
|
||||
return_idx: [0,1,2,3]
|
||||
# num_stages
|
||||
num_stages: 4
|
||||
|
||||
# FPN
|
||||
FPN:
|
||||
# channel of FPN
|
||||
out_channel: 256
|
||||
|
||||
# RPNHead
|
||||
RPNHead:
|
||||
# anchor generator
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
# rpn_target_assign
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
# Parameters for generating proposals during training
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
# Parameters for generating proposals during evaluation
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
# BBoxHead
|
||||
BBoxHead:
|
||||
# TwoFCHead as BBoxHead
|
||||
head: TwoFCHead
|
||||
# roi align
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
# bbox_assigner
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
# BBoxAssigner
|
||||
BBoxAssigner:
|
||||
# batch_size_per_im
|
||||
batch_size_per_im: 512
|
||||
# Background threshold
|
||||
bg_thresh: 0.5
|
||||
# Foreground threshold
|
||||
fg_thresh: 0.5
|
||||
# Foreground fraction
|
||||
fg_fraction: 0.25
|
||||
# Whether to use random sampling
|
||||
use_random: True
|
||||
|
||||
# TwoFCHead
|
||||
TwoFCHead:
|
||||
# TwoFCHead feature dimension
|
||||
out_channel: 1024
|
||||
|
||||
|
||||
# BBoxPostProcess
|
||||
BBoxPostProcess:
|
||||
# decode
|
||||
decode: RCNNBox
|
||||
# nms
|
||||
nms:
|
||||
# use MultiClassNMS
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
|
||||
```
|
||||
|
||||
- runtime configuration file `runtime.yml`
|
||||
|
||||
```yaml
|
||||
# Whether to use gpu
|
||||
use_gpu: true
|
||||
# Log Printing interval
|
||||
log_iter: 20
|
||||
# save_dir
|
||||
save_dir: output
|
||||
# Model save interval
|
||||
snapshot_epoch: 1
|
||||
```
|
||||
@@ -0,0 +1,45 @@
|
||||
# Multi Scale Test Configuration
|
||||
|
||||
Tags: Configuration
|
||||
|
||||
---
|
||||
```yaml
|
||||
|
||||
##################################### Multi scale test configuration #####################################
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Multi Scale Test is a TTA (Test Time Augmentation) method, it can improve object detection performance.
|
||||
|
||||
The input image is scaled to several different sizes, the model generates predictions (bboxes) at each scale, and finally all the predictions are combined to produce the final prediction. (Here **NMS** is used to aggregate the predictions.)
|
||||
|
||||
## _MultiscaleTestResize_ option
|
||||
|
||||
`MultiscaleTestResize` option is used to enable multi scale test prediction.
|
||||
|
||||
`origin_target_size: [800, 1333]` means the input image is first scaled so that its short edge is 800, while its long edge does not exceed 1333.
|
||||
|
||||
`target_size: [700 , 900]` property is used to specify different scales.
|
||||
|
||||
It can be plugged into evaluation process or test (inference) process, by adding `MultiscaleTestResize` entry to `EvalReader.sample_transforms` or `TestReader.sample_transforms`
|
||||
|
||||
---
|
||||
|
||||
### Note
|
||||
|
||||
Now only CascadeRCNN, FasterRCNN and MaskRCNN are supported for multi scale testing. And batch size must be 1.
|
||||
@@ -0,0 +1,45 @@
|
||||
# 多尺度测试的配置
|
||||
|
||||
标签: 配置
|
||||
|
||||
---
|
||||
```yaml
|
||||
|
||||
##################################### 多尺度测试的配置 #####################################
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- MultiscaleTestResize: {origin_target_size: [800, 1333], target_size: [700 , 900]}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
多尺度测试是一种TTA方法(测试时增强),可以用于提高目标检测的准确率
|
||||
|
||||
输入图像首先被缩放为不同尺度的图像,然后模型对这些不同尺度的图像进行预测,最后将这些不同尺度上的预测结果整合为最终预测结果。(这里使用了**NMS**来整合不同尺度的预测结果)
|
||||
|
||||
## _MultiscaleTestResize_ 选项
|
||||
|
||||
`MultiscaleTestResize` 选项用于开启多尺度测试.
|
||||
|
||||
`origin_target_size: [800, 1333]` 项代表输入图像首先缩放为短边为800,最长边不超过1333.
|
||||
|
||||
`target_size: [700 , 900]` 项设置不同的预测尺度。
|
||||
|
||||
通过在`EvalReader.sample_transforms`或`TestReader.sample_transforms`中设置`MultiscaleTestResize`项,可以在评估过程或预测过程中开启多尺度测试。
|
||||
|
||||
---
|
||||
|
||||
### 注意
|
||||
|
||||
目前多尺度测试只支持CascadeRCNN、FasterRCNN和MaskRCNN网络,并且batch size需要是1。
|
||||
@@ -0,0 +1,264 @@
|
||||
# YOLO系列模型参数配置教程
|
||||
|
||||
标签: 模型参数配置
|
||||
|
||||
以`ppyolo_r50vd_dcn_1x_coco.yml`为例,这个模型由五个子配置文件组成:
|
||||
|
||||
- 数据配置文件 `coco_detection.yml`
|
||||
|
||||
```yaml
|
||||
# 数据评估类型
|
||||
metric: COCO
|
||||
# 数据集的类别数
|
||||
num_classes: 80
|
||||
|
||||
# TrainDataset
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
# 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir)
|
||||
image_dir: train2017
|
||||
# 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_train2017.json
|
||||
# 数据文件夹
|
||||
dataset_dir: dataset/coco
|
||||
# data_fields
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
# 图像数据路径,相对 dataset_dir 路径,os.path.join(dataset_dir, image_dir)
|
||||
image_dir: val2017
|
||||
# 标注文件路径,相对 dataset_dir 路径,os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
# 数据文件夹
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
# 标注文件路径,相对 dataset_dir 路径
|
||||
anno_path: annotations/instances_val2017.json
|
||||
```
|
||||
|
||||
- 优化器配置文件 `optimizer_1x.yml`
|
||||
|
||||
```yaml
|
||||
# 总训练轮数
|
||||
epoch: 405
|
||||
|
||||
# 学习率设置
|
||||
LearningRate:
|
||||
# 默认为8卡训学习率
|
||||
base_lr: 0.01
|
||||
# 学习率调整策略
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
# 学习率变化位置(轮数)
|
||||
milestones:
|
||||
- 243
|
||||
- 324
|
||||
# Warmup
|
||||
- !LinearWarmup
|
||||
start_factor: 0.
|
||||
steps: 4000
|
||||
|
||||
# 优化器
|
||||
OptimizerBuilder:
|
||||
# 优化器
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
# 正则化
|
||||
regularizer:
|
||||
factor: 0.0005
|
||||
type: L2
|
||||
```
|
||||
|
||||
- 数据读取配置文件 `ppyolo_reader.yml`
|
||||
|
||||
```yaml
|
||||
# 每张GPU reader进程个数
|
||||
worker_num: 2
|
||||
# 训练数据
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
num_max_boxes: 50
|
||||
# 训练数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Mixup: {alpha: 1.5, beta: 1.5}
|
||||
- RandomDistort: {}
|
||||
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
|
||||
- RandomCrop: {}
|
||||
- RandomFlip: {}
|
||||
# batch_transforms
|
||||
batch_transforms:
|
||||
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
|
||||
- NormalizeBox: {}
|
||||
- PadBox: {num_max_boxes: 50}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
|
||||
# 训练时batch_size
|
||||
batch_size: 24
|
||||
# 读取数据是否乱序
|
||||
shuffle: true
|
||||
# 是否丢弃最后不能完整组成batch的数据
|
||||
drop_last: true
|
||||
# mixup_epoch,大于最大epoch,表示训练过程一直使用mixup数据增广
|
||||
mixup_epoch: 25000
|
||||
# 是否通过共享内存进行数据读取加速,需要保证共享内存大小(如/dev/shm)满足大于1G
|
||||
use_shared_memory: true
|
||||
|
||||
# 评估数据
|
||||
EvalReader:
|
||||
# 评估数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
# 评估时batch_size
|
||||
batch_size: 8
|
||||
|
||||
# 测试数据
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 608, 608]
|
||||
# 测试数据transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
# 测试时batch_size
|
||||
batch_size: 1
|
||||
```
|
||||
|
||||
- 模型配置文件 `ppyolo_r50vd_dcn.yml`
|
||||
|
||||
```yaml
|
||||
# 模型结构类型
|
||||
architecture: YOLOv3
|
||||
# 预训练模型地址
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
|
||||
# norm_type
|
||||
norm_type: sync_bn
|
||||
# 是否使用ema
|
||||
use_ema: true
|
||||
# ema_decay
|
||||
ema_decay: 0.9998
|
||||
|
||||
# YOLOv3
|
||||
YOLOv3:
|
||||
# backbone
|
||||
backbone: ResNet
|
||||
# neck
|
||||
neck: PPYOLOFPN
|
||||
# yolo_head
|
||||
yolo_head: YOLOv3Head
|
||||
# post_process
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
|
||||
# backbone
|
||||
ResNet:
|
||||
# depth
|
||||
depth: 50
|
||||
# variant
|
||||
variant: d
|
||||
# return_idx, 0 represent res2
|
||||
return_idx: [1, 2, 3]
|
||||
# dcn_v2_stages
|
||||
dcn_v2_stages: [3]
|
||||
# freeze_at
|
||||
freeze_at: -1
|
||||
# freeze_norm
|
||||
freeze_norm: false
|
||||
# norm_decay
|
||||
norm_decay: 0.
|
||||
|
||||
# PPYOLOFPN
|
||||
PPYOLOFPN:
|
||||
# 是否coord_conv
|
||||
coord_conv: true
|
||||
# 是否drop_block
|
||||
drop_block: true
|
||||
# block_size
|
||||
block_size: 3
|
||||
# keep_prob
|
||||
keep_prob: 0.9
|
||||
# 是否spp
|
||||
spp: true
|
||||
|
||||
# YOLOv3Head
|
||||
YOLOv3Head:
|
||||
# anchors
|
||||
anchors: [[10, 13], [16, 30], [33, 23],
|
||||
[30, 61], [62, 45], [59, 119],
|
||||
[116, 90], [156, 198], [373, 326]]
|
||||
# anchor_masks
|
||||
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
|
||||
# loss
|
||||
loss: YOLOv3Loss
|
||||
# 是否使用iou_aware
|
||||
iou_aware: true
|
||||
# iou_aware_factor
|
||||
iou_aware_factor: 0.4
|
||||
|
||||
# YOLOv3Loss
|
||||
YOLOv3Loss:
|
||||
# ignore_thresh
|
||||
ignore_thresh: 0.7
|
||||
# downsample
|
||||
downsample: [32, 16, 8]
|
||||
# 是否label_smooth
|
||||
label_smooth: false
|
||||
# scale_x_y
|
||||
scale_x_y: 1.05
|
||||
# iou_loss
|
||||
iou_loss: IouLoss
|
||||
# iou_aware_loss
|
||||
iou_aware_loss: IouAwareLoss
|
||||
|
||||
# IouLoss
|
||||
IouLoss:
|
||||
loss_weight: 2.5
|
||||
loss_square: true
|
||||
|
||||
# IouAwareLoss
|
||||
IouAwareLoss:
|
||||
loss_weight: 1.0
|
||||
|
||||
# BBoxPostProcess
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: YOLOBox
|
||||
conf_thresh: 0.01
|
||||
downsample_ratio: 32
|
||||
clip_bbox: true
|
||||
scale_x_y: 1.05
|
||||
# nms 配置
|
||||
nms:
|
||||
name: MatrixNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.01
|
||||
post_threshold: 0.01
|
||||
nms_top_k: -1
|
||||
background_label: -1
|
||||
|
||||
```
|
||||
|
||||
- 运行时配置文件 `runtime.yml`
|
||||
|
||||
```yaml
|
||||
# 是否使用gpu
|
||||
use_gpu: true
|
||||
# 日志打印间隔
|
||||
log_iter: 20
|
||||
# save_dir
|
||||
save_dir: output
|
||||
# 模型保存间隔时间
|
||||
snapshot_epoch: 1
|
||||
```
|
||||
@@ -0,0 +1,264 @@
|
||||
# YOLO series model parameter configuration tutorial
|
||||
|
||||
Tag: Model parameter configuration
|
||||
|
||||
Take `ppyolo_r50vd_dcn_1x_coco.yml` as an example. The model consists of five sub-profiles:
|
||||
|
||||
- Data profile `coco_detection.yml`
|
||||
|
||||
```yaml
|
||||
# Data evaluation type
|
||||
metric: COCO
|
||||
# The number of categories in the dataset
|
||||
num_classes: 80
|
||||
|
||||
# TrainDataset
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
# Image data path, Relative path of dataset_dir, os.path.join(dataset_dir, image_dir)
|
||||
image_dir: train2017
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_train2017.json
|
||||
# Dataset directory
|
||||
dataset_dir: dataset/coco
|
||||
# data_fields
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
# Image data path, Relative path of dataset_dir, os.path.join(dataset_dir, image_dir)
|
||||
image_dir: val2017
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
# Dataset directory
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
# Annotation file path, Relative path of dataset_dir, os.path.join(dataset_dir, anno_path)
|
||||
anno_path: annotations/instances_val2017.json
|
||||
```
|
||||
|
||||
- Optimizer configuration file `optimizer_1x.yml`
|
||||
|
||||
```yaml
|
||||
# Total number of training epochs
|
||||
epoch: 405
|
||||
|
||||
# learning rate setting
|
||||
LearningRate:
|
||||
# Default learning rate for 8-GPU training
|
||||
base_lr: 0.01
|
||||
# Learning rate adjustment strategy
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
# Epochs at which the learning rate changes
|
||||
milestones:
|
||||
- 243
|
||||
- 324
|
||||
# Warmup
|
||||
- !LinearWarmup
|
||||
start_factor: 0.
|
||||
steps: 4000
|
||||
|
||||
# Optimizer
|
||||
OptimizerBuilder:
|
||||
# Optimizer
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
# Regularization
|
||||
regularizer:
|
||||
factor: 0.0005
|
||||
type: L2
|
||||
```
|
||||
|
||||
- Data reader configuration file `ppyolo_reader.yml`
|
||||
|
||||
```yaml
|
||||
# Number of reader processes per GPU
|
||||
worker_num: 2
|
||||
# training data
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
num_max_boxes: 50
|
||||
# Training data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Mixup: {alpha: 1.5, beta: 1.5}
|
||||
- RandomDistort: {}
|
||||
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
|
||||
- RandomCrop: {}
|
||||
- RandomFlip: {}
|
||||
# batch_transforms
|
||||
batch_transforms:
|
||||
- BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
|
||||
- NormalizeBox: {}
|
||||
- PadBox: {num_max_boxes: 50}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
- Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
|
||||
# Batch size during training
|
||||
batch_size: 24
|
||||
# Whether to shuffle the data when reading
|
||||
shuffle: true
|
||||
# Whether to discard data that does not complete the batch
|
||||
drop_last: true
|
||||
# mixup_epoch: a value greater than the maximum epoch means mixup data augmentation is used throughout training
|
||||
mixup_epoch: 25000
|
||||
# Whether to use the shared memory to accelerate data reading, ensure that the shared memory size (such as /dev/shm) is greater than 1 GB
|
||||
use_shared_memory: true
|
||||
|
||||
# Evaluate data
|
||||
EvalReader:
|
||||
# Evaluating data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
# Batch_size during evaluation
|
||||
batch_size: 8
|
||||
|
||||
# test data
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 608, 608]
|
||||
# test data transforms
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
|
||||
- NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
|
||||
- Permute: {}
|
||||
# batch_size during testing
|
||||
batch_size: 1
|
||||
```
|
||||
|
||||
- Model profile `ppyolo_r50vd_dcn.yml`
|
||||
|
||||
```yaml
|
||||
# Model structure type
|
||||
architecture: YOLOv3
|
||||
# Pretrain model address
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
|
||||
# norm_type
|
||||
norm_type: sync_bn
|
||||
# Whether to use EMA
|
||||
use_ema: true
|
||||
# ema_decay
|
||||
ema_decay: 0.9998
|
||||
|
||||
# YOLOv3
|
||||
YOLOv3:
|
||||
# backbone
|
||||
backbone: ResNet
|
||||
# neck
|
||||
neck: PPYOLOFPN
|
||||
# yolo_head
|
||||
yolo_head: YOLOv3Head
|
||||
# post_process
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
|
||||
# backbone
|
||||
ResNet:
|
||||
# depth
|
||||
depth: 50
|
||||
# variant
|
||||
variant: d
|
||||
# return_idx, 0 represent res2
|
||||
return_idx: [1, 2, 3]
|
||||
# dcn_v2_stages
|
||||
dcn_v2_stages: [3]
|
||||
# freeze_at
|
||||
freeze_at: -1
|
||||
# freeze_norm
|
||||
freeze_norm: false
|
||||
# norm_decay
|
||||
norm_decay: 0.
|
||||
|
||||
# PPYOLOFPN
|
||||
PPYOLOFPN:
|
||||
# whether coord_conv or not
|
||||
coord_conv: true
|
||||
# whether drop_block or not
|
||||
drop_block: true
|
||||
# block_size
|
||||
block_size: 3
|
||||
# keep_prob
|
||||
keep_prob: 0.9
|
||||
# whether spp or not
|
||||
spp: true
|
||||
|
||||
# YOLOv3Head
|
||||
YOLOv3Head:
|
||||
# anchors
|
||||
anchors: [[10, 13], [16, 30], [33, 23],
|
||||
[30, 61], [62, 45], [59, 119],
|
||||
[116, 90], [156, 198], [373, 326]]
|
||||
# anchor_masks
|
||||
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
|
||||
# loss
|
||||
loss: YOLOv3Loss
|
||||
# whether to use iou_aware
|
||||
iou_aware: true
|
||||
# iou_aware_factor
|
||||
iou_aware_factor: 0.4
|
||||
|
||||
# YOLOv3Loss
|
||||
YOLOv3Loss:
|
||||
# ignore_thresh
|
||||
ignore_thresh: 0.7
|
||||
# downsample
|
||||
downsample: [32, 16, 8]
|
||||
# whether label_smooth or not
|
||||
label_smooth: false
|
||||
# scale_x_y
|
||||
scale_x_y: 1.05
|
||||
# iou_loss
|
||||
iou_loss: IouLoss
|
||||
# iou_aware_loss
|
||||
iou_aware_loss: IouAwareLoss
|
||||
|
||||
# IouLoss
|
||||
IouLoss:
|
||||
loss_weight: 2.5
|
||||
loss_square: true
|
||||
|
||||
# IouAwareLoss
|
||||
IouAwareLoss:
|
||||
loss_weight: 1.0
|
||||
|
||||
# BBoxPostProcess
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: YOLOBox
|
||||
conf_thresh: 0.01
|
||||
downsample_ratio: 32
|
||||
clip_bbox: true
|
||||
scale_x_y: 1.05
|
||||
# nms setting
|
||||
nms:
|
||||
name: MatrixNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.01
|
||||
post_threshold: 0.01
|
||||
nms_top_k: -1
|
||||
background_label: -1
|
||||
|
||||
```
|
||||
|
||||
- Runtime file `runtime.yml`
|
||||
|
||||
```yaml
|
||||
# Whether to use gpu
|
||||
use_gpu: true
|
||||
# Log Printing interval
|
||||
log_iter: 20
|
||||
# save_dir
|
||||
save_dir: output
|
||||
# Model save interval
|
||||
snapshot_epoch: 1
|
||||
```
|
||||
Reference in New Issue
Block a user