更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,22 @@
# SSD: Single Shot MultiBox Detector
## Model Zoo
### SSD on Pascal VOC
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 | 推理时间(fps) | Box AP | 下载 | 配置文件 |
| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| VGG | SSD | 8 | 240e | ---- | 77.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_vgg16_300_240e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ssd/ssd_vgg16_300_240e_voc.yml) |
| MobileNet v1 | SSD | 32 | 120e | ---- | 73.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) |
**注意:** SSD-VGG使用4个GPU,在总batch size为32的设置下训练240个epoch;SSD-MobileNetv1使用2个GPU,在总batch size为64的设置下训练120个epoch。
## Citations
```
@article{Liu_2016,
title={SSD: Single Shot MultiBox Detector},
journal={ECCV},
author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.},
year={2016},
}
```

View File

@@ -0,0 +1,17 @@
# Training schedule: 120 epochs, RMSProp optimizer, piecewise learning-rate
# decay without warmup, L2 regularization.
epoch: 120

LearningRate:
  base_lr: 0.001
  schedulers:
    # One gamma entry is listed per milestone.
    - !PiecewiseDecay
      milestones:
        - 40
        - 60
        - 80
        - 100
      gamma:
        - 0.5
        - 0.5
        - 0.4
        - 0.1
      use_warmup: false

OptimizerBuilder:
  optimizer:
    type: RMSProp
    momentum: 0.0
  regularizer:
    type: L2
    factor: 0.00005

View File

@@ -0,0 +1,18 @@
# Training schedule: 1700 epochs, Momentum optimizer, cosine learning-rate
# decay combined with a linear warmup, L2 regularization.
epoch: 1700

LearningRate:
  base_lr: 0.4
  schedulers:
    # max_epochs matches the top-level epoch count.
    - !CosineDecay
      max_epochs: 1700
    - !LinearWarmup
      steps: 2000
      start_factor: 0.3333333333333333

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

View File

@@ -0,0 +1,21 @@
# Training schedule: 240 epochs, Momentum optimizer, piecewise learning-rate
# decay combined with a linear warmup, L2 regularization.
epoch: 240

LearningRate:
  base_lr: 0.001
  schedulers:
    # A single gamma is given for both milestones.
    - !PiecewiseDecay
      milestones: [160, 200]
      gamma: 0.1
    - !LinearWarmup
      steps: 500
      start_factor: 0.3333333333333333

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

View File

@@ -0,0 +1,17 @@
# Training schedule: 70 epochs, Momentum optimizer, piecewise learning-rate
# decay without warmup, L2 regularization.
epoch: 70

LearningRate:
  base_lr: 0.05
  schedulers:
    # One gamma entry is listed per milestone.
    - !PiecewiseDecay
      milestones:
        - 48
        - 60
      gamma:
        - 0.1
        - 0.1
      use_warmup: false

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

View File

@@ -0,0 +1,39 @@
# Reader settings for 300x300 input: an augmented training pipeline plus plain
# decode/resize/normalize pipelines for evaluation and inference.
worker_num: 8

TrainReader:
  inputs_def:
    num_max_boxes: 90
  sample_transforms:
    - Decode: {}
    - RandomDistort:
        brightness: [0.5, 1.125, 0.875]
        random_apply: false
    # Fill value equals the normalization mean used below.
    - RandomExpand:
        fill_value: [127.5, 127.5, 127.5]
    - RandomCrop:
        allow_no_crop: false
    - RandomFlip: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeBox: {}
    # Same box count as inputs_def.num_max_boxes.
    - PadBox:
        num_max_boxes: 90
  batch_transforms:
    - NormalizeImage:
        mean: [127.5, 127.5, 127.5]
        std: [127.502231, 127.502231, 127.502231]
        is_scale: false
    - Permute: {}
  batch_size: 32
  shuffle: true
  drop_last: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [127.5, 127.5, 127.5]
        std: [127.502231, 127.502231, 127.502231]
        is_scale: false
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [127.5, 127.5, 127.5]
        std: [127.502231, 127.502231, 127.502231]
        is_scale: false
    - Permute: {}
  batch_size: 1

View File

@@ -0,0 +1,41 @@
# SSD detector with a MobileNet backbone, an SSDHead and NMS-based
# post-processing.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ssd_mobilenet_v1_coco_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: MobileNet
  ssd_head: SSDHead
  post_process: BBoxPostProcess

MobileNet:
  scale: 1
  norm_decay: 0.
  conv_learning_rate: 0.1
  with_extra_blocks: true
  extra_block_filters:
    - [256, 512]
    - [128, 256]
    - [128, 256]
    - [64, 128]
  feature_maps: [11, 13, 14, 15, 16, 17]

SSDHead:
  kernel_size: 1
  padding: 0
  anchor_generator:
    # Six entries per list, matching the six feature_maps listed above.
    steps: [0, 0, 0, 0, 0, 0]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
    min_ratio: 20
    max_ratio: 90
    base_size: 300
    min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
    # The first entry is an empty list, i.e. no max size for the first level.
    max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
    offset: 0.5
    flip: true
    min_max_aspect_ratios_order: false

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,38 @@
# SSD detector with a ResNet-34 backbone, an SSDHead with an extra head, and
# NMS-based post-processing.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: ResNet
  ssd_head: SSDHead
  post_process: BBoxPostProcess
  r34_backbone: true

ResNet:
  depth: 34
  norm_type: bn
  freeze_norm: false
  freeze_at: -1
  # Stage indices are zero-based, with index 0 standing for res2.
  return_idx: [2]
  num_stages: 3

SSDHead:
  use_extra_head: true
  anchor_generator:
    # Six entries per list, one per detection level.
    steps: [8, 16, 32, 64, 100, 300]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2.]
      - [2.]
    min_sizes: [21.0, 45.0, 99.0, 153.0, 207.0, 261.0]
    max_sizes: [45.0, 99.0, 153.0, 207.0, 261.0, 315.0]
    offset: 0.5
    clip: true
    min_max_aspect_ratios_order: true

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.05
    nms_threshold: 0.5
    nms_top_k: 400
    keep_top_k: 200

View File

@@ -0,0 +1,38 @@
# Reader settings for 300x300 input. Every training transform, including
# normalization and permutation, is listed under sample_transforms.
worker_num: 3

TrainReader:
  inputs_def:
    num_max_boxes: 90
  sample_transforms:
    - Decode: {}
    - RandomCrop:
        num_attempts: 1
    - RandomFlip: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - RandomDistort:
        brightness: [0.875, 1.125, 0.5]
        random_apply: false
    - NormalizeBox: {}
    # Same box count as inputs_def.num_max_boxes.
    - PadBox:
        num_max_boxes: 90
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 64
  shuffle: true
  drop_last: true
  use_shared_memory: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

View File

@@ -0,0 +1,41 @@
# Reader settings for 300x300 input with mean subtraction only (std of 1 and
# is_scale disabled).
worker_num: 2

TrainReader:
  inputs_def:
    num_max_boxes: 90
  sample_transforms:
    - Decode: {}
    - RandomDistort:
        brightness: [0.5, 1.125, 0.875]
        random_apply: false
    # Fill value equals the normalization mean used below.
    - RandomExpand:
        fill_value: [104., 117., 123.]
    - RandomCrop:
        allow_no_crop: true
    - RandomFlip: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeBox: {}
    # Same box count as inputs_def.num_max_boxes.
    - PadBox:
        num_max_boxes: 90
  batch_transforms:
    - NormalizeImage:
        mean: [104., 117., 123.]
        std: [1., 1., 1.]
        is_scale: false
    - Permute: {}
  batch_size: 8
  shuffle: true
  drop_last: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [104., 117., 123.]
        std: [1., 1., 1.]
        is_scale: false
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [104., 117., 123.]
        std: [1., 1., 1.]
        is_scale: false
    - Permute: {}
  batch_size: 1

View File

@@ -0,0 +1,37 @@
# SSD detector with a VGG-16 backbone, an SSDHead and NMS-based
# post-processing.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/VGG16_caffe_pretrained.pdparams

# Model architecture: each value names one of the sections configured below.
SSD:
  # Feature flow: backbone -> head.
  backbone: VGG
  ssd_head: SSDHead
  # Post-processing stage.
  post_process: BBoxPostProcess

VGG:
  depth: 16
  # NOTE(review): presumably one entry per output level, with -1 disabling
  # normalization for that level — confirm against the VGG backbone.
  normalizations: [20., -1, -1, -1, -1, -1]

SSDHead:
  anchor_generator:
    # Six entries per list, one per detection level.
    steps: [8, 16, 32, 64, 100, 300]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2.]
      - [2.]
    min_ratio: 20
    max_ratio: 90
    min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
    max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
    offset: 0.5
    flip: true
    min_max_aspect_ratios_order: true

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,39 @@
# Reader settings for 300x300 input: an augmented training pipeline plus plain
# decode/resize/normalize pipelines for evaluation and inference.
worker_num: 8

TrainReader:
  inputs_def:
    num_max_boxes: 90
  sample_transforms:
    - Decode: {}
    - RandomDistort:
        brightness: [0.5, 1.125, 0.875]
        random_apply: false
    - RandomExpand:
        fill_value: [123.675, 116.28, 103.53]
    # Fixed: the value was misspelled `Fasle`, which YAML loads as a non-empty
    # (truthy) string rather than the boolean false that was intended.
    - RandomCrop:
        allow_no_crop: false
    - RandomFlip: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeBox: {}
    # Same box count as inputs_def.num_max_boxes.
    - PadBox:
        num_max_boxes: 90
  batch_transforms:
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 64
  shuffle: true
  drop_last: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [300, 300]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

View File

@@ -0,0 +1,39 @@
# Reader settings for 320x320 input: an augmented training pipeline plus plain
# decode/resize/normalize pipelines for evaluation and inference.
worker_num: 8

TrainReader:
  inputs_def:
    num_max_boxes: 90
  sample_transforms:
    - Decode: {}
    - RandomDistort:
        brightness: [0.5, 1.125, 0.875]
        random_apply: false
    - RandomExpand:
        fill_value: [123.675, 116.28, 103.53]
    # Fixed: the value was misspelled `Fasle`, which YAML loads as a non-empty
    # (truthy) string rather than the boolean false that was intended.
    - RandomCrop:
        allow_no_crop: false
    - RandomFlip: {}
    - Resize:
        target_size: [320, 320]
        keep_ratio: false
        interp: 1
    - NormalizeBox: {}
    # Same box count as inputs_def.num_max_boxes.
    - PadBox:
        num_max_boxes: 90
  batch_transforms:
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 64
  shuffle: true
  drop_last: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [320, 320]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

TestReader:
  inputs_def:
    image_shape: [3, 320, 320]
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [320, 320]
        keep_ratio: false
        interp: 1
    - NormalizeImage:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        is_scale: true
    - Permute: {}
  batch_size: 1

View File

@@ -0,0 +1,42 @@
# SSD detector with a GhostNet backbone (scale 1.3), a separable-conv SSDHead
# and NMS-based post-processing, for 320-pixel base size.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/GhostNet_x1_3_ssld_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: GhostNet
  ssd_head: SSDHead
  post_process: BBoxPostProcess

GhostNet:
  scale: 1.3
  conv_decay: 0.00004
  lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
  with_extra_blocks: true
  extra_block_filters:
    - [256, 512]
    - [128, 256]
    - [128, 256]
    - [64, 128]
  feature_maps: [13, 18, 19, 20, 21, 22]

SSDHead:
  use_sepconv: true
  conv_decay: 0.00004
  anchor_generator:
    # Six entries per list, matching the six feature_maps listed above.
    steps: [16, 32, 64, 107, 160, 320]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
    min_ratio: 20
    max_ratio: 95
    base_size: 320
    # NOTE(review): left empty; presumably the sizes are then derived from
    # min_ratio, max_ratio and base_size — confirm against the anchor generator.
    min_sizes: []
    max_sizes: []
    offset: 0.5
    flip: true
    clip: true
    min_max_aspect_ratios_order: false

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,41 @@
# SSD detector with a MobileNet backbone, a separable-conv SSDHead and
# NMS-based post-processing, for 300-pixel base size.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: MobileNet
  ssd_head: SSDHead
  post_process: BBoxPostProcess

MobileNet:
  scale: 1
  conv_decay: 0.00004
  with_extra_blocks: true
  extra_block_filters:
    - [256, 512]
    - [128, 256]
    - [128, 256]
    - [64, 128]
  feature_maps: [11, 13, 14, 15, 16, 17]

SSDHead:
  use_sepconv: true
  conv_decay: 0.00004
  anchor_generator:
    # Six entries per list, matching the six feature_maps listed above.
    steps: [16, 32, 64, 100, 150, 300]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
    min_ratio: 20
    max_ratio: 95
    base_size: 300
    # NOTE(review): left empty; presumably the sizes are then derived from
    # min_ratio, max_ratio and base_size — confirm against the anchor generator.
    min_sizes: []
    max_sizes: []
    offset: 0.5
    flip: true
    clip: true
    min_max_aspect_ratios_order: false

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,44 @@
# SSD detector with a MobileNetV3 "large" backbone, a separable-conv SSDHead
# and NMS-based post-processing, for 320-pixel base size.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: MobileNetV3
  ssd_head: SSDHead
  post_process: BBoxPostProcess

MobileNetV3:
  model_name: large
  scale: 1.0
  multiplier: 0.5
  conv_decay: 0.00004
  lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
  with_extra_blocks: true
  extra_block_filters:
    - [256, 512]
    - [128, 256]
    - [128, 256]
    - [64, 128]
  feature_maps: [14, 17, 18, 19, 20, 21]

SSDHead:
  use_sepconv: true
  conv_decay: 0.00004
  anchor_generator:
    # Six entries per list, matching the six feature_maps listed above.
    steps: [16, 32, 64, 107, 160, 320]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
    min_ratio: 20
    max_ratio: 95
    base_size: 320
    # NOTE(review): left empty; presumably the sizes are then derived from
    # min_ratio, max_ratio and base_size — confirm against the anchor generator.
    min_sizes: []
    max_sizes: []
    offset: 0.5
    flip: true
    clip: true
    min_max_aspect_ratios_order: false

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,44 @@
# SSD detector with a MobileNetV3 "small" backbone, a separable-conv SSDHead
# and NMS-based post-processing, for 320-pixel base size.
architecture: SSD
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams

# Component wiring: each value names one of the sections configured below.
SSD:
  backbone: MobileNetV3
  ssd_head: SSDHead
  post_process: BBoxPostProcess

MobileNetV3:
  model_name: small
  scale: 1.0
  multiplier: 0.5
  conv_decay: 0.00004
  lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
  with_extra_blocks: true
  extra_block_filters:
    - [256, 512]
    - [128, 256]
    - [128, 256]
    - [64, 128]
  feature_maps: [10, 13, 14, 15, 16, 17]

SSDHead:
  use_sepconv: true
  conv_decay: 0.00004
  anchor_generator:
    # Six entries per list, matching the six feature_maps listed above.
    steps: [16, 32, 64, 107, 160, 320]
    aspect_ratios:
      - [2.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
      - [2., 3.]
    min_ratio: 20
    max_ratio: 95
    base_size: 320
    # NOTE(review): left empty; presumably the sizes are then derived from
    # min_ratio, max_ratio and base_size — confirm against the anchor generator.
    min_sizes: []
    max_sizes: []
    offset: 0.5
    flip: true
    clip: true
    min_max_aspect_ratios_order: false

BBoxPostProcess:
  decode:
    name: SSDBox
  nms:
    name: MultiClassNMS
    score_threshold: 0.01
    nms_threshold: 0.45
    nms_top_k: 400
    keep_top_k: 200
    nms_eta: 1.0

View File

@@ -0,0 +1,14 @@
# Top-level config composed from the base files listed here: VOC dataset,
# runtime, 120-epoch optimizer, MobileNet v1 SSD model and its reader.
_BASE_:
  - '../datasets/voc.yml'
  - '../runtime.yml'
  - '_base_/optimizer_120e.yml'
  - '_base_/ssd_mobilenet_v1_300.yml'
  - '_base_/ssd_mobilenet_reader.yml'

weights: output/ssd_mobilenet_v1_300_120e_voc/model_final

# Ground-truth info is needed on the VOC dataset, so samples must not be
# collated into a batch when the batch size is larger than 1.
EvalReader:
  collate_batch: false

View File

@@ -0,0 +1,11 @@
# Top-level config composed from the base files listed here: COCO detection
# dataset, runtime, 70-epoch optimizer, ResNet-34 SSD model and its reader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_70e.yml'
  - '_base_/ssd_r34_300.yml'
  - '_base_/ssd_r34_reader.yml'

weights: output/ssd_r34_70e_coco/model_final

# Logging and checkpoint intervals.
log_iter: 100
snapshot_epoch: 5

View File

@@ -0,0 +1,14 @@
# Top-level config composed from the base files listed here: VOC dataset,
# runtime, 240-epoch optimizer, VGG-16 SSD model and its reader.
_BASE_:
  - '../datasets/voc.yml'
  - '../runtime.yml'
  - '_base_/optimizer_240e.yml'
  - '_base_/ssd_vgg16_300.yml'
  - '_base_/ssd_reader.yml'

weights: output/ssd_vgg16_300_240e_voc/model_final

# Ground-truth info is needed on the VOC dataset, so samples must not be
# collated into a batch when the batch size is larger than 1.
EvalReader:
  collate_batch: false

View File

@@ -0,0 +1,27 @@
# Top-level config composed from the base files listed here: COCO detection
# dataset, runtime, 1700-epoch optimizer, GhostNet SSDLite model and the
# 320-pixel reader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1700e.yml'
  - '_base_/ssdlite_ghostnet_320.yml'
  - '_base_/ssdlite320_reader.yml'

weights: output/ssdlite_ghostnet_320_coco/model_final

# The schedule and optimizer are spelled out in full here.
# NOTE(review): presumably these take precedence over the values inherited
# from '_base_/optimizer_1700e.yml' — confirm the merge order.
epoch: 1700

LearningRate:
  base_lr: 0.2
  schedulers:
    - !CosineDecay
      max_epochs: 1700
    - !LinearWarmup
      steps: 2000
      start_factor: 0.33333

OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

View File

@@ -0,0 +1,8 @@
# Top-level config composed from the base files listed here: COCO detection
# dataset, runtime, 1700-epoch optimizer, MobileNet v1 SSDLite model and the
# 300-pixel reader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1700e.yml'
  - '_base_/ssdlite_mobilenet_v1_300.yml'
  - '_base_/ssdlite300_reader.yml'

weights: output/ssdlite_mobilenet_v1_300_coco/model_final

View File

@@ -0,0 +1,8 @@
# Top-level config composed from the base files listed here: COCO detection
# dataset, runtime, 1700-epoch optimizer, MobileNetV3-large SSDLite model and
# the 320-pixel reader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1700e.yml'
  - '_base_/ssdlite_mobilenet_v3_large_320.yml'
  - '_base_/ssdlite320_reader.yml'

weights: output/ssdlite_mobilenet_v3_large_320_coco/model_final

View File

@@ -0,0 +1,8 @@
# Top-level config composed from the base files listed here: COCO detection
# dataset, runtime, 1700-epoch optimizer, MobileNetV3-small SSDLite model and
# the 320-pixel reader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1700e.yml'
  - '_base_/ssdlite_mobilenet_v3_small_320.yml'
  - '_base_/ssdlite320_reader.yml'

weights: output/ssdlite_mobilenet_v3_small_320_coco/model_final