更换文档检测模型
This commit is contained in:
28
paddle_detection/configs/cascade_rcnn/README.md
Normal file
28
paddle_detection/configs/cascade_rcnn/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
|
||||
| ResNet50-FPN | Cascade Faster | 1 | 1x | ---- | 41.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Cascade Mask | 1 | 1x | ---- | 41.8 | 36.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 1x | ---- | 44.4 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 2x | ---- | 45.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 1x | ---- | 44.9 | 39.1 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 2x | ---- | 45.7 | 39.7 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{Cai_2019,
|
||||
title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
|
||||
ISSN={1939-3539},
|
||||
url={http://dx.doi.org/10.1109/tpami.2019.2956516},
|
||||
DOI={10.1109/tpami.2019.2956516},
|
||||
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
||||
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
|
||||
author={Cai, Zhaowei and Vasconcelos, Nuno},
|
||||
year={2019},
|
||||
pages={1–1}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,97 @@
|
||||
architecture: CascadeRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
|
||||
CascadeRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: CascadeHead
|
||||
mask_head: MaskHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
mask_post_process: MaskPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
CascadeHead:
|
||||
head: CascadeTwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
cascade_iou: [0.5, 0.6, 0.7]
|
||||
use_random: True
|
||||
|
||||
CascadeTwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: RCNNBox
|
||||
prior_box_var: [30.0, 30.0, 15.0, 15.0]
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
|
||||
|
||||
MaskHead:
|
||||
head: MaskFeat
|
||||
roi_extractor:
|
||||
resolution: 14
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
mask_assigner: MaskAssigner
|
||||
share_bbox_feat: False
|
||||
|
||||
MaskFeat:
|
||||
num_convs: 4
|
||||
out_channel: 256
|
||||
|
||||
MaskAssigner:
|
||||
mask_resolution: 28
|
||||
|
||||
MaskPostProcess:
|
||||
binary_thresh: 0.5
|
||||
@@ -0,0 +1,75 @@
|
||||
architecture: CascadeRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
|
||||
CascadeRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: CascadeHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
CascadeHead:
|
||||
head: CascadeTwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
cascade_iou: [0.5, 0.6, 0.7]
|
||||
use_random: True
|
||||
|
||||
CascadeTwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: RCNNBox
|
||||
prior_box_var: [30.0, 30.0, 15.0, 15.0]
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,19 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.001
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
@@ -0,0 +1,8 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
|
||||
@@ -0,0 +1,18 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
@@ -0,0 +1,29 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [12, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,8 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
|
||||
@@ -0,0 +1,18 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
@@ -0,0 +1,29 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [12, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
37
paddle_detection/configs/centernet/README.md
Normal file
37
paddle_detection/configs/centernet/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
English | [简体中文](README_cn.md)
|
||||
|
||||
# CenterNet (CenterNet: Objects as Points)
|
||||
|
||||
## Table of Contents
|
||||
- [Introduction](#Introduction)
|
||||
- [Model Zoo](#model-zoo)
|
||||
- [Citations](#Citations)
|
||||
|
||||
## Introduction
|
||||
|
||||
[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector that models an object as a single point -- the center point of its bounding box. The detector uses keypoint estimation to find center points and regresses to all other object properties. The center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors.
|
||||
|
||||
## Model Zoo
|
||||
|
||||
### CenterNet Results on COCO-val 2017
|
||||
|
||||
| backbone | input shape | mAP | FPS | download | config |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
|
||||
| DLA-34 | 512x512 | 37.6 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
|
||||
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
|
||||
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
|
||||
| MobileNetV3_small + DLAUp | 512x512 | 17 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
|
||||
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
|
||||
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{zhou2019objects,
|
||||
title={Objects as points},
|
||||
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
|
||||
journal={arXiv preprint arXiv:1904.07850},
|
||||
year={2019}
|
||||
}
|
||||
```
|
||||
36
paddle_detection/configs/centernet/README_cn.md
Normal file
36
paddle_detection/configs/centernet/README_cn.md
Normal file
@@ -0,0 +1,36 @@
|
||||
简体中文 | [English](README.md)
|
||||
|
||||
# CenterNet (CenterNet: Objects as Points)
|
||||
|
||||
## 内容
|
||||
- [简介](#简介)
|
||||
- [模型库](#模型库)
|
||||
- [引用](#引用)
|
||||
|
||||
## 简介
|
||||
|
||||
[CenterNet](http://arxiv.org/abs/1904.07850)是Anchor Free检测器,将物体表示为其目标框的中心点。CenterNet使用关键点检测的方式定位中心点并回归物体的其他属性。CenterNet是以中心点为基础的检测方法,是端到端可训练的,并且相较于基于anchor的检测器检测更加高效。
|
||||
|
||||
## 模型库
|
||||
|
||||
### CenterNet在COCO-val 2017上结果
|
||||
|
||||
| 骨干网络 | 输入尺寸 | mAP | FPS | 下载链接 | 配置文件 |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
|
||||
| DLA-34 | 512x512 | 37.6 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [配置文件](./centernet_dla34_140e_coco.yml) |
|
||||
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [配置文件](./centernet_r50_140e_coco.yml) |
|
||||
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [配置文件](./centernet_mbv1_140e_coco.yml) |
|
||||
| MobileNetV3_small + DLAUp | 512x512 | 17 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [配置文件](./centernet_mbv3_small_140e_coco.yml) |
|
||||
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [配置文件](./centernet_mbv3_large_140e_coco.yml) |
|
||||
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [配置文件](./centernet_shufflenetv2_140e_coco.yml) |
|
||||
|
||||
## 引用
|
||||
```
|
||||
@article{zhou2019objects,
|
||||
title={Objects as points},
|
||||
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
|
||||
journal={arXiv preprint arXiv:1904.07850},
|
||||
year={2019}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,22 @@
|
||||
architecture: CenterNet
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
|
||||
|
||||
CenterNet:
|
||||
backbone: DLA
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
DLA:
|
||||
depth: 34
|
||||
|
||||
CenterNetDLAFPN:
|
||||
down_ratio: 4
|
||||
|
||||
CenterNetHead:
|
||||
head_planes: 256
|
||||
regress_ltrb: False
|
||||
|
||||
CenterNetPostProcess:
|
||||
max_per_img: 100
|
||||
regress_ltrb: False
|
||||
34
paddle_detection/configs/centernet/_base_/centernet_r50.yml
Normal file
34
paddle_detection/configs/centernet/_base_/centernet_r50.yml
Normal file
@@ -0,0 +1,34 @@
|
||||
architecture: CenterNet
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
|
||||
norm_type: sync_bn
|
||||
use_ema: true
|
||||
ema_decay: 0.9998
|
||||
|
||||
CenterNet:
|
||||
backbone: ResNet
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
return_idx: [0, 1, 2, 3]
|
||||
freeze_at: -1
|
||||
norm_decay: 0.
|
||||
dcn_v2_stages: [3]
|
||||
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 4
|
||||
down_ratio: 4
|
||||
dcn_v2: False
|
||||
|
||||
CenterNetHead:
|
||||
head_planes: 256
|
||||
regress_ltrb: False
|
||||
|
||||
CenterNetPostProcess:
|
||||
max_per_img: 100
|
||||
regress_ltrb: False
|
||||
@@ -0,0 +1,35 @@
|
||||
worker_num: 4
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 512, 512]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
|
||||
- CenterRandColor: {}
|
||||
- Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
|
||||
- Permute: {}
|
||||
- Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
|
||||
batch_size: 16
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
use_shared_memory: True
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 512, 512]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
14
paddle_detection/configs/centernet/_base_/optimizer_140e.yml
Normal file
14
paddle_detection/configs/centernet/_base_/optimizer_140e.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
epoch: 140
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0005
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [90, 120]
|
||||
use_warmup: False
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
type: Adam
|
||||
regularizer: NULL
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_140e.yml',
|
||||
'_base_/centernet_dla34.yml',
|
||||
'_base_/centernet_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/centernet_dla34_140e_coco/model_final
|
||||
@@ -0,0 +1,21 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
|
||||
weights: output/centernet_mbv1_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNet
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNet:
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [3, 5, 11, 13]
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,22 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
|
||||
weights: output/centernet_mbv3_large_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNetV3
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNetV3:
|
||||
model_name: large
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [4, 7, 13, 16]
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,28 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
|
||||
weights: output/centernet_mbv3_small_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNetV3
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNetV3:
|
||||
model_name: small
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [4, 9, 12]
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 3
|
||||
down_ratio: 8
|
||||
dcn_v2: False
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_140e.yml',
|
||||
'_base_/centernet_r50.yml',
|
||||
'_base_/centernet_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/centernet_r50_140e_coco/model_final
|
||||
@@ -0,0 +1,33 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ShuffleNetV2_x1_0_pretrained.pdparams
|
||||
weights: output/centernet_shufflenetv2_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: ShuffleNetV2
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
ShuffleNetV2:
|
||||
scale: 1.0
|
||||
feature_maps: [5, 13, 17]
|
||||
act: leaky_relu
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 3
|
||||
down_ratio: 8
|
||||
dcn_v2: False
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: False, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
|
||||
- Permute: {}
|
||||
68
paddle_detection/configs/clrnet/README.cn.md
Normal file
68
paddle_detection/configs/clrnet/README.cn.md
Normal file
@@ -0,0 +1,68 @@
|
||||
简体中文 | [English](README.md)
|
||||
|
||||
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
|
||||
|
||||
## 目录
|
||||
- [简介](#简介)
|
||||
- [模型库](#模型库)
|
||||
- [引用](#引用)
|
||||
|
||||
## 简介
|
||||
|
||||
[CLRNet](https://arxiv.org/abs/2203.10350)是一个车道线检测模型。CLRNet模型设计了车道线检测的直线先验轨迹,车道线iou以及nms方法,融合提取车道线轨迹的上下文高层特征与底层特征,利用FPN多尺度进行refine,在车道线检测相关数据集取得了SOTA的性能。
|
||||
|
||||
## 模型库
|
||||
|
||||
### CLRNet在CUlane上结果
|
||||
|
||||
| 骨架网络 | mF1 | F1@50 | F1@75 | 下载链接 | 配置文件 |训练日志|
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |:-----: |
|
||||
| ResNet-18 | 54.98 | 79.46 | 62.10 | [下载链接](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [配置文件](./clrnet_resnet18_culane.yml) |[训练日志](https://bj.bcebos.com/v1/paddledet/logs/train_clrnet_r18_15_culane.log)|
|
||||
|
||||
### 数据集下载
|
||||
下载[CULane数据集](https://xingangpan.github.io/projects/CULane.html)并解压到`dataset/culane`目录。
|
||||
|
||||
您的数据集目录结构如下:
|
||||
```shell
|
||||
culane/driver_xx_xxframe # data folders x6
|
||||
culane/laneseg_label_w16 # lane segmentation labels
|
||||
culane/list # data lists
|
||||
```
|
||||
如果您使用百度云链接下载,注意确保`driver_23_30frame_part1.tar.gz`和`driver_23_30frame_part2.tar.gz`解压后的文件都在`driver_23_30frame`目录下。
|
||||
|
||||
现已将用于测试的小数据集上传到PaddleDetection,可通过运行训练脚本,自动下载并解压数据,如需复现结果请下载链接中的全量数据集训练。
|
||||
|
||||
### 训练
|
||||
- GPU单卡训练
|
||||
```shell
|
||||
python tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
|
||||
```
|
||||
- GPU多卡训练
|
||||
```shell
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
|
||||
```
|
||||
|
||||
### 评估
|
||||
```shell
|
||||
python tools/eval.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
|
||||
```
|
||||
|
||||
### 预测
|
||||
```shell
|
||||
python tools/infer_culane.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
|
||||
```
|
||||
|
||||
注意:预测功能暂不支持模型静态图推理部署。
|
||||
|
||||
## 引用
|
||||
```
|
||||
@InProceedings{Zheng_2022_CVPR,
|
||||
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
|
||||
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
|
||||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
month = {June},
|
||||
year = {2022},
|
||||
pages = {898-907}
|
||||
}
|
||||
```
|
||||
68
paddle_detection/configs/clrnet/README.md
Normal file
68
paddle_detection/configs/clrnet/README.md
Normal file
@@ -0,0 +1,68 @@
|
||||
English | [简体中文](README.cn.md)
|
||||
|
||||
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
|
||||
|
||||
## Table of Contents
|
||||
- [Introduction](#Introduction)
|
||||
- [Model Zoo](#model-zoo)
|
||||
- [Citations](#Citations)
|
||||
|
||||
## Introduction
|
||||
|
||||
[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. CLRNet introduces a line prior for lane detection, a line IoU loss, and an NMS method. It fuses contextual high-level features of lane lines with low-level features and refines them across FPN scales. The model achieves SOTA performance on lane detection datasets.
|
||||
|
||||
## Model Zoo
|
||||
|
||||
### CLRNet Results on CULane dataset
|
||||
|
||||
| backbone | mF1 | F1@50 | F1@75 | download | config |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| ResNet-18 | 54.98 | 79.46 | 62.10 | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |
|
||||
|
||||
### Download
|
||||
Download [CULane](https://xingangpan.github.io/projects/CULane.html). Then extract them to `dataset/culane`.
|
||||
|
||||
For CULane, you should have structure like this:
|
||||
```shell
|
||||
culane/driver_xx_xxframe # data folders x6
|
||||
culane/laneseg_label_w16 # lane segmentation labels
|
||||
culane/list # data lists
|
||||
```
|
||||
If you use Baidu Cloud, make sure that the images in `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` are located in one folder, `driver_23_30frame`, instead of two separate folders after you decompress them.
|
||||
|
||||
We have uploaded a small subset of the CULane dataset to PaddleDetection for code checking. You can simply run the training script below to download it automatically. If you want to reproduce the results, you need to download the full dataset from the link above for training.
|
||||
|
||||
### Training
|
||||
- single GPU
|
||||
```shell
|
||||
python tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
|
||||
```
|
||||
- multi GPU
|
||||
```shell
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clrnet_resnet18_culane.yml
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
```shell
|
||||
python tools/eval.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
|
||||
```
|
||||
|
||||
### Inference
|
||||
```shell
|
||||
python tools/infer_culane.py -c configs/clrnet/clrnet_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
|
||||
```
|
||||
|
||||
Note: Inference does not currently support static-graph model deployment.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@InProceedings{Zheng_2022_CVPR,
|
||||
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
|
||||
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
|
||||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
month = {June},
|
||||
year = {2022},
|
||||
pages = {898-907}
|
||||
}
|
||||
```
|
||||
41
paddle_detection/configs/clrnet/_base_/clrnet_r18_fpn.yml
Normal file
41
paddle_detection/configs/clrnet/_base_/clrnet_r18_fpn.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
architecture: CLRNet
|
||||
|
||||
CLRNet:
|
||||
backbone: CLRResNet
|
||||
neck: CLRFPN
|
||||
clr_head: CLRHead
|
||||
|
||||
CLRResNet:
|
||||
resnet: 'resnet18'
|
||||
pretrained: True
|
||||
|
||||
CLRFPN:
|
||||
in_channels: [128,256,512]
|
||||
out_channel: 64
|
||||
extra_stage: 0
|
||||
|
||||
CLRHead:
|
||||
prior_feat_channels: 64
|
||||
fc_hidden_dim: 64
|
||||
num_priors: 192
|
||||
num_fc: 2
|
||||
refine_layers: 3
|
||||
sample_points: 36
|
||||
loss: CLRNetLoss
|
||||
conf_threshold: 0.4
|
||||
nms_thres: 0.8
|
||||
|
||||
CLRNetLoss:
|
||||
cls_loss_weight : 2.0
|
||||
xyt_loss_weight : 0.2
|
||||
iou_loss_weight : 2.0
|
||||
seg_loss_weight : 1.0
|
||||
refine_layers : 3
|
||||
ignore_label: 255
|
||||
bg_weight: 0.4
|
||||
|
||||
# for visualizing lane detection results
|
||||
sample_y:
|
||||
start: 589
|
||||
end: 230
|
||||
step: -20
|
||||
37
paddle_detection/configs/clrnet/_base_/clrnet_reader.yml
Normal file
37
paddle_detection/configs/clrnet/_base_/clrnet_reader.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
worker_num: 10
|
||||
|
||||
img_h: &img_h 320
|
||||
img_w: &img_w 800
|
||||
ori_img_h: &ori_img_h 590
|
||||
ori_img_w: &ori_img_w 1640
|
||||
num_points: &num_points 72
|
||||
max_lanes: &max_lanes 4
|
||||
|
||||
TrainReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneTrainProcess: {img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
|
||||
|
||||
|
||||
|
||||
EvalReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
|
||||
|
||||
|
||||
TestReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
14
paddle_detection/configs/clrnet/_base_/optimizer_1x.yml
Normal file
14
paddle_detection/configs/clrnet/_base_/optimizer_1x.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
epoch: 15
|
||||
snapshot_epoch: 5
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.6e-3
|
||||
schedulers:
|
||||
- !CosineDecay
|
||||
max_epochs: 15
|
||||
use_warmup: False
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: False
|
||||
optimizer:
|
||||
type: AdamW
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/culane.yml',
|
||||
'_base_/clrnet_reader.yml',
|
||||
'_base_/clrnet_r18_fpn.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'../runtime.yml'
|
||||
]
|
||||
|
||||
weights: output/clr_resnet18_culane/model_final
|
||||
20
paddle_detection/configs/convnext/README.md
Normal file
20
paddle_detection/configs/convnext/README.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# ConvNeXt (A ConvNet for the 2020s)
|
||||
|
||||
## 模型库
|
||||
### ConvNeXt on COCO
|
||||
|
||||
| 网络模型 | 输入尺寸 | 图片数/GPU | 学习率策略 | mAP<sup>val<br>0.5:0.95</sup> | mAP<sup>val<br>0.5</sup> | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
|
||||
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |
|
||||
| PP-YOLOE-ConvNeXt-tiny | 640 | 16 | 36e | 44.6 | 63.3 | 33.04 | 13.87 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_convnext_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_convnext_tiny_36e_coco.yml) |
|
||||
| YOLOX-ConvNeXt-s | 640 | 8 | 36e | 44.6 | 65.3 | 36.20 | 27.52 | [下载链接](https://paddledet.bj.bcebos.com/models/yolox_convnext_s_36e_coco.pdparams) | [配置文件](./yolox_convnext_s_36e_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@Article{liu2022convnet,
|
||||
author = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie},
|
||||
title = {A ConvNet for the 2020s},
|
||||
journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
year = {2022},
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,55 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../ppyoloe/_base_/ppyoloe_crn.yml',
|
||||
'../ppyoloe/_base_/ppyoloe_reader.yml',
|
||||
]
|
||||
depth_mult: 0.25
|
||||
width_mult: 0.50
|
||||
|
||||
log_iter: 100
|
||||
snapshot_epoch: 5
|
||||
weights: output/ppyoloe_convnext_tiny_36e_coco/model_final
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
|
||||
|
||||
|
||||
YOLOv3:
|
||||
backbone: ConvNeXt
|
||||
neck: CustomCSPPAN
|
||||
yolo_head: PPYOLOEHead
|
||||
post_process: ~
|
||||
|
||||
ConvNeXt:
|
||||
arch: 'tiny'
|
||||
drop_path_rate: 0.4
|
||||
layer_scale_init_value: 1.0
|
||||
return_idx: [1, 2, 3]
|
||||
|
||||
|
||||
PPYOLOEHead:
|
||||
static_assigner_epoch: 12
|
||||
nms:
|
||||
nms_top_k: 1000
|
||||
keep_top_k: 300
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.7
|
||||
|
||||
|
||||
TrainReader:
|
||||
batch_size: 16
|
||||
|
||||
|
||||
epoch: 36
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [36]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0005
|
||||
@@ -0,0 +1,58 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../yolox/_base_/yolox_cspdarknet.yml',
|
||||
'../yolox/_base_/yolox_reader.yml'
|
||||
]
|
||||
depth_mult: 0.33
|
||||
width_mult: 0.50
|
||||
|
||||
log_iter: 100
|
||||
snapshot_epoch: 5
|
||||
weights: output/yolox_convnext_s_36e_coco/model_final
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
|
||||
|
||||
|
||||
YOLOX:
|
||||
backbone: ConvNeXt
|
||||
neck: YOLOCSPPAN
|
||||
head: YOLOXHead
|
||||
size_stride: 32
|
||||
size_range: [15, 25] # multi-scale range [480*480 ~ 800*800]
|
||||
|
||||
ConvNeXt:
|
||||
arch: 'tiny'
|
||||
drop_path_rate: 0.4
|
||||
layer_scale_init_value: 1.0
|
||||
return_idx: [1, 2, 3]
|
||||
|
||||
|
||||
TrainReader:
|
||||
batch_size: 8
|
||||
mosaic_epoch: 30
|
||||
|
||||
|
||||
YOLOXHead:
|
||||
l1_epoch: 30
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
nms_top_k: 10000
|
||||
keep_top_k: 1000
|
||||
score_threshold: 0.001
|
||||
nms_threshold: 0.65
|
||||
|
||||
|
||||
epoch: 36
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [36]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0005
|
||||
21
paddle_detection/configs/datasets/coco_detection.yml
Normal file
21
paddle_detection/configs/datasets/coco_detection.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: COCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
name: COCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
name: COCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
allow_empty: true
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
|
||||
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
|
||||
20
paddle_detection/configs/datasets/coco_instance.yml
Normal file
20
paddle_detection/configs/datasets/coco_instance.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
metric: COCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
name: COCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
name: COCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
|
||||
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
|
||||
28
paddle_detection/configs/datasets/culane.yml
Normal file
28
paddle_detection/configs/datasets/culane.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
metric: CULaneMetric
|
||||
num_classes: 5 # 4 lanes + background
|
||||
|
||||
cut_height: &cut_height 270
|
||||
dataset_dir: &dataset_dir dataset/culane
|
||||
|
||||
TrainDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/train_gt.txt'
|
||||
split: train
|
||||
cut_height: *cut_height
|
||||
|
||||
|
||||
EvalDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/test.txt'
|
||||
split: test
|
||||
cut_height: *cut_height
|
||||
|
||||
|
||||
TestDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/test.txt'
|
||||
split: test
|
||||
cut_height: *cut_height
|
||||
21
paddle_detection/configs/datasets/dota.yml
Normal file
21
paddle_detection/configs/datasets/dota.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 15
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: test1024/DOTA_test1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
21
paddle_detection/configs/datasets/dota_ms.yml
Normal file
21
paddle_detection/configs/datasets/dota_ms.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 15
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: test1024/DOTA_test1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
25
paddle_detection/configs/datasets/mcmot.yml
Normal file
25
paddle_detection/configs/datasets/mcmot.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
metric: MCMOT
|
||||
num_classes: 10
|
||||
# using VisDrone2019 MOT dataset with 10 classes as default, you can modify it for your needs.
|
||||
|
||||
# for MCMOT training
|
||||
TrainDataset:
|
||||
!MCMOTDataSet
|
||||
dataset_dir: dataset/mot
|
||||
image_lists: ['visdrone_mcmot.train']
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
|
||||
label_list: label_list.txt
|
||||
|
||||
# for MCMOT evaluation
|
||||
# If you want to change the MCMOT evaluation dataset, please modify 'data_root'
|
||||
EvalMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
data_root: visdrone_mcmot/images/val
|
||||
keep_ori_im: False # set True to save visualization images or video, or when used with DeepSORT
|
||||
|
||||
# for MCMOT video inference
|
||||
TestMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
keep_ori_im: True # set True to save visualization images or video
|
||||
23
paddle_detection/configs/datasets/mot.yml
Normal file
23
paddle_detection/configs/datasets/mot.yml
Normal file
@@ -0,0 +1,23 @@
|
||||
metric: MOT
|
||||
num_classes: 1
|
||||
|
||||
# for MOT training
|
||||
TrainDataset:
|
||||
!MOTDataSet
|
||||
dataset_dir: dataset/mot
|
||||
image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train']
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
|
||||
|
||||
# for MOT evaluation
|
||||
# If you want to change the MOT evaluation dataset, please modify 'data_root'
|
||||
EvalMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
data_root: MOT16/images/train
|
||||
keep_ori_im: False # set True to save visualization images or video, or when used with DeepSORT
|
||||
|
||||
# for MOT video inference
|
||||
TestMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
keep_ori_im: True # set True to save visualization images or video
|
||||
21
paddle_detection/configs/datasets/objects365_detection.yml
Normal file
21
paddle_detection/configs/datasets/objects365_detection.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: COCO
|
||||
num_classes: 365
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: train
|
||||
anno_path: annotations/zhiyuan_objv2_train.json
|
||||
dataset_dir: dataset/objects365
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: val
|
||||
anno_path: annotations/zhiyuan_objv2_val.json
|
||||
dataset_dir: dataset/objects365
|
||||
allow_empty: true
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: annotations/zhiyuan_objv2_val.json
|
||||
dataset_dir: dataset/objects365/
|
||||
21
paddle_detection/configs/datasets/roadsign_voc.yml
Normal file
21
paddle_detection/configs/datasets/roadsign_voc.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: VOC
|
||||
map_type: integral
|
||||
num_classes: 4
|
||||
|
||||
TrainDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/roadsign_voc
|
||||
anno_path: train.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
EvalDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/roadsign_voc
|
||||
anno_path: valid.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: dataset/roadsign_voc/label_list.txt
|
||||
47
paddle_detection/configs/datasets/sniper_coco_detection.yml
Normal file
47
paddle_detection/configs/datasets/sniper_coco_detection.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
metric: SNIPERCOCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: true
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
|
||||
chip_target_size: 512
|
||||
chip_target_stride: 200
|
||||
use_neg_chip: false
|
||||
max_neg_num_per_im: 8
|
||||
|
||||
|
||||
EvalDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: false
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
|
||||
chip_target_size: 512
|
||||
chip_target_stride: 200
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
TestDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val2017
|
||||
dataset_dir: dataset/coco
|
||||
is_trainset: false
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
|
||||
chip_target_size: 500
|
||||
chip_target_stride: 200
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
metric: SNIPERCOCO
|
||||
num_classes: 9
|
||||
|
||||
TrainDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: train
|
||||
anno_path: annotations/train.json
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: true
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
use_neg_chip: false
|
||||
max_neg_num_per_im: 8
|
||||
|
||||
|
||||
EvalDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val
|
||||
anno_path: annotations/val.json
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: false
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
TestDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
is_trainset: false
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
|
||||
21
paddle_detection/configs/datasets/spine_coco.yml
Normal file
21
paddle_detection/configs/datasets/spine_coco.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 9
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: images
|
||||
anno_path: annotations/train.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: images
|
||||
anno_path: annotations/valid.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: annotations/valid.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
22
paddle_detection/configs/datasets/visdrone_detection.yml
Normal file
22
paddle_detection/configs/datasets/visdrone_detection.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
metric: COCO
|
||||
num_classes: 10
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: VisDrone2019-DET-train
|
||||
anno_path: train.json
|
||||
dataset_dir: dataset/visdrone
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: VisDrone2019-DET-val
|
||||
anno_path: val.json
|
||||
# image_dir: test_dev
|
||||
# anno_path: test_dev.json
|
||||
dataset_dir: dataset/visdrone
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: val.json
|
||||
dataset_dir: dataset/visdrone
|
||||
21
paddle_detection/configs/datasets/voc.yml
Normal file
21
paddle_detection/configs/datasets/voc.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
metric: VOC
|
||||
map_type: 11point
|
||||
num_classes: 20
|
||||
|
||||
TrainDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/voc
|
||||
anno_path: trainval.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
EvalDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/voc
|
||||
anno_path: test.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: dataset/voc/label_list.txt
|
||||
20
paddle_detection/configs/datasets/wider_face.yml
Normal file
20
paddle_detection/configs/datasets/wider_face.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
metric: WiderFace
|
||||
num_classes: 1
|
||||
|
||||
TrainDataset:
|
||||
!WIDERFaceDataSet
|
||||
dataset_dir: dataset/wider_face
|
||||
anno_path: wider_face_split/wider_face_train_bbx_gt.txt
|
||||
image_dir: WIDER_train/images
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class']
|
||||
|
||||
EvalDataset:
|
||||
!WIDERFaceDataSet
|
||||
dataset_dir: dataset/wider_face
|
||||
anno_path: wider_face_split/wider_face_val_bbx_gt.txt
|
||||
image_dir: WIDER_val/images
|
||||
data_fields: ['image']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
use_default_label: true
|
||||
37
paddle_detection/configs/dcn/README.md
Normal file
37
paddle_detection/configs/dcn/README.md
Normal file
@@ -0,0 +1,37 @@
|
||||
### Deformable ConvNets v2
|
||||
|
||||
| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
|
||||
| ResNet50-FPN | Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 42.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 2x | - | 43.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 45.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 46.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | - | 42.7 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | - | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 45.6 | 40.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 47.3 | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 48.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
|
||||
|
||||
**注意事项:**
|
||||
|
||||
- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
|
||||
- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{dai2017deformable,
|
||||
title={Deformable Convolutional Networks},
|
||||
author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
|
||||
booktitle={Proceedings of the IEEE international conference on computer vision},
|
||||
year={2017}
|
||||
}
|
||||
@article{zhu2018deformable,
|
||||
title={Deformable ConvNets v2: More Deformable, Better Results},
|
||||
author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
|
||||
journal={arXiv preprint arXiv:1811.11168},
|
||||
year={2018}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../cascade_rcnn/_base_/optimizer_1x.yml',
|
||||
'../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'../cascade_rcnn/_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 101
|
||||
groups: 64
|
||||
base_width: 4
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../faster_rcnn/_base_/optimizer_1x.yml',
|
||||
'../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
|
||||
'../faster_rcnn/_base_/faster_fpn_reader.yml',
|
||||
]
|
||||
weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r50_vd_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,26 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,17 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# for ResNeXt: groups, base_width, base_channels
|
||||
depth: 101
|
||||
groups: 64
|
||||
base_width: 4
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'../mask_rcnn/_base_/optimizer_1x.yml',
|
||||
'../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
|
||||
'../mask_rcnn/_base_/mask_fpn_reader.yml',
|
||||
]
|
||||
weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,26 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,17 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# for ResNeXt: groups, base_width, base_channels
|
||||
depth: 101
|
||||
variant: d
|
||||
groups: 64
|
||||
base_width: 4
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
36
paddle_detection/configs/deformable_detr/README.md
Normal file
36
paddle_detection/configs/deformable_detr/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Deformable DETR
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
Deformable DETR is an object detection model based on DETR. We reproduced the model of the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Images/GPU | Epochs | Box AP | Config | Log | Download |
|
||||
|:--------:|:---------------:|:----------:|:------:|:------:|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
|
||||
| R-50 | Deformable DETR | 2 | 50 | 44.5 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/deformable_detr_r50_1x_coco_44.5.log) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- Deformable DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported results are `mAP(IoU=0.5:0.95)`.
|
||||
- Deformable DETR is trained for 50 epochs on 8 GPUs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/deformable_detr/deformable_detr_r50_1x_coco.yml --fleet
|
||||
```
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{
|
||||
zhu2021deformable,
|
||||
title={Deformable DETR: Deformable Transformers for End-to-End Object Detection},
|
||||
author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai},
|
||||
booktitle={International Conference on Learning Representations},
|
||||
year={2021},
|
||||
url={https://openreview.net/forum?id=gZ9hCDWe6ke}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,48 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DeformableTransformer
|
||||
detr_head: DeformableDETRHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [1, 2, 3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
|
||||
DeformableTransformer:
|
||||
num_queries: 300
|
||||
position_embed_type: sine
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
activation: relu
|
||||
num_feature_levels: 4
|
||||
num_encoder_points: 4
|
||||
num_decoder_points: 4
|
||||
|
||||
|
||||
DeformableDETRHead:
|
||||
num_mlp_layers: 3
|
||||
|
||||
|
||||
DETRLoss:
|
||||
loss_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
|
||||
|
||||
HungarianMatcher:
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
@@ -0,0 +1,44 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 50
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [40]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/deformable_optimizer_1x.yml',
|
||||
'_base_/deformable_detr_r50.yml',
|
||||
'_base_/deformable_detr_reader.yml',
|
||||
]
|
||||
weights: output/deformable_detr_r50_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
39
paddle_detection/configs/detr/README.md
Normal file
39
paddle_detection/configs/detr/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# DETR
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
DETR is an object detection model based on the Transformer architecture. This is our reproduction of the model described in the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download |
|
||||
|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
|
||||
| R-50 | DETR | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
|
||||
- DETR is trained on 8 GPUs for 500 epochs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/detr/detr_r50_1x_coco.yml --fleet
|
||||
```
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{detr,
|
||||
author = {Nicolas Carion and
|
||||
Francisco Massa and
|
||||
Gabriel Synnaeve and
|
||||
Nicolas Usunier and
|
||||
Alexander Kirillov and
|
||||
Sergey Zagoruyko},
|
||||
title = {End-to-End Object Detection with Transformers},
|
||||
booktitle = {ECCV},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
44
paddle_detection/configs/detr/_base_/detr_r50.yml
Normal file
44
paddle_detection/configs/detr/_base_/detr_r50.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DETRTransformer
|
||||
detr_head: DETRHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
|
||||
DETRTransformer:
|
||||
num_queries: 100
|
||||
position_embed_type: sine
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.1
|
||||
activation: relu
|
||||
|
||||
|
||||
DETRHead:
|
||||
num_mlp_layers: 3
|
||||
|
||||
|
||||
DETRLoss:
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
|
||||
aux_loss: True
|
||||
|
||||
|
||||
HungarianMatcher:
|
||||
matcher_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
44
paddle_detection/configs/detr/_base_/detr_reader.yml
Normal file
44
paddle_detection/configs/detr/_base_/detr_reader.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
worker_num: 0
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
16
paddle_detection/configs/detr/_base_/optimizer_1x.yml
Normal file
16
paddle_detection/configs/detr/_base_/optimizer_1x.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
epoch: 500
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [400]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
9
paddle_detection/configs/detr/detr_r50_1x_coco.yml
Normal file
9
paddle_detection/configs/detr/detr_r50_1x_coco.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/detr_r50.yml',
|
||||
'_base_/detr_reader.yml',
|
||||
]
|
||||
weights: output/detr_r50_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
39
paddle_detection/configs/dino/README.md
Normal file
39
paddle_detection/configs/dino/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
[DINO](https://arxiv.org/abs/2203.03605) is an object detection model based on DETR. We reproduced the model of the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Epochs | Box AP | Config | Log | Download |
|
||||
|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:-------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
|
||||
| R-50 | dino_r50_4scale | 12 | 49.5 | [config](./dino_r50_4scale_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_1x_coco_49.5.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
|
||||
| R-50 | dino_r50_4scale | 24 | 50.8 | [config](./dino_r50_4scale_2x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_2x_coco_50.8.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- DINO is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
|
||||
- DINO is trained on 4 GPUs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --fleet --eval
|
||||
```
|
||||
|
||||
## Custom Operator
|
||||
- For the multi-scale deformable attention custom operator, see [here](../../ppdet/modeling/transformers/ext_op).
|
||||
|
||||
## Citations
|
||||
```
|
||||
@misc{zhang2022dino,
|
||||
title={DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection},
|
||||
author={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel M. Ni and Heung-Yeung Shum},
|
||||
year={2022},
|
||||
eprint={2203.03605},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
45
paddle_detection/configs/dino/_base_/dino_focalnet.yml
Normal file
45
paddle_detection/configs/dino/_base_/dino_focalnet.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
architecture: DETR
|
||||
# pretrain_weights: # rewrite in FocalNet.pretrained in ppdet/modeling/backbones/focalnet.py
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
DETR:
|
||||
backbone: FocalNet
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
FocalNet:
|
||||
arch: 'focalnet_L_384_22k_fl4'
|
||||
out_indices: [1, 2, 3]
|
||||
pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 20
|
||||
pe_offset: 0.0
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
49
paddle_detection/configs/dino/_base_/dino_r50.yml
Normal file
49
paddle_detection/configs/dino/_base_/dino_r50.yml
Normal file
@@ -0,0 +1,49 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [1, 2, 3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 20
|
||||
pe_offset: 0.0
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
40
paddle_detection/configs/dino/_base_/dino_reader.yml
Normal file
40
paddle_detection/configs/dino/_base_/dino_reader.yml
Normal file
@@ -0,0 +1,40 @@
|
||||
worker_num: 4
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 4
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
46
paddle_detection/configs/dino/_base_/dino_swin.yml
Normal file
46
paddle_detection/configs/dino/_base_/dino_swin.yml
Normal file
@@ -0,0 +1,46 @@
|
||||
architecture: DETR
|
||||
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
DETR:
|
||||
backbone: SwinTransformer
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
SwinTransformer:
|
||||
arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
|
||||
ape: false
|
||||
drop_path_rate: 0.2
|
||||
patch_norm: true
|
||||
out_indices: [1, 2, 3]
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 10000
|
||||
pe_offset: -0.5
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
16
paddle_detection/configs/dino/_base_/optimizer_1x.yml
Normal file
16
paddle_detection/configs/dino/_base_/optimizer_1x.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [11]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
16
paddle_detection/configs/dino/_base_/optimizer_2x.yml
Normal file
16
paddle_detection/configs/dino/_base_/optimizer_2x.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
epoch: 24
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [20]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
16
paddle_detection/configs/dino/_base_/optimizer_3x.yml
Normal file
16
paddle_detection/configs/dino/_base_/optimizer_3x.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
epoch: 36
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [33]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
11
paddle_detection/configs/dino/dino_r50_4scale_1x_coco.yml
Normal file
11
paddle_detection/configs/dino/dino_r50_4scale_1x_coco.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/dino_r50.yml',
|
||||
'_base_/dino_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/dino_r50_4scale_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
log_iter: 100
|
||||
11
paddle_detection/configs/dino/dino_r50_4scale_2x_coco.yml
Normal file
11
paddle_detection/configs/dino/dino_r50_4scale_2x_coco.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_2x.yml',
|
||||
'_base_/dino_r50.yml',
|
||||
'_base_/dino_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/dino_r50_4scale_2x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
log_iter: 100
|
||||
176
paddle_detection/configs/face_detection/README.md
Normal file
176
paddle_detection/configs/face_detection/README.md
Normal file
@@ -0,0 +1,176 @@
|
||||
# 人脸检测模型
|
||||
|
||||
## 简介
|
||||
`face_detection`中提供高效、高速的人脸检测解决方案,包括最先进的模型和经典模型。
|
||||
|
||||

|
||||
|
||||
## 模型库
|
||||
|
||||
#### WIDER-FACE数据集上的mAP
|
||||
|
||||
| 网络结构 | 输入尺寸 | 图片个数/GPU | 学习率策略 | Easy/Medium/Hard Set | 预测时延(SD855)| 模型大小(MB) | 下载 | 配置文件 |
|
||||
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
|
||||
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
|
||||
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
|
||||
|
||||
**注意:**
|
||||
- 我们使用多尺度评估策略得到`Easy/Medium/Hard Set`里的mAP。具体细节请参考[在WIDER-FACE数据集上评估](#在WIDER-FACE数据集上评估)。
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 数据准备
|
||||
我们使用[WIDER-FACE数据集](http://shuoyang1213.me/WIDERFACE/)进行训练和模型测试,官方网站提供了详细的数据介绍。
|
||||
- WIDER-Face数据源:
|
||||
使用如下目录结构加载`wider_face`类型的数据集:
|
||||
|
||||
```
|
||||
dataset/wider_face/
|
||||
├── wider_face_split
|
||||
│ ├── wider_face_train_bbx_gt.txt
|
||||
│ ├── wider_face_val_bbx_gt.txt
|
||||
├── WIDER_train
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
├── WIDER_val
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
```
|
||||
|
||||
- 手动下载数据集:
|
||||
要下载WIDER-FACE数据集,请运行以下命令:
|
||||
```
|
||||
cd dataset/wider_face && ./download_wider_face.sh
|
||||
```
|
||||
|
||||
### 参数配置
|
||||
基础模型的配置可以参考`configs/face_detection/_base_/blazeface.yml`;
|
||||
改进模型增加FPN和SSH的neck结构,配置文件可以参考`configs/face_detection/_base_/blazeface_fpn.yml`,可以根据需求配置FPN和SSH,具体如下:
|
||||
```yaml
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish #配置backbone中BlazeBlock的激活函数,基础模型为relu,增加FPN和SSH时需使用hard_swish
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh #可选only_fpn、only_ssh和fpn_ssh
|
||||
in_channel: [96,96]
|
||||
```
|
||||
|
||||
|
||||
|
||||
### 训练与评估
|
||||
训练流程与评估流程方法与其他算法一致,请参考[GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。
|
||||
**注意:** 人脸检测模型目前不支持边训练边评估。
|
||||
|
||||
#### 在WIDER-FACE数据集上评估
|
||||
- 步骤一:评估并生成结果文件:
|
||||
```shell
|
||||
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
|
||||
-o weights=output/blazeface_1000e/model_final \
|
||||
multi_scale=True
|
||||
```
|
||||
设置`multi_scale=True`进行多尺度评估,评估完成后,将在`output/pred`中生成txt格式的测试结果。
|
||||
|
||||
- 步骤二:下载官方评估脚本和Ground Truth文件:
|
||||
```
|
||||
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
|
||||
unzip eval_tools.zip && rm -f eval_tools.zip
|
||||
```
|
||||
|
||||
- 步骤三:开始评估
|
||||
|
||||
方法一:python评估:
|
||||
```
|
||||
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
|
||||
cd WiderFace-Evaluation
|
||||
# 编译
|
||||
python3 setup.py build_ext --inplace
|
||||
# 开始评估
|
||||
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
|
||||
```
|
||||
|
||||
方法二:MatLab评估:
|
||||
```
|
||||
# 在`eval_tools/wider_eval.m`中修改保存结果路径和绘制曲线的名称:
|
||||
pred_dir = './pred';
|
||||
legend_name = 'Paddle-BlazeFace';
|
||||
|
||||
`wider_eval.m` 是评估模块的主要执行程序。运行命令如下:
|
||||
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
|
||||
```
|
||||
|
||||
### Python脚本预测
|
||||
为了支持二次开发,这里提供通过Python脚本使用Paddle Detection whl包来进行预测的示例。
|
||||
```python
|
||||
import cv2
|
||||
import paddle
|
||||
import numpy as np
|
||||
from ppdet.core.workspace import load_config
|
||||
from ppdet.engine import Trainer
|
||||
from ppdet.metrics import get_infer_results
|
||||
from ppdet.data.transform.operators import NormalizeImage, Permute
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 准备基础的参数
|
||||
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
|
||||
cfg = load_config(config_path)
|
||||
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
|
||||
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
|
||||
cfg.weights = weight_path
|
||||
bbox_thre = 0.8
|
||||
paddle.set_device('gpu')
|
||||
# 创建所需的类
|
||||
trainer = Trainer(cfg, mode='test')
|
||||
trainer.load_weights(cfg.weights)
|
||||
trainer.model.eval()
|
||||
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
|
||||
permuter = Permute()
|
||||
# 进行图片读取
|
||||
im = cv2.imread(infer_img_path)
|
||||
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
||||
# 准备数据字典
|
||||
data_dict = {'image': im}
|
||||
data_dict = normaler(data_dict)
|
||||
data_dict = permuter(data_dict)
|
||||
h, w, c = im.shape
|
||||
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
|
||||
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
|
||||
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
|
||||
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
|
||||
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
|
||||
# 进行预测
|
||||
outs = trainer.model(data_dict)
|
||||
# 对预测的数据进行后处理得到最终的bbox信息
|
||||
for key in ['im_shape', 'scale_factor', 'im_id']:
|
||||
outs[key] = data_dict[key]
|
||||
for key, value in outs.items():
|
||||
outs[key] = value.numpy()
|
||||
clsid2catid, catid2name = {0: 'face'}, {0: 0}
|
||||
batch_res = get_infer_results(outs, clsid2catid)
|
||||
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
|
||||
print(bbox)
|
||||
```
|
||||
|
||||
## Citations
|
||||
|
||||
```
|
||||
@article{bazarevsky2019blazeface,
|
||||
title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
|
||||
author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
|
||||
year={2019},
|
||||
eprint={1907.05047},
|
||||
archivePrefix={arXiv}
}
|
||||
```
|
||||
176
paddle_detection/configs/face_detection/README_en.md
Normal file
176
paddle_detection/configs/face_detection/README_en.md
Normal file
@@ -0,0 +1,176 @@
|
||||
# Face Detection Model
|
||||
|
||||
## Introduction
|
||||
`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models and classic models.
|
||||
|
||||

|
||||
|
||||
## Model Library
|
||||
|
||||
#### mAP on the WIDER-FACE dataset
|
||||
|
||||
| Network structure | Input size | Images/GPU | Learning rate strategy | Easy/Medium/Hard Set | Inference latency (SD855) | Model size (MB) | Download | Configuration File |
|
||||
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
|
||||
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
|
||||
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
|
||||
|
||||
**Attention:**
|
||||
- We use a multi-scale evaluation strategy to get the mAP on the `Easy/Medium/Hard Set`. Please refer to [Evaluated on the WIDER-FACE Dataset](#evaluated-on-the-wider-face-dataset) for details.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Data preparation
|
||||
We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and testing; the official website provides a detailed introduction to the data.
|
||||
- WIDER-Face data source:
|
||||
- Load a dataset of type `wider_face` using the following directory structure:
|
||||
```
|
||||
dataset/wider_face/
|
||||
├── wider_face_split
|
||||
│ ├── wider_face_train_bbx_gt.txt
|
||||
│ ├── wider_face_val_bbx_gt.txt
|
||||
├── WIDER_train
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
├── WIDER_val
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
```
|
||||
|
||||
- Manually download the dataset:
|
||||
To download the WIDER-FACE dataset, run the following command:
|
||||
```
|
||||
cd dataset/wider_face && ./download_wider_face.sh
|
||||
```
|
||||
|
||||
### Parameter configuration
|
||||
For the configuration of the base model, refer to `configs/face_detection/_base_/blazeface.yml`;
|
||||
The improved model adds FPN and SSH neck structures; for its configuration, refer to `configs/face_detection/_base_/blazeface_fpn.yml`. You can configure FPN and SSH as required, as follows:
|
||||
```yaml
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish # Activation function of BlazeBlock in the backbone. The base model uses relu; hard_swish is required when FPN and SSH are added
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh # Options: only_fpn, only_ssh, fpn_ssh
|
||||
in_channel: [96,96]
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Training and Evaluation
|
||||
The training and evaluation procedures are the same as for other algorithms; please refer to [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
|
||||
**Attention:** Face detection models currently do not support evaluation during training.
|
||||
|
||||
#### Evaluated on the WIDER-FACE Dataset
|
||||
- Step 1: Evaluate and generate a result file:
|
||||
```shell
|
||||
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
|
||||
-o weights=output/blazeface_1000e/model_final \
|
||||
multi_scale=True
|
||||
```
|
||||
Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in TXT format will be generated in `output/pred`.
|
||||
|
||||
- Step 2: Download the official evaluation script and Ground Truth file:
|
||||
```
|
||||
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
|
||||
unzip eval_tools.zip && rm -f eval_tools.zip
|
||||
```
|
||||
|
||||
- Step 3: Start the evaluation
|
||||
|
||||
Method 1: Python evaluation:
|
||||
```
|
||||
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
|
||||
cd WiderFace-Evaluation
|
||||
# compile
|
||||
python3 setup.py build_ext --inplace
|
||||
# Start the evaluation
|
||||
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
|
||||
```
|
||||
|
||||
Method 2: MatLab evaluation:
|
||||
```
|
||||
# In `eval_tools/wider_eval.m`, set the path of the saved results and the legend name of the plotted curve:
|
||||
pred_dir = './pred';
|
||||
legend_name = 'Paddle-BlazeFace';
|
||||
|
||||
`wider_eval.m` is the main implementation of the evaluation module. Run the following command:
|
||||
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
|
||||
```
|
||||
|
||||
### Use by Python Code
|
||||
To support secondary development, here is an example of using the PaddleDetection whl package to run prediction from a Python script.
|
||||
```python
|
||||
import cv2
|
||||
import paddle
|
||||
import numpy as np
|
||||
from ppdet.core.workspace import load_config
|
||||
from ppdet.engine import Trainer
|
||||
from ppdet.metrics import get_infer_results
|
||||
from ppdet.data.transform.operators import NormalizeImage, Permute
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# prepare for the parameters
|
||||
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
|
||||
cfg = load_config(config_path)
|
||||
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
|
||||
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
|
||||
cfg.weights = weight_path
|
||||
bbox_thre = 0.8
|
||||
paddle.set_device('gpu')
|
||||
# create the class object
|
||||
trainer = Trainer(cfg, mode='test')
|
||||
trainer.load_weights(cfg.weights)
|
||||
trainer.model.eval()
|
||||
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
|
||||
permuter = Permute()
|
||||
# read the image file
|
||||
im = cv2.imread(infer_img_path)
|
||||
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
||||
# prepare for the data dict
|
||||
data_dict = {'image': im}
|
||||
data_dict = normaler(data_dict)
|
||||
data_dict = permuter(data_dict)
|
||||
h, w, c = im.shape
|
||||
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
|
||||
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
|
||||
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
|
||||
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
|
||||
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
|
||||
# do the prediction
|
||||
outs = trainer.model(data_dict)
|
||||
# post-process the outputs to get the final bbox info
|
||||
for key in ['im_shape', 'scale_factor', 'im_id']:
|
||||
outs[key] = data_dict[key]
|
||||
for key, value in outs.items():
|
||||
outs[key] = value.numpy()
|
||||
clsid2catid, catid2name = {0: 'face'}, {0: 0}
|
||||
batch_res = get_infer_results(outs, clsid2catid)
|
||||
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
|
||||
print(bbox)
|
||||
```
|
||||
|
||||
|
||||
## Citations
|
||||
|
||||
```
|
||||
@article{bazarevsky2019blazeface,
|
||||
title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
|
||||
author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
|
||||
year={2019},
|
||||
eprint={1907.05047},
|
||||
archivePrefix={arXiv}
}
|
||||
```
|
||||
45
paddle_detection/configs/face_detection/_base_/blazeface.yml
Normal file
45
paddle_detection/configs/face_detection/_base_/blazeface.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
architecture: BlazeFace
|
||||
|
||||
BlazeFace:
|
||||
backbone: BlazeNet
|
||||
neck: BlazeNeck
|
||||
blaze_head: FaceHead
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: relu
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : None
|
||||
in_channel: [96,96]
|
||||
|
||||
FaceHead:
|
||||
in_channels: [96,96]
|
||||
anchor_generator: AnchorGeneratorSSD
|
||||
loss: SSDLoss
|
||||
|
||||
SSDLoss:
|
||||
overlap_threshold: 0.35
|
||||
|
||||
AnchorGeneratorSSD:
|
||||
steps: [8., 16.]
|
||||
aspect_ratios: [[1.], [1.]]
|
||||
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
|
||||
max_sizes: [[], []]
|
||||
offset: 0.5
|
||||
flip: False
|
||||
min_max_aspect_ratios_order: false
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: SSDBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 750
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.3
|
||||
nms_top_k: 5000
|
||||
nms_eta: 1.0
|
||||
@@ -0,0 +1,45 @@
|
||||
architecture: BlazeFace
|
||||
|
||||
BlazeFace:
|
||||
backbone: BlazeNet
|
||||
neck: BlazeNeck
|
||||
blaze_head: FaceHead
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh
|
||||
in_channel: [96,96]
|
||||
|
||||
FaceHead:
|
||||
in_channels: [48, 48]
|
||||
anchor_generator: AnchorGeneratorSSD
|
||||
loss: SSDLoss
|
||||
|
||||
SSDLoss:
|
||||
overlap_threshold: 0.35
|
||||
|
||||
AnchorGeneratorSSD:
|
||||
steps: [8., 16.]
|
||||
aspect_ratios: [[1.], [1.]]
|
||||
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
|
||||
max_sizes: [[], []]
|
||||
offset: 0.5
|
||||
flip: False
|
||||
min_max_aspect_ratios_order: false
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: SSDBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 750
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.3
|
||||
nms_top_k: 5000
|
||||
nms_eta: 1.0
|
||||
@@ -0,0 +1,44 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
num_max_boxes: 90
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
|
||||
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
|
||||
- RandomFlip: {}
|
||||
- CropWithDataAchorSampling: {
|
||||
anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
|
||||
batch_sampler: [
|
||||
[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
],
|
||||
target_size: 640}
|
||||
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
|
||||
- NormalizeBox: {}
|
||||
- PadBox: {num_max_boxes: 90}
|
||||
batch_transforms:
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 8
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
@@ -0,0 +1,21 @@
|
||||
epoch: 1000
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 333
|
||||
- 800
|
||||
- !LinearWarmup
|
||||
start_factor: 0.3333333333333333
|
||||
steps: 500
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.0
|
||||
type: RMSProp
|
||||
regularizer:
|
||||
factor: 0.0005
|
||||
type: L2
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/wider_face.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1000e.yml',
|
||||
'_base_/blazeface.yml',
|
||||
'_base_/face_reader.yml',
|
||||
]
|
||||
weights: output/blazeface_1000e/model_final
|
||||
multi_scale_eval: True
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/wider_face.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1000e.yml',
|
||||
'_base_/blazeface_fpn.yml',
|
||||
'_base_/face_reader.yml',
|
||||
]
|
||||
weights: output/blazeface_fpn_ssh_1000e/model_final
|
||||
multi_scale_eval: True
|
||||
38
paddle_detection/configs/faster_rcnn/README.md
Normal file
38
paddle_detection/configs/faster_rcnn/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
|
||||
| ResNet50 | Faster | 1 | 1x | ---- | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_1x_coco.yml) |
|
||||
| ResNet50-vd | Faster | 1 | 1x | ---- | 37.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_1x_coco.yml) |
|
||||
| ResNet101 | Faster | 1 | 1x | ---- | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_1x_coco.yml) |
|
||||
| ResNet34-FPN | Faster | 1 | 1x | ---- | 37.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_1x_coco.yml) |
|
||||
| ResNet34-FPN-MultiScaleTest | Faster | 1 | 1x | ---- | 38.2 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_multiscaletest_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml) |
|
||||
| ResNet34-vd-FPN | Faster | 1 | 1x | ---- | 38.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Faster | 1 | 1x | ---- | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Faster | 1 | 2x | ---- | 40.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_2x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | 1 | 1x | ---- | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | 1 | 2x | ---- | 40.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-FPN | Faster | 1 | 2x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | 1 | 1x | ---- | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | 1 | 2x | ---- | 43.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_2x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | 1 | 1x | ---- | 43.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | 1 | 2x | ---- | 44.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 1x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 1x | ---- | 42.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 2x | ---- | 44.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 3x | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](../swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{Ren_2017,
|
||||
title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
|
||||
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
||||
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
|
||||
author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
|
||||
year={2017},
|
||||
month={Jun},
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,66 @@
|
||||
architecture: FasterRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
FasterRCNN:
|
||||
backbone: ResNet
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [2]
|
||||
num_stages: 3
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [32, 64, 128, 256, 512]
|
||||
strides: [16]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 12000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: False
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 6000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: Res5Head
|
||||
roi_extractor:
|
||||
resolution: 14
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
with_pool: true
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,73 @@
|
||||
architecture: FasterRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
FasterRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: TwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
TwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,41 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
|
||||
- RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [-1, 3, 640, 640]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: 640, keep_ratio: True}
|
||||
- Pad: {size: 640}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,70 @@
|
||||
architecture: FasterRCNN
|
||||
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
|
||||
|
||||
FasterRCNN:
|
||||
backbone: SwinTransformer
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
SwinTransformer:
|
||||
arch: 'swin_T_224'
|
||||
ape: false
|
||||
drop_path_rate: 0.1
|
||||
patch_norm: true
|
||||
out_indices: [0, 1, 2, 3]
|
||||
pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: TwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
TwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
19
paddle_detection/configs/faster_rcnn/_base_/optimizer_1x.yml
Normal file
19
paddle_detection/configs/faster_rcnn/_base_/optimizer_1x.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
@@ -0,0 +1,20 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 1.0
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.05
|
||||
param_groups:
|
||||
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
|
||||
weight_decay: 0.0
|
||||
@@ -0,0 +1,14 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [2]
|
||||
num_stages: 3
|
||||
@@ -0,0 +1,14 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
@@ -0,0 +1,25 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user