移动paddle_detection
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
|
||||
| ResNet50-FPN | Cascade Faster | 1 | 1x | ---- | 41.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Cascade Mask | 1 | 1x | ---- | 41.8 | 36.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 1x | ---- | 44.4 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 2x | ---- | 45.0 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 1x | ---- | 44.9 | 39.1 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 2x | ---- | 45.7 | 39.7 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{Cai_2019,
|
||||
title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
|
||||
ISSN={1939-3539},
|
||||
url={http://dx.doi.org/10.1109/tpami.2019.2956516},
|
||||
DOI={10.1109/tpami.2019.2956516},
|
||||
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
||||
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
|
||||
author={Cai, Zhaowei and Vasconcelos, Nuno},
|
||||
year={2019},
|
||||
pages={1–1}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,97 @@
|
||||
architecture: CascadeRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
|
||||
CascadeRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: CascadeHead
|
||||
mask_head: MaskHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
mask_post_process: MaskPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
CascadeHead:
|
||||
head: CascadeTwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
cascade_iou: [0.5, 0.6, 0.7]
|
||||
use_random: True
|
||||
|
||||
CascadeTwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: RCNNBox
|
||||
prior_box_var: [30.0, 30.0, 15.0, 15.0]
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
|
||||
|
||||
MaskHead:
|
||||
head: MaskFeat
|
||||
roi_extractor:
|
||||
resolution: 14
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
mask_assigner: MaskAssigner
|
||||
share_bbox_feat: False
|
||||
|
||||
MaskFeat:
|
||||
num_convs: 4
|
||||
out_channel: 256
|
||||
|
||||
MaskAssigner:
|
||||
mask_resolution: 28
|
||||
|
||||
MaskPostProcess:
|
||||
binary_thresh: 0.5
|
||||
@@ -0,0 +1,75 @@
|
||||
architecture: CascadeRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
|
||||
CascadeRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: CascadeHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
CascadeHead:
|
||||
head: CascadeTwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
cascade_iou: [0.5, 0.6, 0.7]
|
||||
use_random: True
|
||||
|
||||
CascadeTwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: RCNNBox
|
||||
prior_box_var: [30.0, 30.0, 15.0, 15.0]
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,19 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.001
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
@@ -0,0 +1,8 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
|
||||
@@ -0,0 +1,18 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
@@ -0,0 +1,29 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_mask_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_mask_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [12, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,8 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
|
||||
@@ -0,0 +1,18 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
@@ -0,0 +1,29 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [12, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,37 @@
|
||||
English | [简体中文](README_cn.md)
|
||||
|
||||
# CenterNet (CenterNet: Objects as Points)
|
||||
|
||||
## Table of Contents
|
||||
- [Introduction](#Introduction)
|
||||
- [Model Zoo](#model-zoo)
|
||||
- [Citations](#Citations)
|
||||
|
||||
## Introduction
|
||||
|
||||
[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector, which models an object as a single point -- the center point of its bounding box. The detector uses keypoint estimation to find center points and regresses to all other object properties. The center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors.
|
||||
|
||||
## Model Zoo
|
||||
|
||||
### CenterNet Results on COCO-val 2017
|
||||
|
||||
| backbone | input shape | mAP | FPS | download | config |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
|
||||
| DLA-34 | 512x512 | 37.6 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
|
||||
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
|
||||
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
|
||||
| MobileNetV3_small + DLAUp | 512x512 | 17 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
|
||||
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
|
||||
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{zhou2019objects,
|
||||
title={Objects as points},
|
||||
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
|
||||
journal={arXiv preprint arXiv:1904.07850},
|
||||
year={2019}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,36 @@
|
||||
简体中文 | [English](README.md)
|
||||
|
||||
# CenterNet (CenterNet: Objects as Points)
|
||||
|
||||
## 内容
|
||||
- [简介](#简介)
|
||||
- [模型库](#模型库)
|
||||
- [引用](#引用)
|
||||
|
||||
## 简介
|
||||
|
||||
[CenterNet](http://arxiv.org/abs/1904.07850)是Anchor Free检测器,将物体表示为一个目标框中心点。CenterNet使用关键点检测的方式定位中心点并回归物体的其他属性。CenterNet是以中心点为基础的检测方法,是端到端可训练的,并且相较于基于anchor的检测器检测更加高效。
|
||||
|
||||
## 模型库
|
||||
|
||||
### CenterNet在COCO-val 2017上结果
|
||||
|
||||
| 骨干网络 | 输入尺寸 | mAP | FPS | 下载链接 | 配置文件 |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
|
||||
| DLA-34 | 512x512 | 37.6 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [配置文件](./centernet_dla34_140e_coco.yml) |
|
||||
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [配置文件](./centernet_r50_140e_coco.yml) |
|
||||
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [配置文件](./centernet_mbv1_140e_coco.yml) |
|
||||
| MobileNetV3_small + DLAUp | 512x512 | 17 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [配置文件](./centernet_mbv3_small_140e_coco.yml) |
|
||||
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [配置文件](./centernet_mbv3_large_140e_coco.yml) |
|
||||
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [配置文件](./centernet_shufflenetv2_140e_coco.yml) |
|
||||
|
||||
## 引用
|
||||
```
|
||||
@article{zhou2019objects,
|
||||
title={Objects as points},
|
||||
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
|
||||
journal={arXiv preprint arXiv:1904.07850},
|
||||
year={2019}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,22 @@
|
||||
architecture: CenterNet
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
|
||||
|
||||
CenterNet:
|
||||
backbone: DLA
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
DLA:
|
||||
depth: 34
|
||||
|
||||
CenterNetDLAFPN:
|
||||
down_ratio: 4
|
||||
|
||||
CenterNetHead:
|
||||
head_planes: 256
|
||||
regress_ltrb: False
|
||||
|
||||
CenterNetPostProcess:
|
||||
max_per_img: 100
|
||||
regress_ltrb: False
|
||||
@@ -0,0 +1,34 @@
|
||||
architecture: CenterNet
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
|
||||
norm_type: sync_bn
|
||||
use_ema: true
|
||||
ema_decay: 0.9998
|
||||
|
||||
CenterNet:
|
||||
backbone: ResNet
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
variant: d
|
||||
return_idx: [0, 1, 2, 3]
|
||||
freeze_at: -1
|
||||
norm_decay: 0.
|
||||
dcn_v2_stages: [3]
|
||||
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 4
|
||||
down_ratio: 4
|
||||
dcn_v2: False
|
||||
|
||||
CenterNetHead:
|
||||
head_planes: 256
|
||||
regress_ltrb: False
|
||||
|
||||
CenterNetPostProcess:
|
||||
max_per_img: 100
|
||||
regress_ltrb: False
|
||||
@@ -0,0 +1,35 @@
|
||||
worker_num: 4
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 512, 512]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
|
||||
- CenterRandColor: {}
|
||||
- Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
|
||||
- Permute: {}
|
||||
- Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
|
||||
batch_size: 16
|
||||
shuffle: True
|
||||
drop_last: True
|
||||
use_shared_memory: True
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [3, 512, 512]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
@@ -0,0 +1,14 @@
|
||||
epoch: 140
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0005
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [90, 120]
|
||||
use_warmup: False
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
type: Adam
|
||||
regularizer: NULL
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_140e.yml',
|
||||
'_base_/centernet_dla34.yml',
|
||||
'_base_/centernet_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/centernet_dla34_140e_coco/model_final
|
||||
@@ -0,0 +1,21 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
|
||||
weights: output/centernet_mbv1_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNet
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNet:
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [3, 5, 11, 13]
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,22 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
|
||||
weights: output/centernet_mbv3_large_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNetV3
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNetV3:
|
||||
model_name: large
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [4, 7, 13, 16]
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,28 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
|
||||
weights: output/centernet_mbv3_small_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: MobileNetV3
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
MobileNetV3:
|
||||
model_name: small
|
||||
scale: 1.
|
||||
with_extra_blocks: false
|
||||
extra_block_filters: []
|
||||
feature_maps: [4, 9, 12]
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 3
|
||||
down_ratio: 8
|
||||
dcn_v2: False
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_140e.yml',
|
||||
'_base_/centernet_r50.yml',
|
||||
'_base_/centernet_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/centernet_r50_140e_coco/model_final
|
||||
@@ -0,0 +1,33 @@
|
||||
_BASE_: [
|
||||
'centernet_r50_140e_coco.yml'
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ShuffleNetV2_x1_0_pretrained.pdparams
|
||||
weights: output/centernet_shufflenetv2_140e_coco/model_final
|
||||
|
||||
CenterNet:
|
||||
backbone: ShuffleNetV2
|
||||
neck: CenterNetDLAFPN
|
||||
head: CenterNetHead
|
||||
post_process: CenterNetPostProcess
|
||||
|
||||
ShuffleNetV2:
|
||||
scale: 1.0
|
||||
feature_maps: [5, 13, 17]
|
||||
act: leaky_relu
|
||||
|
||||
CenterNetDLAFPN:
|
||||
first_level: 0
|
||||
last_level: 3
|
||||
down_ratio: 8
|
||||
dcn_v2: False
|
||||
|
||||
TrainReader:
|
||||
batch_size: 32
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- WarpAffine: {keep_res: False, input_h: 512, input_w: 512}
|
||||
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
|
||||
- Permute: {}
|
||||
@@ -0,0 +1,68 @@
|
||||
简体中文 | [English](README.md)
|
||||
|
||||
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
|
||||
|
||||
## 目录
|
||||
- [简介](#简介)
|
||||
- [模型库](#模型库)
|
||||
- [引用](#引用)
|
||||
|
||||
## 简介
|
||||
|
||||
[CLRNet](https://arxiv.org/abs/2203.10350)是一个车道线检测模型。CLRNet模型设计了车道线检测的直线先验轨迹,车道线iou以及nms方法,融合提取车道线轨迹的上下文高层特征与底层特征,利用FPN多尺度进行refine,在车道线检测相关数据集取得了SOTA的性能。
|
||||
|
||||
## 模型库
|
||||
|
||||
### CLRNet在CULane上结果
|
||||
|
||||
| 骨架网络 | mF1 | F1@50 | F1@75 | 下载链接 | 配置文件 |训练日志|
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |:-----: |
|
||||
| ResNet-18 | 54.98 | 79.46 | 62.10 | [下载链接](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [配置文件](./clrnet_resnet18_culane.yml) |[训练日志](https://bj.bcebos.com/v1/paddledet/logs/train_clrnet_r18_15_culane.log)|
|
||||
|
||||
### 数据集下载
|
||||
下载[CULane数据集](https://xingangpan.github.io/projects/CULane.html)并解压到`dataset/culane`目录。
|
||||
|
||||
您的数据集目录结构如下:
|
||||
```shell
|
||||
culane/driver_xx_xxframe # data folders x6
|
||||
culane/laneseg_label_w16 # lane segmentation labels
|
||||
culane/list # data lists
|
||||
```
|
||||
如果您使用百度云链接下载,注意确保`driver_23_30frame_part1.tar.gz`和`driver_23_30frame_part2.tar.gz`解压后的文件都在`driver_23_30frame`目录下。
|
||||
|
||||
现已将用于测试的小数据集上传到PaddleDetection,可通过运行训练脚本,自动下载并解压数据,如需复现结果请下载链接中的全量数据集训练。
|
||||
|
||||
### 训练
|
||||
- GPU单卡训练
|
||||
```shell
|
||||
python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
|
||||
```
|
||||
- GPU多卡训练
|
||||
```shell
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
|
||||
```
|
||||
|
||||
### 评估
|
||||
```shell
|
||||
python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
|
||||
```
|
||||
|
||||
### 预测
|
||||
```shell
|
||||
python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
|
||||
```
|
||||
|
||||
注意:预测功能暂不支持模型静态图推理部署。
|
||||
|
||||
## 引用
|
||||
```
|
||||
@InProceedings{Zheng_2022_CVPR,
|
||||
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
|
||||
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
|
||||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
month = {June},
|
||||
year = {2022},
|
||||
pages = {898-907}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,68 @@
|
||||
English | [简体中文](README_cn.md)
|
||||
|
||||
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
|
||||
|
||||
## Table of Contents
|
||||
- [Introduction](#Introduction)
|
||||
- [Model Zoo](#model-zoo)
|
||||
- [Citations](#Citations)
|
||||
|
||||
## Introduction
|
||||
|
||||
[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. CLRNet introduces line priors for lane detection, a line IoU loss, and an NMS method. It fuses high-level contextual features of lane lines with low-level features and refines them across the multi-scale FPN levels. The model achieves SOTA performance on lane detection datasets.
|
||||
|
||||
## Model Zoo
|
||||
|
||||
### CLRNet Results on CULane dataset
|
||||
|
||||
| backbone | mF1 | F1@50 | F1@75 | download | config |
|
||||
| :--------------| :------- | :----: | :------: | :----: |:-----: |
|
||||
| ResNet-18 | 54.98 | 79.46 | 62.10 | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |
|
||||
|
||||
### Download
|
||||
Download [CULane](https://xingangpan.github.io/projects/CULane.html), then extract it to `dataset/culane`.
|
||||
|
||||
For CULane, you should have structure like this:
|
||||
```shell
|
||||
culane/driver_xx_xxframe # data folders x6
|
||||
culane/laneseg_label_w16 # lane segmentation labels
|
||||
culane/list # data lists
|
||||
```
|
||||
If you use Baidu Cloud, make sure that images in `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` are located in one folder `driver_23_30frame` instead of two separate folders after you decompress them.
|
||||
|
||||
We have uploaded a small subset of the CULane dataset to PaddleDetection for code checking. You can simply run the training script below to download it automatically. If you want to reproduce the results, you need to download the full dataset from the link above for training.
|
||||
|
||||
### Training
|
||||
- single GPU
|
||||
```shell
|
||||
python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
|
||||
```
|
||||
- multi GPU
|
||||
```shell
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
|
||||
```
|
||||
|
||||
### Evaluation
|
||||
```shell
|
||||
python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
|
||||
```
|
||||
|
||||
### Inference
|
||||
```shell
|
||||
python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
|
||||
```
|
||||
|
||||
Notice: Inference does not currently support static-graph model deployment.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@InProceedings{Zheng_2022_CVPR,
|
||||
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
|
||||
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
|
||||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
month = {June},
|
||||
year = {2022},
|
||||
pages = {898-907}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,41 @@
|
||||
architecture: CLRNet
|
||||
|
||||
CLRNet:
|
||||
backbone: CLRResNet
|
||||
neck: CLRFPN
|
||||
clr_head: CLRHead
|
||||
|
||||
CLRResNet:
|
||||
resnet: 'resnet18'
|
||||
pretrained: True
|
||||
|
||||
CLRFPN:
|
||||
in_channels: [128,256,512]
|
||||
out_channel: 64
|
||||
extra_stage: 0
|
||||
|
||||
CLRHead:
|
||||
prior_feat_channels: 64
|
||||
fc_hidden_dim: 64
|
||||
num_priors: 192
|
||||
num_fc: 2
|
||||
refine_layers: 3
|
||||
sample_points: 36
|
||||
loss: CLRNetLoss
|
||||
conf_threshold: 0.4
|
||||
nms_thres: 0.8
|
||||
|
||||
CLRNetLoss:
|
||||
cls_loss_weight : 2.0
|
||||
xyt_loss_weight : 0.2
|
||||
iou_loss_weight : 2.0
|
||||
seg_loss_weight : 1.0
|
||||
refine_layers : 3
|
||||
ignore_label: 255
|
||||
bg_weight: 0.4
|
||||
|
||||
# for visualize lane detection results
|
||||
sample_y:
|
||||
start: 589
|
||||
end: 230
|
||||
step: -20
|
||||
@@ -0,0 +1,37 @@
|
||||
worker_num: 10
|
||||
|
||||
img_h: &img_h 320
|
||||
img_w: &img_w 800
|
||||
ori_img_h: &ori_img_h 590
|
||||
ori_img_w: &ori_img_w 1640
|
||||
num_points: &num_points 72
|
||||
max_lanes: &max_lanes 4
|
||||
|
||||
TrainReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneTrainProcess: {img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: True
|
||||
drop_last: False
|
||||
|
||||
|
||||
|
||||
|
||||
EvalReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
|
||||
|
||||
|
||||
TestReader:
|
||||
batch_size: 24
|
||||
batch_transforms:
|
||||
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
|
||||
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
|
||||
shuffle: False
|
||||
drop_last: False
|
||||
@@ -0,0 +1,14 @@
|
||||
epoch: 15
|
||||
snapshot_epoch: 5
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.6e-3
|
||||
schedulers:
|
||||
- !CosineDecay
|
||||
max_epochs: 15
|
||||
use_warmup: False
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: False
|
||||
optimizer:
|
||||
type: AdamW
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/culane.yml',
|
||||
'_base_/clrnet_reader.yml',
|
||||
'_base_/clrnet_r18_fpn.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'../runtime.yml'
|
||||
]
|
||||
|
||||
weights: output/clr_resnet18_culane/model_final
|
||||
@@ -0,0 +1,20 @@
|
||||
# ConvNeXt (A ConvNet for the 2020s)
|
||||
|
||||
## 模型库
|
||||
### ConvNeXt on COCO
|
||||
|
||||
| 网络结构 | 输入尺寸 | 图片数/GPU | 学习率策略 | mAP<sup>val<br>0.5:0.95</sup> | mAP<sup>val<br>0.5</sup> | Params(M) | FLOPs(G) | 下载链接 | 配置文件 |
|
||||
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |
|
||||
| PP-YOLOE-ConvNeXt-tiny | 640 | 16 | 36e | 44.6 | 63.3 | 33.04 | 13.87 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_convnext_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_convnext_tiny_36e_coco.yml) |
|
||||
| YOLOX-ConvNeXt-s | 640 | 8 | 36e | 44.6 | 65.3 | 36.20 | 27.52 | [下载链接](https://paddledet.bj.bcebos.com/models/yolox_convnext_s_36e_coco.pdparams) | [配置文件](./yolox_convnext_s_36e_coco.yml) |
|
||||
|
||||
|
||||
## Citations
|
||||
```
|
||||
@Article{liu2022convnet,
|
||||
author = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie},
|
||||
title = {A ConvNet for the 2020s},
|
||||
journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
||||
year = {2022},
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,55 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../ppyoloe/_base_/ppyoloe_crn.yml',
|
||||
'../ppyoloe/_base_/ppyoloe_reader.yml',
|
||||
]
|
||||
depth_mult: 0.25
|
||||
width_mult: 0.50
|
||||
|
||||
log_iter: 100
|
||||
snapshot_epoch: 5
|
||||
weights: output/ppyoloe_convnext_tiny_36e_coco/model_final
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
|
||||
|
||||
|
||||
YOLOv3:
|
||||
backbone: ConvNeXt
|
||||
neck: CustomCSPPAN
|
||||
yolo_head: PPYOLOEHead
|
||||
post_process: ~
|
||||
|
||||
ConvNeXt:
|
||||
arch: 'tiny'
|
||||
drop_path_rate: 0.4
|
||||
layer_scale_init_value: 1.0
|
||||
return_idx: [1, 2, 3]
|
||||
|
||||
|
||||
PPYOLOEHead:
|
||||
static_assigner_epoch: 12
|
||||
nms:
|
||||
nms_top_k: 1000
|
||||
keep_top_k: 300
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.7
|
||||
|
||||
|
||||
TrainReader:
|
||||
batch_size: 16
|
||||
|
||||
|
||||
epoch: 36
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [36]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0005
|
||||
@@ -0,0 +1,58 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../yolox/_base_/yolox_cspdarknet.yml',
|
||||
'../yolox/_base_/yolox_reader.yml'
|
||||
]
|
||||
depth_mult: 0.33
|
||||
width_mult: 0.50
|
||||
|
||||
log_iter: 100
|
||||
snapshot_epoch: 5
|
||||
weights: output/yolox_convnext_s_36e_coco/model_final
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
|
||||
|
||||
|
||||
YOLOX:
|
||||
backbone: ConvNeXt
|
||||
neck: YOLOCSPPAN
|
||||
head: YOLOXHead
|
||||
size_stride: 32
|
||||
size_range: [15, 25] # multi-scale range [480*480 ~ 800*800]
|
||||
|
||||
ConvNeXt:
|
||||
arch: 'tiny'
|
||||
drop_path_rate: 0.4
|
||||
layer_scale_init_value: 1.0
|
||||
return_idx: [1, 2, 3]
|
||||
|
||||
|
||||
TrainReader:
|
||||
batch_size: 8
|
||||
mosaic_epoch: 30
|
||||
|
||||
|
||||
YOLOXHead:
|
||||
l1_epoch: 30
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
nms_top_k: 10000
|
||||
keep_top_k: 1000
|
||||
score_threshold: 0.001
|
||||
nms_threshold: 0.65
|
||||
|
||||
|
||||
epoch: 36
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [36]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0005
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: COCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
name: COCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
name: COCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
allow_empty: true
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
|
||||
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
|
||||
@@ -0,0 +1,20 @@
|
||||
metric: COCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
name: COCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
name: COCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
|
||||
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
|
||||
@@ -0,0 +1,28 @@
|
||||
metric: CULaneMetric
|
||||
num_classes: 5 # 4 lanes + background
|
||||
|
||||
cut_height: &cut_height 270
|
||||
dataset_dir: &dataset_dir dataset/culane
|
||||
|
||||
TrainDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/train_gt.txt'
|
||||
split: train
|
||||
cut_height: *cut_height
|
||||
|
||||
|
||||
EvalDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/test.txt'
|
||||
split: test
|
||||
cut_height: *cut_height
|
||||
|
||||
|
||||
TestDataset:
|
||||
name: CULaneDataSet
|
||||
dataset_dir: *dataset_dir
|
||||
list_path: 'list/test.txt'
|
||||
split: test
|
||||
cut_height: *cut_height
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 15
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: test1024/DOTA_test1024.json
|
||||
dataset_dir: dataset/dota/
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 15
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: trainval1024/images
|
||||
anno_path: trainval1024/DOTA_trainval1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: test1024/DOTA_test1024.json
|
||||
dataset_dir: dataset/dota_ms/
|
||||
@@ -0,0 +1,25 @@
|
||||
metric: MCMOT
|
||||
num_classes: 10
|
||||
# Uses the VisDrone2019 MOT dataset with 10 classes by default; modify it to suit your needs.
|
||||
|
||||
# for MCMOT training
|
||||
TrainDataset:
|
||||
!MCMOTDataSet
|
||||
dataset_dir: dataset/mot
|
||||
image_lists: ['visdrone_mcmot.train']
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
|
||||
label_list: label_list.txt
|
||||
|
||||
# for MCMOT evaluation
|
||||
# If you want to change the MCMOT evaluation dataset, please modify 'data_root'
|
||||
EvalMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
data_root: visdrone_mcmot/images/val
|
||||
keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
|
||||
|
||||
# for MCMOT video inference
|
||||
TestMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
keep_ori_im: True # set True if save visualization images or video
|
||||
@@ -0,0 +1,23 @@
|
||||
metric: MOT
|
||||
num_classes: 1
|
||||
|
||||
# for MOT training
|
||||
TrainDataset:
|
||||
!MOTDataSet
|
||||
dataset_dir: dataset/mot
|
||||
image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train']
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
|
||||
|
||||
# for MOT evaluation
|
||||
# If you want to change the MOT evaluation dataset, please modify 'data_root'
|
||||
EvalMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
data_root: MOT16/images/train
|
||||
keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
|
||||
|
||||
# for MOT video inference
|
||||
TestMOTDataset:
|
||||
!MOTImageFolder
|
||||
dataset_dir: dataset/mot
|
||||
keep_ori_im: True # set True if save visualization images or video
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: COCO
|
||||
num_classes: 365
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: train
|
||||
anno_path: annotations/zhiyuan_objv2_train.json
|
||||
dataset_dir: dataset/objects365
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: val
|
||||
anno_path: annotations/zhiyuan_objv2_val.json
|
||||
dataset_dir: dataset/objects365
|
||||
allow_empty: true
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: annotations/zhiyuan_objv2_val.json
|
||||
dataset_dir: dataset/objects365/
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: VOC
|
||||
map_type: integral
|
||||
num_classes: 4
|
||||
|
||||
TrainDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/roadsign_voc
|
||||
anno_path: train.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
EvalDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/roadsign_voc
|
||||
anno_path: valid.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: dataset/roadsign_voc/label_list.txt
|
||||
@@ -0,0 +1,47 @@
|
||||
metric: SNIPERCOCO
|
||||
num_classes: 80
|
||||
|
||||
TrainDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: train2017
|
||||
anno_path: annotations/instances_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: true
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
|
||||
chip_target_size: 512
|
||||
chip_target_stride: 200
|
||||
use_neg_chip: false
|
||||
max_neg_num_per_im: 8
|
||||
|
||||
|
||||
EvalDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val2017
|
||||
anno_path: annotations/instances_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: false
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
|
||||
chip_target_size: 512
|
||||
chip_target_stride: 200
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
TestDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val2017
|
||||
dataset_dir: dataset/coco
|
||||
is_trainset: false
|
||||
image_target_sizes: [2000, 1000]
|
||||
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
|
||||
chip_target_size: 500
|
||||
chip_target_stride: 200
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
metric: SNIPERCOCO
|
||||
num_classes: 9
|
||||
|
||||
TrainDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: train
|
||||
anno_path: annotations/train.json
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: true
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
use_neg_chip: false
|
||||
max_neg_num_per_im: 8
|
||||
|
||||
|
||||
EvalDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val
|
||||
anno_path: annotations/val.json
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
allow_empty: true
|
||||
is_trainset: false
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
TestDataset:
|
||||
!SniperCOCODataSet
|
||||
image_dir: val
|
||||
dataset_dir: dataset/VisDrone2019_coco
|
||||
is_trainset: false
|
||||
image_target_sizes: [8145, 2742]
|
||||
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
|
||||
chip_target_size: 1536
|
||||
chip_target_stride: 1184
|
||||
max_per_img: -1
|
||||
nms_thresh: 0.5
|
||||
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: RBOX
|
||||
num_classes: 9
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: images
|
||||
anno_path: annotations/train.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: images
|
||||
anno_path: annotations/valid.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: annotations/valid.json
|
||||
dataset_dir: dataset/spine_coco
|
||||
@@ -0,0 +1,22 @@
|
||||
metric: COCO
|
||||
num_classes: 10
|
||||
|
||||
TrainDataset:
|
||||
!COCODataSet
|
||||
image_dir: VisDrone2019-DET-train
|
||||
anno_path: train.json
|
||||
dataset_dir: dataset/visdrone
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
||||
|
||||
EvalDataset:
|
||||
!COCODataSet
|
||||
image_dir: VisDrone2019-DET-val
|
||||
anno_path: val.json
|
||||
# image_dir: test_dev
|
||||
# anno_path: test_dev.json
|
||||
dataset_dir: dataset/visdrone
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: val.json
|
||||
dataset_dir: dataset/visdrone
|
||||
@@ -0,0 +1,21 @@
|
||||
metric: VOC
|
||||
map_type: 11point
|
||||
num_classes: 20
|
||||
|
||||
TrainDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/voc
|
||||
anno_path: trainval.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
EvalDataset:
|
||||
name: VOCDataSet
|
||||
dataset_dir: dataset/voc
|
||||
anno_path: test.txt
|
||||
label_list: label_list.txt
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
|
||||
|
||||
TestDataset:
|
||||
name: ImageFolder
|
||||
anno_path: dataset/voc/label_list.txt
|
||||
@@ -0,0 +1,20 @@
|
||||
metric: WiderFace
|
||||
num_classes: 1
|
||||
|
||||
TrainDataset:
|
||||
!WIDERFaceDataSet
|
||||
dataset_dir: dataset/wider_face
|
||||
anno_path: wider_face_split/wider_face_train_bbx_gt.txt
|
||||
image_dir: WIDER_train/images
|
||||
data_fields: ['image', 'gt_bbox', 'gt_class']
|
||||
|
||||
EvalDataset:
|
||||
!WIDERFaceDataSet
|
||||
dataset_dir: dataset/wider_face
|
||||
anno_path: wider_face_split/wider_face_val_bbx_gt.txt
|
||||
image_dir: WIDER_val/images
|
||||
data_fields: ['image']
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
use_default_label: true
|
||||
@@ -0,0 +1,37 @@
|
||||
### Deformable ConvNets v2
|
||||
|
||||
| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
|
||||
| ResNet50-FPN | Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 42.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 2x | - | 43.7 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 45.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 46.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | - | 42.7 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | - | 44.6 | 39.8 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 45.6 | 40.6 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 47.3 | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 48.8 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
|
||||
|
||||
**注意事项:**
|
||||
|
||||
- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
|
||||
- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{dai2017deformable,
|
||||
title={Deformable Convolutional Networks},
|
||||
author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
|
||||
booktitle={Proceedings of the IEEE international conference on computer vision},
|
||||
year={2017}
|
||||
}
|
||||
@article{zhu2018deformable,
|
||||
title={Deformable ConvNets v2: More Deformable, Better Results},
|
||||
author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
|
||||
journal={arXiv preprint arXiv:1811.11168},
|
||||
year={2018}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../cascade_rcnn/_base_/optimizer_1x.yml',
|
||||
'../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
|
||||
'../cascade_rcnn/_base_/cascade_fpn_reader.yml',
|
||||
]
|
||||
weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 101
|
||||
groups: 64
|
||||
base_width: 4
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'../faster_rcnn/_base_/optimizer_1x.yml',
|
||||
'../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
|
||||
'../faster_rcnn/_base_/faster_fpn_reader.yml',
|
||||
]
|
||||
weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,26 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,17 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# for ResNeXt: groups, base_width, base_channels
|
||||
depth: 101
|
||||
groups: 64
|
||||
base_width: 4
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,15 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,16 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'../mask_rcnn/_base_/optimizer_1x.yml',
|
||||
'../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
|
||||
'../mask_rcnn/_base_/mask_fpn_reader.yml',
|
||||
]
|
||||
weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,26 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
variant: d
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
@@ -0,0 +1,17 @@
|
||||
_BASE_: [
|
||||
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
|
||||
weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# for ResNeXt: groups, base_width, base_channels
|
||||
depth: 101
|
||||
variant: d
|
||||
groups: 64
|
||||
base_width: 4
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
dcn_v2_stages: [1,2,3]
|
||||
@@ -0,0 +1,36 @@
|
||||
# Deformable DETR
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
Deformable DETR is an object detection model based on DETR. We reproduce the model from the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Images/GPU | Epochs | Box AP | Config | Log | Download |
|
||||
|:--------:|:---------------:|:----------:|:------:|:------:|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
|
||||
| R-50 | Deformable DETR | 2 | 50 | 44.5 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/deformable_detr_r50_1x_coco_44.5.log) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- Deformable DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
|
||||
- Deformable DETR is trained for 50 epochs on 8 GPUs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/deformable_detr/deformable_detr_r50_1x_coco.yml --fleet
|
||||
```
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{
|
||||
zhu2021deformable,
|
||||
title={Deformable DETR: Deformable Transformers for End-to-End Object Detection},
|
||||
author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai},
|
||||
booktitle={International Conference on Learning Representations},
|
||||
year={2021},
|
||||
url={https://openreview.net/forum?id=gZ9hCDWe6ke}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,48 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DeformableTransformer
|
||||
detr_head: DeformableDETRHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [1, 2, 3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
|
||||
DeformableTransformer:
|
||||
num_queries: 300
|
||||
position_embed_type: sine
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
activation: relu
|
||||
num_feature_levels: 4
|
||||
num_encoder_points: 4
|
||||
num_decoder_points: 4
|
||||
|
||||
|
||||
DeformableDETRHead:
|
||||
num_mlp_layers: 3
|
||||
|
||||
|
||||
DETRLoss:
|
||||
loss_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
|
||||
|
||||
HungarianMatcher:
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
@@ -0,0 +1,44 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 50
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [40]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/deformable_optimizer_1x.yml',
|
||||
'_base_/deformable_detr_r50.yml',
|
||||
'_base_/deformable_detr_reader.yml',
|
||||
]
|
||||
weights: output/deformable_detr_r50_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
@@ -0,0 +1,39 @@
|
||||
# DETR
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
DETR is an object detection model based on transformer. We reproduced the model of the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download |
|
||||
|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
|
||||
| R-50 | DETR | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- DETR is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
|
||||
- DETR is trained for 500 epochs on 8 GPUs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/detr/detr_r50_1x_coco.yml --fleet
|
||||
```
|
||||
|
||||
## Citations
|
||||
```
|
||||
@inproceedings{detr,
|
||||
author = {Nicolas Carion and
|
||||
Francisco Massa and
|
||||
Gabriel Synnaeve and
|
||||
Nicolas Usunier and
|
||||
Alexander Kirillov and
|
||||
Sergey Zagoruyko},
|
||||
title = {End-to-End Object Detection with Transformers},
|
||||
booktitle = {ECCV},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,44 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DETRTransformer
|
||||
detr_head: DETRHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
|
||||
DETRTransformer:
|
||||
num_queries: 100
|
||||
position_embed_type: sine
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.1
|
||||
activation: relu
|
||||
|
||||
|
||||
DETRHead:
|
||||
num_mlp_layers: 3
|
||||
|
||||
|
||||
DETRLoss:
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
|
||||
aux_loss: True
|
||||
|
||||
|
||||
HungarianMatcher:
|
||||
matcher_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
@@ -0,0 +1,44 @@
|
||||
worker_num: 0
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 500
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [400]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/detr_r50.yml',
|
||||
'_base_/detr_reader.yml',
|
||||
]
|
||||
weights: output/detr_r50_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
@@ -0,0 +1,39 @@
|
||||
# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
|
||||
|
||||
## Introduction
|
||||
|
||||
|
||||
[DINO](https://arxiv.org/abs/2203.03605) is an object detection model based on DETR. We reproduced the model of the paper.
|
||||
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Model | Epochs | Box AP | Config | Log | Download |
|
||||
|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:-------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
|
||||
| R-50 | dino_r50_4scale | 12 | 49.5 | [config](./dino_r50_4scale_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_1x_coco_49.5.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
|
||||
| R-50 | dino_r50_4scale | 24 | 50.8 | [config](./dino_r50_4scale_2x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_2x_coco_50.8.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
|
||||
|
||||
**Notes:**
|
||||
|
||||
- DINO is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
|
||||
- DINO is trained on 4 GPUs.
|
||||
|
||||
GPU multi-card training
|
||||
```bash
|
||||
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --fleet --eval
|
||||
```
|
||||
|
||||
## Custom Operator
|
||||
- For the multi-scale deformable attention custom operator, see [here](../../ppdet/modeling/transformers/ext_op).
|
||||
|
||||
## Citations
|
||||
```
|
||||
@misc{zhang2022dino,
|
||||
title={DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection},
|
||||
author={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel M. Ni and Heung-Yeung Shum},
|
||||
year={2022},
|
||||
eprint={2203.03605},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,45 @@
|
||||
architecture: DETR
|
||||
# pretrain_weights: # rewrite in FocalNet.pretrained in ppdet/modeling/backbones/focalnet.py
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
DETR:
|
||||
backbone: FocalNet
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
FocalNet:
|
||||
arch: 'focalnet_L_384_22k_fl4'
|
||||
out_indices: [1, 2, 3]
|
||||
pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 20
|
||||
pe_offset: 0.0
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
@@ -0,0 +1,49 @@
|
||||
architecture: DETR
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
|
||||
DETR:
|
||||
backbone: ResNet
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [1, 2, 3]
|
||||
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
|
||||
num_stages: 4
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 20
|
||||
pe_offset: 0.0
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 4
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600 },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- NormalizeBox: {}
|
||||
- BboxXYXY2XYWH: {}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
||||
batch_size: 4
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
@@ -0,0 +1,46 @@
|
||||
architecture: DETR
|
||||
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
|
||||
hidden_dim: 256
|
||||
use_focal_loss: True
|
||||
|
||||
DETR:
|
||||
backbone: SwinTransformer
|
||||
transformer: DINOTransformer
|
||||
detr_head: DINOHead
|
||||
post_process: DETRPostProcess
|
||||
|
||||
SwinTransformer:
|
||||
arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
|
||||
ape: false
|
||||
drop_path_rate: 0.2
|
||||
patch_norm: true
|
||||
out_indices: [1, 2, 3]
|
||||
|
||||
DINOTransformer:
|
||||
num_queries: 900
|
||||
position_embed_type: sine
|
||||
num_levels: 4
|
||||
nhead: 8
|
||||
num_encoder_layers: 6
|
||||
num_decoder_layers: 6
|
||||
dim_feedforward: 2048
|
||||
dropout: 0.0
|
||||
activation: relu
|
||||
pe_temperature: 10000
|
||||
pe_offset: -0.5
|
||||
num_denoising: 100
|
||||
label_noise_ratio: 0.5
|
||||
box_noise_scale: 1.0
|
||||
learnt_init_query: True
|
||||
|
||||
DINOHead:
|
||||
loss:
|
||||
name: DINOLoss
|
||||
loss_coeff: {class: 1, bbox: 5, giou: 2}
|
||||
aux_loss: True
|
||||
matcher:
|
||||
name: HungarianMatcher
|
||||
matcher_coeff: {class: 2, bbox: 5, giou: 2}
|
||||
|
||||
DETRPostProcess:
|
||||
num_top_queries: 300
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [11]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 24
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [20]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,16 @@
|
||||
epoch: 36
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [33]
|
||||
use_warmup: false
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
regularizer: false
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,11 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/dino_r50.yml',
|
||||
'_base_/dino_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/dino_r50_4scale_1x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
log_iter: 100
|
||||
@@ -0,0 +1,11 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_detection.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_2x.yml',
|
||||
'_base_/dino_r50.yml',
|
||||
'_base_/dino_reader.yml',
|
||||
]
|
||||
|
||||
weights: output/dino_r50_4scale_2x_coco/model_final
|
||||
find_unused_parameters: True
|
||||
log_iter: 100
|
||||
@@ -0,0 +1,176 @@
|
||||
# 人脸检测模型
|
||||
|
||||
## 简介
|
||||
`face_detection`中提供高效、高速的人脸检测解决方案,包括最先进的模型和经典模型。
|
||||
|
||||

|
||||
|
||||
## 模型库
|
||||
|
||||
#### WIDER-FACE数据集上的mAP
|
||||
|
||||
| 网络结构 | 输入尺寸 | 图片个数/GPU | 学习率策略 | Easy/Medium/Hard Set | 预测时延(SD855)| 模型大小(MB) | 下载 | 配置文件 |
|
||||
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
|
||||
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
|
||||
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
|
||||
|
||||
**注意:**
|
||||
- 我们使用多尺度评估策略得到`Easy/Medium/Hard Set`里的mAP。具体细节请参考[在WIDER-FACE数据集上评估](#在WIDER-FACE数据集上评估)。
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 数据准备
|
||||
我们使用[WIDER-FACE数据集](http://shuoyang1213.me/WIDERFACE/)进行训练和模型测试,官方网站提供了详细的数据介绍。
|
||||
- WIDER-Face数据源:
|
||||
使用如下目录结构加载`wider_face`类型的数据集:
|
||||
|
||||
```
|
||||
dataset/wider_face/
|
||||
├── wider_face_split
|
||||
│ ├── wider_face_train_bbx_gt.txt
|
||||
│ ├── wider_face_val_bbx_gt.txt
|
||||
├── WIDER_train
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
├── WIDER_val
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
```
|
||||
|
||||
- 手动下载数据集:
|
||||
要下载WIDER-FACE数据集,请运行以下命令:
|
||||
```
|
||||
cd dataset/wider_face && ./download_wider_face.sh
|
||||
```
|
||||
|
||||
### 参数配置
|
||||
基础模型的配置可以参考`configs/face_detection/_base_/blazeface.yml`;
|
||||
改进模型增加FPN和SSH的neck结构,配置文件可以参考`configs/face_detection/_base_/blazeface_fpn.yml`,可以根据需求配置FPN和SSH,具体如下:
|
||||
```yaml
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish #配置backbone中BlazeBlock的激活函数,基础模型为relu,增加FPN和SSH时需使用hard_swish
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh #可选only_fpn、only_ssh和fpn_ssh
|
||||
in_channel: [96,96]
|
||||
```
|
||||
|
||||
|
||||
|
||||
### 训练与评估
|
||||
训练流程与评估流程方法与其他算法一致,请参考[GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。
|
||||
**注意:** 人脸检测模型目前不支持边训练边评估。
|
||||
|
||||
#### 在WIDER-FACE数据集上评估
|
||||
- 步骤一:评估并生成结果文件:
|
||||
```shell
|
||||
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
|
||||
-o weights=output/blazeface_1000e/model_final \
|
||||
multi_scale=True
|
||||
```
|
||||
设置`multi_scale=True`进行多尺度评估,评估完成后,将在`output/pred`中生成txt格式的测试结果。
|
||||
|
||||
- 步骤二:下载官方评估脚本和Ground Truth文件:
|
||||
```
|
||||
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
|
||||
unzip eval_tools.zip && rm -f eval_tools.zip
|
||||
```
|
||||
|
||||
- 步骤三:开始评估
|
||||
|
||||
方法一:python评估:
|
||||
```
|
||||
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
|
||||
cd WiderFace-Evaluation
|
||||
# 编译
|
||||
python3 setup.py build_ext --inplace
|
||||
# 开始评估
|
||||
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
|
||||
```
|
||||
|
||||
方法二:MatLab评估:
|
||||
```
|
||||
# 在`eval_tools/wider_eval.m`中修改保存结果路径和绘制曲线的名称:
|
||||
pred_dir = './pred';
|
||||
legend_name = 'Paddle-BlazeFace';
|
||||
|
||||
`wider_eval.m` 是评估模块的主要执行程序。运行命令如下:
|
||||
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
|
||||
```
|
||||
|
||||
### Python脚本预测
|
||||
为了支持二次开发,这里提供通过Python脚本使用Paddle Detection whl包来进行预测的示例。
|
||||
```python
|
||||
import cv2
|
||||
import paddle
|
||||
import numpy as np
|
||||
from ppdet.core.workspace import load_config
|
||||
from ppdet.engine import Trainer
|
||||
from ppdet.metrics import get_infer_results
|
||||
from ppdet.data.transform.operators import NormalizeImage, Permute
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 准备基础的参数
|
||||
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
|
||||
cfg = load_config(config_path)
|
||||
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
|
||||
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
|
||||
cfg.weights = weight_path
|
||||
bbox_thre = 0.8
|
||||
paddle.set_device('gpu')
|
||||
# 创建所需的类
|
||||
trainer = Trainer(cfg, mode='test')
|
||||
trainer.load_weights(cfg.weights)
|
||||
trainer.model.eval()
|
||||
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
|
||||
permuter = Permute()
|
||||
# 进行图片读取
|
||||
im = cv2.imread(infer_img_path)
|
||||
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
||||
# 准备数据字典
|
||||
data_dict = {'image': im}
|
||||
data_dict = normaler(data_dict)
|
||||
data_dict = permuter(data_dict)
|
||||
h, w, c = im.shape
|
||||
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
|
||||
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
|
||||
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
|
||||
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
|
||||
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
|
||||
# 进行预测
|
||||
outs = trainer.model(data_dict)
|
||||
# 对预测的数据进行后处理得到最终的bbox信息
|
||||
for key in ['im_shape', 'scale_factor', 'im_id']:
|
||||
outs[key] = data_dict[key]
|
||||
for key, value in outs.items():
|
||||
outs[key] = value.numpy()
|
||||
clsid2catid, catid2name = {0: 'face'}, {0: 0}
|
||||
batch_res = get_infer_results(outs, clsid2catid)
|
||||
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
|
||||
print(bbox)
|
||||
```
|
||||
|
||||
## Citations
|
||||
|
||||
```
|
||||
@article{bazarevsky2019blazeface,
|
||||
title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
|
||||
author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
|
||||
year={2019},
|
||||
eprint={1907.05047},
|
||||
archivePrefix={arXiv}
}
|
||||
```
|
||||
@@ -0,0 +1,176 @@
|
||||
# Face Detection Model
|
||||
|
||||
## Introduction
|
||||
`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models and classic models.
|
||||
|
||||

|
||||
|
||||
## Model Library
|
||||
|
||||
#### mAP on the WIDER-FACE dataset
|
||||
|
||||
| Network structure | size | images/GPUs | Learning rate strategy | Easy/Medium/Hard Set | Prediction delay(SD855)| Model size(MB) | Download | Configuration File |
|
||||
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
|
||||
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
|
||||
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
|
||||
|
||||
**Attention:**
|
||||
- We use a multi-scale evaluation strategy to get the mAP in `Easy/Medium/Hard Set`. Please refer to the [evaluation on the WIDER FACE dataset](#Evaluated-on-the-WIDER-FACE-Dataset) for details.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Data preparation
|
||||
We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and model testing; the official website provides a detailed introduction to the data.
|
||||
- WIDER-Face data source:
|
||||
- Load a dataset of type `wider_face` using the following directory structure:
|
||||
```
|
||||
dataset/wider_face/
|
||||
├── wider_face_split
|
||||
│ ├── wider_face_train_bbx_gt.txt
|
||||
│ ├── wider_face_val_bbx_gt.txt
|
||||
├── WIDER_train
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
├── WIDER_val
|
||||
│ ├── images
|
||||
│ │ ├── 0--Parade
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
|
||||
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
|
||||
│ │ │ │ ...
|
||||
│ │ ├── 10--People_Marching
|
||||
│ │ │ ...
|
||||
```
|
||||
|
||||
- Manually download the dataset:
|
||||
To download the WIDER-FACE dataset, run the following command:
|
||||
```
|
||||
cd dataset/wider_face && ./download_wider_face.sh
|
||||
```
|
||||
|
||||
### Parameter configuration
|
||||
For the configuration of the base model, see `configs/face_detection/_base_/blazeface.yml`;
|
||||
The improved model adds an FPN and SSH neck structure; for its configuration, see `configs/face_detection/_base_/blazeface_fpn.yml`. FPN and SSH can be configured as required, as shown below:
|
||||
```yaml
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish # Activation function of the BlazeBlock in the backbone: the base model uses relu; hard_swish is required when adding FPN and SSH
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh #only_fpn, only_ssh and fpn_ssh
|
||||
in_channel: [96,96]
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Training and Evaluation
|
||||
The training and evaluation procedures are the same as for the other algorithms; please refer to [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
|
||||
**Attention:** Face detection models currently do not support evaluation during training.
|
||||
|
||||
#### Evaluated on the WIDER-FACE Dataset
|
||||
- Step 1: Evaluate and generate a result file:
|
||||
```shell
|
||||
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
|
||||
-o weights=output/blazeface_1000e/model_final \
|
||||
multi_scale=True
|
||||
```
|
||||
Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in TXT format will be generated in `output/pred`.
|
||||
|
||||
- Step 2: Download the official evaluation script and Ground Truth file:
|
||||
```
|
||||
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
|
||||
unzip eval_tools.zip && rm -f eval_tools.zip
|
||||
```
|
||||
|
||||
- Step 3: Start the evaluation
|
||||
|
||||
Method 1: Python evaluation:
|
||||
```
|
||||
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
|
||||
cd WiderFace-Evaluation
|
||||
# compile
|
||||
python3 setup.py build_ext --inplace
|
||||
# Begin to assess
|
||||
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
|
||||
```
|
||||
|
||||
Method 2: MatLab evaluation:
|
||||
```
|
||||
# In `eval_tools/wider_eval.m`, set the path of the saved prediction results and the legend name of the plotted curve:
|
||||
pred_dir = './pred';
|
||||
legend_name = 'Paddle-BlazeFace';
|
||||
|
||||
`wider_eval.m` is the main implementation of the evaluation module. Run the following command:
|
||||
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
|
||||
```
|
||||
|
||||
### Use by Python Code
|
||||
In order to support development, here is an example of using the Paddle Detection whl package to make predictions through Python code.
|
||||
```python
|
||||
import cv2
|
||||
import paddle
|
||||
import numpy as np
|
||||
from ppdet.core.workspace import load_config
|
||||
from ppdet.engine import Trainer
|
||||
from ppdet.metrics import get_infer_results
|
||||
from ppdet.data.transform.operators import NormalizeImage, Permute
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# prepare for the parameters
|
||||
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
|
||||
cfg = load_config(config_path)
|
||||
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
|
||||
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
|
||||
cfg.weights = weight_path
|
||||
bbox_thre = 0.8
|
||||
paddle.set_device('gpu')
|
||||
# create the class object
|
||||
trainer = Trainer(cfg, mode='test')
|
||||
trainer.load_weights(cfg.weights)
|
||||
trainer.model.eval()
|
||||
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
|
||||
permuter = Permute()
|
||||
# read the image file
|
||||
im = cv2.imread(infer_img_path)
|
||||
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
|
||||
# prepare for the data dict
|
||||
data_dict = {'image': im}
|
||||
data_dict = normaler(data_dict)
|
||||
data_dict = permuter(data_dict)
|
||||
h, w, c = im.shape
|
||||
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
|
||||
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
|
||||
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
|
||||
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
|
||||
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
|
||||
# do the prediction
|
||||
outs = trainer.model(data_dict)
|
||||
# to do the postprocess to get the final bbox info
|
||||
for key in ['im_shape', 'scale_factor', 'im_id']:
|
||||
outs[key] = data_dict[key]
|
||||
for key, value in outs.items():
|
||||
outs[key] = value.numpy()
|
||||
clsid2catid, catid2name = {0: 'face'}, {0: 0}
|
||||
batch_res = get_infer_results(outs, clsid2catid)
|
||||
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
|
||||
print(bbox)
|
||||
```
|
||||
|
||||
|
||||
## Citations
|
||||
|
||||
```
|
||||
@article{bazarevsky2019blazeface,
|
||||
title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
|
||||
author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
|
||||
year={2019},
|
||||
eprint={1907.05047},
|
||||
archivePrefix={arXiv}
}
|
||||
```
|
||||
@@ -0,0 +1,45 @@
|
||||
architecture: BlazeFace
|
||||
|
||||
BlazeFace:
|
||||
backbone: BlazeNet
|
||||
neck: BlazeNeck
|
||||
blaze_head: FaceHead
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: relu
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : None
|
||||
in_channel: [96,96]
|
||||
|
||||
FaceHead:
|
||||
in_channels: [96,96]
|
||||
anchor_generator: AnchorGeneratorSSD
|
||||
loss: SSDLoss
|
||||
|
||||
SSDLoss:
|
||||
overlap_threshold: 0.35
|
||||
|
||||
AnchorGeneratorSSD:
|
||||
steps: [8., 16.]
|
||||
aspect_ratios: [[1.], [1.]]
|
||||
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
|
||||
max_sizes: [[], []]
|
||||
offset: 0.5
|
||||
flip: False
|
||||
min_max_aspect_ratios_order: false
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: SSDBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 750
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.3
|
||||
nms_top_k: 5000
|
||||
nms_eta: 1.0
|
||||
@@ -0,0 +1,45 @@
|
||||
architecture: BlazeFace
|
||||
|
||||
BlazeFace:
|
||||
backbone: BlazeNet
|
||||
neck: BlazeNeck
|
||||
blaze_head: FaceHead
|
||||
post_process: BBoxPostProcess
|
||||
|
||||
BlazeNet:
|
||||
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
|
||||
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
|
||||
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
|
||||
act: hard_swish
|
||||
|
||||
BlazeNeck:
|
||||
neck_type : fpn_ssh
|
||||
in_channel: [96,96]
|
||||
|
||||
FaceHead:
|
||||
in_channels: [48, 48]
|
||||
anchor_generator: AnchorGeneratorSSD
|
||||
loss: SSDLoss
|
||||
|
||||
SSDLoss:
|
||||
overlap_threshold: 0.35
|
||||
|
||||
AnchorGeneratorSSD:
|
||||
steps: [8., 16.]
|
||||
aspect_ratios: [[1.], [1.]]
|
||||
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
|
||||
max_sizes: [[], []]
|
||||
offset: 0.5
|
||||
flip: False
|
||||
min_max_aspect_ratios_order: false
|
||||
|
||||
BBoxPostProcess:
|
||||
decode:
|
||||
name: SSDBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 750
|
||||
score_threshold: 0.01
|
||||
nms_threshold: 0.3
|
||||
nms_top_k: 5000
|
||||
nms_eta: 1.0
|
||||
@@ -0,0 +1,44 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
inputs_def:
|
||||
num_max_boxes: 90
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
|
||||
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
|
||||
- RandomFlip: {}
|
||||
- CropWithDataAchorSampling: {
|
||||
anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
|
||||
batch_sampler: [
|
||||
[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
|
||||
],
|
||||
target_size: 640}
|
||||
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
|
||||
- NormalizeBox: {}
|
||||
- PadBox: {num_max_boxes: 90}
|
||||
batch_transforms:
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 8
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
@@ -0,0 +1,21 @@
|
||||
epoch: 1000
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones:
|
||||
- 333
|
||||
- 800
|
||||
- !LinearWarmup
|
||||
start_factor: 0.3333333333333333
|
||||
steps: 500
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.0
|
||||
type: RMSProp
|
||||
regularizer:
|
||||
factor: 0.0005
|
||||
type: L2
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/wider_face.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1000e.yml',
|
||||
'_base_/blazeface.yml',
|
||||
'_base_/face_reader.yml',
|
||||
]
|
||||
weights: output/blazeface_1000e/model_final
|
||||
multi_scale_eval: True
|
||||
@@ -0,0 +1,9 @@
|
||||
_BASE_: [
|
||||
'../datasets/wider_face.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1000e.yml',
|
||||
'_base_/blazeface_fpn.yml',
|
||||
'_base_/face_reader.yml',
|
||||
]
|
||||
weights: output/blazeface_fpn_ssh_1000e/model_final
|
||||
multi_scale_eval: True
|
||||
@@ -0,0 +1,38 @@
|
||||
# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
|
||||
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
|
||||
| ResNet50 | Faster | 1 | 1x | ---- | 36.7 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_1x_coco.yml) |
|
||||
| ResNet50-vd | Faster | 1 | 1x | ---- | 37.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_1x_coco.yml) |
|
||||
| ResNet101 | Faster | 1 | 1x | ---- | 39.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_1x_coco.yml) |
|
||||
| ResNet34-FPN | Faster | 1 | 1x | ---- | 37.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_1x_coco.yml) |
|
||||
| ResNet34-FPN-MultiScaleTest | Faster | 1 | 1x | ---- | 38.2 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_multiscaletest_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml) |
|
||||
| ResNet34-vd-FPN | Faster | 1 | 1x | ---- | 38.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Faster | 1 | 1x | ---- | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_1x_coco.yml) |
|
||||
| ResNet50-FPN | Faster | 1 | 2x | ---- | 40.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_2x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | 1 | 1x | ---- | 39.5 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_1x_coco.yml) |
|
||||
| ResNet50-vd-FPN | Faster | 1 | 2x | ---- | 40.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_2x_coco.yml) |
|
||||
| ResNet101-FPN | Faster | 1 | 2x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_fpn_2x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | 1 | 1x | ---- | 42.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_1x_coco.yml) |
|
||||
| ResNet101-vd-FPN | Faster | 1 | 2x | ---- | 43.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_2x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | 1 | 1x | ---- | 43.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
|
||||
| ResNeXt101-vd-FPN | Faster | 1 | 2x | ---- | 44.0 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 1x | ---- | 41.4 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
|
||||
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 1x | ---- | 42.6 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 2x | ---- | 44.8 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
|
||||
| Swin-Tiny-FPN | Faster | 2 | 3x | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](../swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
|
||||
|
||||
## Citations
|
||||
```
|
||||
@article{Ren_2017,
|
||||
title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
|
||||
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
||||
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
|
||||
author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
|
||||
year={2017},
|
||||
month={Jun},
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,66 @@
|
||||
architecture: FasterRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
FasterRCNN:
|
||||
backbone: ResNet
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [2]
|
||||
num_stages: 3
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [32, 64, 128, 256, 512]
|
||||
strides: [16]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 12000
|
||||
post_nms_top_n: 2000
|
||||
topk_after_collect: False
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 6000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: Res5Head
|
||||
roi_extractor:
|
||||
resolution: 14
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
with_pool: true
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,73 @@
|
||||
architecture: FasterRCNN
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
FasterRCNN:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
# post process
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: TwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
TwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,41 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
|
||||
- RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
inputs_def:
|
||||
image_shape: [-1, 3, 640, 640]
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: 640, keep_ratio: True}
|
||||
- Pad: {size: 640}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,70 @@
|
||||
architecture: FasterRCNN
|
||||
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
|
||||
|
||||
FasterRCNN:
|
||||
backbone: SwinTransformer
|
||||
neck: FPN
|
||||
rpn_head: RPNHead
|
||||
bbox_head: BBoxHead
|
||||
bbox_post_process: BBoxPostProcess
|
||||
|
||||
SwinTransformer:
|
||||
arch: 'swin_T_224'
|
||||
ape: false
|
||||
drop_path_rate: 0.1
|
||||
patch_norm: true
|
||||
out_indices: [0, 1, 2, 3]
|
||||
pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
|
||||
|
||||
FPN:
|
||||
out_channel: 256
|
||||
|
||||
RPNHead:
|
||||
anchor_generator:
|
||||
aspect_ratios: [0.5, 1.0, 2.0]
|
||||
anchor_sizes: [[32], [64], [128], [256], [512]]
|
||||
strides: [4, 8, 16, 32, 64]
|
||||
rpn_target_assign:
|
||||
batch_size_per_im: 256
|
||||
fg_fraction: 0.5
|
||||
negative_overlap: 0.3
|
||||
positive_overlap: 0.7
|
||||
use_random: True
|
||||
train_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 2000
|
||||
post_nms_top_n: 1000
|
||||
topk_after_collect: True
|
||||
test_proposal:
|
||||
min_size: 0.0
|
||||
nms_thresh: 0.7
|
||||
pre_nms_top_n: 1000
|
||||
post_nms_top_n: 1000
|
||||
|
||||
|
||||
BBoxHead:
|
||||
head: TwoFCHead
|
||||
roi_extractor:
|
||||
resolution: 7
|
||||
sampling_ratio: 0
|
||||
aligned: True
|
||||
bbox_assigner: BBoxAssigner
|
||||
|
||||
BBoxAssigner:
|
||||
batch_size_per_im: 512
|
||||
bg_thresh: 0.5
|
||||
fg_thresh: 0.5
|
||||
fg_fraction: 0.25
|
||||
use_random: True
|
||||
|
||||
TwoFCHead:
|
||||
out_channel: 1024
|
||||
|
||||
BBoxPostProcess:
|
||||
decode: RCNNBox
|
||||
nms:
|
||||
name: MultiClassNMS
|
||||
keep_top_k: 100
|
||||
score_threshold: 0.05
|
||||
nms_threshold: 0.5
|
||||
@@ -0,0 +1,40 @@
|
||||
worker_num: 2
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: -1}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
@@ -0,0 +1,19 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
optimizer:
|
||||
momentum: 0.9
|
||||
type: Momentum
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
@@ -0,0 +1,20 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 1.0
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.05
|
||||
param_groups:
|
||||
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
|
||||
weight_decay: 0.0
|
||||
@@ -0,0 +1,14 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [2]
|
||||
num_stages: 3
|
||||
@@ -0,0 +1,14 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
@@ -0,0 +1,25 @@
|
||||
_BASE_: [
|
||||
'faster_rcnn_r50_fpn_1x_coco.yml',
|
||||
]
|
||||
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
|
||||
weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
|
||||
|
||||
ResNet:
|
||||
# index 0 stands for res2
|
||||
depth: 101
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [0,1,2,3]
|
||||
num_stages: 4
|
||||
|
||||
epoch: 24
|
||||
LearningRate:
|
||||
base_lr: 0.01
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [16, 22]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.1
|
||||
steps: 1000
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user