Move paddle_detection

2024-09-24 17:02:56 +08:00
parent 90a6d5ec75
commit 3438cf6e0e
2025 changed files with 11 additions and 11 deletions


@@ -0,0 +1,28 @@
# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
## Model Zoo
| Backbone | Model Type | Images/GPU | Lr Schedule | Inf Time (fps) | Box AP | Mask AP | Download | Config |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-FPN | Cascade Faster | 1 | 1x | ---- | 41.1 | - | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | Cascade Mask | 1 | 1x | ---- | 41.8 | 36.3 | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 1x | ---- | 44.4 | - | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Faster | 1 | 2x | ---- | 45.0 | - | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 1x | ---- | 44.9 | 39.1 | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Cascade Mask | 1 | 2x | ---- | 45.7 | 39.7 | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
## Citations
```
@article{Cai_2019,
title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
ISSN={1939-3539},
url={http://dx.doi.org/10.1109/tpami.2019.2956516},
DOI={10.1109/tpami.2019.2956516},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Cai, Zhaowei and Vasconcelos, Nuno},
year={2019},
pages={11}
}
```


@@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: true
drop_last: true
collate_batch: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
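
Readers like the one above are declarative pipelines: `sample_transforms` run on each decoded sample, then `batch_transforms` on the collated batch. Below is a minimal sketch of how such a spec could be interpreted, with `registry` as a hypothetical mapping from names like `Decode` or `RandomFlip` to transform classes (the repo resolves these through its own operator registry in `ppdet/data`):
```python
def apply_transforms(sample, specs, registry):
    """Run a list of {Name: kwargs} transform specs over one sample.

    `sample` is a dict (image array, gt boxes, ...); each spec entry
    instantiates a registered transform and applies it to the sample.
    """
    for spec in specs:
        (name, kwargs), = spec.items()   # e.g. {'RandomFlip': {'prob': 0.5}}
        sample = registry[name](**kwargs)(sample)
    return sample

# Usage sketch, with specs parsed from the YAML above:
# specs = [{'Decode': {}}, {'RandomFlip': {'prob': 0.5}}, ...]
# sample = apply_transforms(sample, specs, registry)
```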


@@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: true
drop_last: true
collate_batch: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false


@@ -0,0 +1,97 @@
architecture: CascadeRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
CascadeRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: CascadeHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
CascadeHead:
head: CascadeTwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
CascadeTwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode:
name: RCNNBox
prior_box_var: [30.0, 30.0, 15.0, 15.0]
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
MaskHead:
head: MaskFeat
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
mask_assigner: MaskAssigner
share_bbox_feat: False
MaskFeat:
num_convs: 4
out_channel: 256
MaskAssigner:
mask_resolution: 28
MaskPostProcess:
binary_thresh: 0.5


@@ -0,0 +1,75 @@
architecture: CascadeRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
CascadeRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: CascadeHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
CascadeHead:
head: CascadeTwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
cascade_iou: [0.5, 0.6, 0.7]
use_random: True
CascadeTwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode:
name: RCNNBox
prior_box_var: [30.0, 30.0, 15.0, 15.0]
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5


@@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.001
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
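
This schedule pairs `LinearWarmup` over the first 1000 iterations with `PiecewiseDecay` at epoch milestones 8 and 11. A hedged sketch of the resulting learning rate, assuming warmup is counted in iterations and decay in epochs, as the fields suggest:
```python
def effective_lr(step, epoch, base_lr=0.01, gamma=0.1,
                 milestones=(8, 11), warmup_steps=1000, start_factor=0.001):
    """Learning rate under LinearWarmup followed by PiecewiseDecay."""
    if step < warmup_steps:
        # linear ramp from start_factor * base_lr up to base_lr
        alpha = step / warmup_steps
        return base_lr * (start_factor + (1.0 - start_factor) * alpha)
    # multiply by gamma once per milestone already passed
    return base_lr * gamma ** sum(epoch >= m for m in milestones)

# e.g. effective_lr(step=500, epoch=0) ~= 0.005; at epoch 9 it is 0.001
```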


@@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
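
The `_BASE_` list above assembles the final training config from dataset, runtime, optimizer, model, and reader fragments, with keys in the current file overriding the bases. A minimal sketch of that resolution, assuming a plain recursive dict merge (PaddleDetection's actual loader does more, e.g. handling custom YAML tags):
```python
import os
import yaml

def deep_merge(dst, src):
    """Recursively merge src into dst; keys in src win on conflict."""
    for key, val in src.items():
        if isinstance(val, dict) and isinstance(dst.get(key), dict):
            deep_merge(dst[key], val)
        else:
            dst[key] = val
    return dst

def load_config(path):
    """Load a config file, resolving its _BASE_ includes depth-first.

    Note: real configs also contain custom tags such as !PiecewiseDecay,
    which need registered YAML constructors; this sketch ignores that.
    """
    with open(path) as f:
        cfg = yaml.safe_load(f)
    merged = {}
    for base in cfg.pop('_BASE_', []):
        deep_merge(merged, load_config(os.path.join(os.path.dirname(path), base)))
    return deep_merge(merged, cfg)  # local keys override the bases
```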


@@ -0,0 +1,18 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]


@@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_mask_rcnn_r50_fpn.yml',
'_base_/cascade_mask_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000


@@ -0,0 +1,8 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final


@@ -0,0 +1,18 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]


@@ -0,0 +1,29 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/cascade_rcnn_r50_fpn.yml',
'_base_/cascade_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
ResNet:
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
lr_mult_list: [0.05, 0.05, 0.1, 0.15]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [12, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000


@@ -0,0 +1,37 @@
English | [简体中文](README_cn.md)
# CenterNet (CenterNet: Objects as Points)
## Table of Contents
- [Introduction](#Introduction)
- [Model Zoo](#Model_Zoo)
- [Citations](#Citations)
## Introduction
[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector that models an object as a single point -- the center point of its bounding box. The detector uses keypoint estimation to find center points and regresses all other object properties from them. This center-point approach is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding-box-based detectors.
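Because objects are read off as heatmap peaks, the decode step reduces to local-maximum extraction plus top-k selection, with a 3x3 max filter standing in for NMS. Below is a minimal numpy sketch for a single class heatmap; in this repo, `CenterNetPostProcess` performs the equivalent on paddle tensors, with `max_per_img` playing the role of `k`:
```python
import numpy as np

def topk_centers(heatmap, k=100):
    """Return up to k (y, x, score) peaks of one class heatmap.

    A 3x3 max filter keeps only local maxima, which replaces NMS
    in the center-point formulation.
    """
    H, W = heatmap.shape
    padded = np.pad(heatmap, 1, constant_values=-np.inf)
    pooled = np.max([padded[dy:dy + H, dx:dx + W]
                     for dy in range(3) for dx in range(3)], axis=0)
    peaks = np.where(heatmap == pooled, heatmap, 0.0)  # suppress non-maxima
    idx = np.argsort(peaks.ravel())[::-1][:k]
    ys, xs = np.unravel_index(idx, (H, W))
    return [(y, x, peaks[y, x]) for y, x in zip(ys, xs)]
```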
## Model Zoo
### CenterNet Results on COCO-val 2017
| backbone | input shape | mAP | FPS | download | config |
| :--------------| :------- | :----: | :------: | :----: |:-----: |
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
| DLA-34 | 512x512 | 37.6 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
| MobileNetV3_small + DLAUp | 512x512 | 17.0 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
## Citations
```
@article{zhou2019objects,
title={Objects as points},
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
journal={arXiv preprint arXiv:1904.07850},
year={2019}
}
```


@@ -0,0 +1,36 @@
简体中文 | [English](README.md)
# CenterNet (CenterNet: Objects as Points)
## Table of Contents
- [Introduction](#Introduction)
- [Model Zoo](#Model_Zoo)
- [Citations](#Citations)
## Introduction
[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector that represents an object as a single point -- the center point of its bounding box. CenterNet locates center points via keypoint estimation and regresses the object's other properties from them. This center-point formulation is end-to-end trainable and more efficient than anchor-based detectors.
## Model Zoo
### CenterNet Results on COCO-val 2017
| backbone | input shape | mAP | FPS | download | config |
| :--------------| :------- | :----: | :------: | :----: |:-----: |
| DLA-34(paper) | 512x512 | 37.4 | - | - | - |
| DLA-34 | 512x512 | 37.6 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
| ResNet50 + DLAUp | 512x512 | 38.9 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
| MobileNetV1 + DLAUp | 512x512 | 28.2 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
| MobileNetV3_small + DLAUp | 512x512 | 17.0 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
| MobileNetV3_large + DLAUp | 512x512 | 27.1 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
| ShuffleNetV2 + DLAUp | 512x512 | 23.8 | - | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
## Citations
```
@article{zhou2019objects,
title={Objects as points},
author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
journal={arXiv preprint arXiv:1904.07850},
year={2019}
}
```


@@ -0,0 +1,22 @@
architecture: CenterNet
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
CenterNet:
backbone: DLA
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
DLA:
depth: 34
CenterNetDLAFPN:
down_ratio: 4
CenterNetHead:
head_planes: 256
regress_ltrb: False
CenterNetPostProcess:
max_per_img: 100
regress_ltrb: False


@@ -0,0 +1,34 @@
architecture: CenterNet
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
norm_type: sync_bn
use_ema: true
ema_decay: 0.9998
CenterNet:
backbone: ResNet
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
ResNet:
depth: 50
variant: d
return_idx: [0, 1, 2, 3]
freeze_at: -1
norm_decay: 0.
dcn_v2_stages: [3]
CenterNetDLAFPN:
first_level: 0
last_level: 4
down_ratio: 4
dcn_v2: False
CenterNetHead:
head_planes: 256
regress_ltrb: False
CenterNetPostProcess:
max_per_img: 100
regress_ltrb: False


@@ -0,0 +1,35 @@
worker_num: 4
TrainReader:
inputs_def:
image_shape: [3, 512, 512]
sample_transforms:
- Decode: {}
- FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
- CenterRandColor: {}
- Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
- Permute: {}
- Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
batch_size: 16
shuffle: True
drop_last: True
use_shared_memory: True
EvalReader:
sample_transforms:
- Decode: {}
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
- Permute: {}
batch_size: 1
TestReader:
inputs_def:
image_shape: [3, 512, 512]
sample_transforms:
- Decode: {}
- WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
- Permute: {}
batch_size: 1


@@ -0,0 +1,14 @@
epoch: 140
LearningRate:
base_lr: 0.0005
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [90, 120]
use_warmup: False
OptimizerBuilder:
optimizer:
type: Adam
regularizer: NULL


@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_140e.yml',
'_base_/centernet_dla34.yml',
'_base_/centernet_reader.yml',
]
weights: output/centernet_dla34_140e_coco/model_final


@@ -0,0 +1,21 @@
_BASE_: [
'centernet_r50_140e_coco.yml'
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
weights: output/centernet_mbv1_140e_coco/model_final
CenterNet:
backbone: MobileNet
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
MobileNet:
scale: 1.
with_extra_blocks: false
extra_block_filters: []
feature_maps: [3, 5, 11, 13]
TrainReader:
batch_size: 32


@@ -0,0 +1,22 @@
_BASE_: [
'centernet_r50_140e_coco.yml'
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
weights: output/centernet_mbv3_large_140e_coco/model_final
CenterNet:
backbone: MobileNetV3
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
MobileNetV3:
model_name: large
scale: 1.
with_extra_blocks: false
extra_block_filters: []
feature_maps: [4, 7, 13, 16]
TrainReader:
batch_size: 32


@@ -0,0 +1,28 @@
_BASE_: [
'centernet_r50_140e_coco.yml'
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
weights: output/centernet_mbv3_small_140e_coco/model_final
CenterNet:
backbone: MobileNetV3
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
MobileNetV3:
model_name: small
scale: 1.
with_extra_blocks: false
extra_block_filters: []
feature_maps: [4, 9, 12]
CenterNetDLAFPN:
first_level: 0
last_level: 3
down_ratio: 8
dcn_v2: False
TrainReader:
batch_size: 32


@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_140e.yml',
'_base_/centernet_r50.yml',
'_base_/centernet_reader.yml',
]
weights: output/centernet_r50_140e_coco/model_final


@@ -0,0 +1,33 @@
_BASE_: [
'centernet_r50_140e_coco.yml'
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ShuffleNetV2_x1_0_pretrained.pdparams
weights: output/centernet_shufflenetv2_140e_coco/model_final
CenterNet:
backbone: ShuffleNetV2
neck: CenterNetDLAFPN
head: CenterNetHead
post_process: CenterNetPostProcess
ShuffleNetV2:
scale: 1.0
feature_maps: [5, 13, 17]
act: leaky_relu
CenterNetDLAFPN:
first_level: 0
last_level: 3
down_ratio: 8
dcn_v2: False
TrainReader:
batch_size: 32
TestReader:
sample_transforms:
- Decode: {}
- WarpAffine: {keep_res: False, input_h: 512, input_w: 512}
- NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
- Permute: {}


@@ -0,0 +1,68 @@
简体中文 | [English](README.md)
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
## Table of Contents
- [Introduction](#Introduction)
- [Model Zoo](#Model_Zoo)
- [Citations](#Citations)
## Introduction
[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. It introduces a straight-line prior for lane proposals, a lane line IoU loss, and a matching NMS method; it fuses the high-level contextual features of lane tracks with low-level features and refines them across FPN scales, achieving SOTA performance on lane detection benchmarks.
## Model Zoo
### CLRNet Results on CULane
| backbone | mF1 | F1@50 | F1@75 | download | config | training log |
| :--------------| :------- | :----: | :------: | :----: |:-----: |:-----: |
| ResNet-18 | 54.98 | 79.46 | 62.10 | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |[log](https://bj.bcebos.com/v1/paddledet/logs/train_clrnet_r18_15_culane.log)|
### Dataset Download
Download the [CULane dataset](https://xingangpan.github.io/projects/CULane.html) and extract it to `dataset/culane`.
The dataset directory structure should look like this:
```shell
culane/driver_xx_xxframe # data folders x6
culane/laneseg_label_w16 # lane segmentation labels
culane/list # data lists
```
If you download via the Baidu Cloud link, make sure the files extracted from `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` all end up under the `driver_23_30frame` directory.
A small subset for testing has been uploaded to PaddleDetection and is downloaded and extracted automatically by the training script; to reproduce the reported results, train on the full dataset from the link above.
### Training
- Single-GPU training
```shell
python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
```
- Multi-GPU training
```shell
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
```
### Evaluation
```shell
python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
```
### Inference
```shell
python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
```
Note: inference does not yet support static-graph model deployment.
## Citations
```
@InProceedings{Zheng_2022_CVPR,
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {898-907}
}
```


@@ -0,0 +1,68 @@
English | [简体中文](README_cn.md)
# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
## Table of Contents
- [Introduction](#Introduction)
- [Model Zoo](#Model_Zoo)
- [Citations](#Citations)
## Introduction
[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. It introduces a straight-line prior for lane proposals, a line IoU loss, and a matching NMS method; it fuses high-level contextual lane features with low-level features and refines them across FPN scales, achieving SOTA performance on lane detection benchmarks.
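The line IoU idea treats each lane point, sampled at a fixed set of image rows, as a horizontal segment of radius r, so overlap between two lanes accumulates row by row. A minimal numpy sketch under that assumption (the repo's loss operates on batched paddle tensors with a configurable radius):
```python
import numpy as np

def line_iou(pred_x, gt_x, radius=7.5, invalid=-1.0):
    """1-D IoU between two lanes given per-row x coordinates.

    Rows where either lane is absent (marked `invalid`) are ignored.
    """
    valid = (pred_x != invalid) & (gt_x != invalid)
    inter = np.minimum(pred_x + radius, gt_x + radius) \
          - np.maximum(pred_x - radius, gt_x - radius)
    union = np.maximum(pred_x + radius, gt_x + radius) \
          - np.minimum(pred_x - radius, gt_x - radius)
    inter = np.clip(inter, 0.0, None) * valid
    union = union * valid
    return inter.sum() / max(union.sum(), 1e-9)
```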
## Model Zoo
### CLRNet Results on CULane dataset
| backbone | mF1 | F1@50 | F1@75 | download | config |
| :--------------| :------- | :----: | :------: | :----: |:-----: |
| ResNet-18 | 54.98 | 79.46 | 62.10 | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |
### Download
Download [CULane](https://xingangpan.github.io/projects/CULane.html), then extract it to `dataset/culane`.
The CULane directory structure should look like this:
```shell
culane/driver_xx_xxframe # data folders x6
culane/laneseg_label_w16 # lane segmentation labels
culane/list # data lists
```
If you use Baidu Cloud, make sure that images in `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` end up in a single `driver_23_30frame` folder, not two separate folders, after you decompress them.
We have uploaded a small subset of the CULane dataset to PaddleDetection for code checking; the training script below downloads it automatically. To reproduce the reported results, download the full dataset from the link above and train on it.
### Training
- Single-GPU training
```shell
python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
```
- Multi-GPU training
```shell
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
```
### Evaluation
```shell
python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
```
### Inference
```shell
python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
```
Note: inference does not yet support static-graph model deployment.
## Citations
```
@InProceedings{Zheng_2022_CVPR,
author = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
title = {CLRNet: Cross Layer Refinement Network for Lane Detection},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {898-907}
}
```


@@ -0,0 +1,41 @@
architecture: CLRNet
CLRNet:
backbone: CLRResNet
neck: CLRFPN
clr_head: CLRHead
CLRResNet:
resnet: 'resnet18'
pretrained: True
CLRFPN:
in_channels: [128,256,512]
out_channel: 64
extra_stage: 0
CLRHead:
prior_feat_channels: 64
fc_hidden_dim: 64
num_priors: 192
num_fc: 2
refine_layers: 3
sample_points: 36
loss: CLRNetLoss
conf_threshold: 0.4
nms_thres: 0.8
CLRNetLoss:
cls_loss_weight : 2.0
xyt_loss_weight : 0.2
iou_loss_weight : 2.0
seg_loss_weight : 1.0
refine_layers : 3
ignore_label: 255
bg_weight: 0.4
# for visualize lane detection results
sample_y:
start: 589
end: 230
step: -20


@@ -0,0 +1,37 @@
worker_num: 10
img_h: &img_h 320
img_w: &img_w 800
ori_img_h: &ori_img_h 590
ori_img_w: &ori_img_w 1640
num_points: &num_points 72
max_lanes: &max_lanes 4
TrainReader:
batch_size: 24
batch_transforms:
- CULaneTrainProcess: {img_h: *img_h, img_w: *img_w}
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
shuffle: True
drop_last: False
EvalReader:
batch_size: 24
batch_transforms:
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
shuffle: False
drop_last: False
TestReader:
batch_size: 24
batch_transforms:
- CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
- CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
shuffle: False
drop_last: False


@@ -0,0 +1,14 @@
epoch: 15
snapshot_epoch: 5
LearningRate:
base_lr: 0.6e-3
schedulers:
- !CosineDecay
max_epochs: 15
use_warmup: False
OptimizerBuilder:
regularizer: False
optimizer:
type: AdamW


@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/culane.yml',
'_base_/clrnet_reader.yml',
'_base_/clrnet_r18_fpn.yml',
'_base_/optimizer_1x.yml',
'../runtime.yml'
]
weights: output/clr_resnet18_culane/model_final


@@ -0,0 +1,20 @@
# ConvNeXt (A ConvNet for the 2020s)
## Model Zoo
### ConvNeXt on COCO
| Model | Input Size | Images/GPU | Lr Schedule | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) | Download | Config |
| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |
| PP-YOLOE-ConvNeXt-tiny | 640 | 16 | 36e | 44.6 | 63.3 | 33.04 | 13.87 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_convnext_tiny_36e_coco.pdparams) | [config](./ppyoloe_convnext_tiny_36e_coco.yml) |
| YOLOX-ConvNeXt-s | 640 | 8 | 36e | 44.6 | 65.3 | 36.20 | 27.52 | [model](https://paddledet.bj.bcebos.com/models/yolox_convnext_s_36e_coco.pdparams) | [config](./yolox_convnext_s_36e_coco.yml) |
## Citations
```
@Article{liu2022convnet,
author = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie},
title = {A ConvNet for the 2020s},
journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
year = {2022},
}
```


@@ -0,0 +1,55 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../ppyoloe/_base_/ppyoloe_crn.yml',
'../ppyoloe/_base_/ppyoloe_reader.yml',
]
depth_mult: 0.25
width_mult: 0.50
log_iter: 100
snapshot_epoch: 5
weights: output/ppyoloe_convnext_tiny_36e_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
YOLOv3:
backbone: ConvNeXt
neck: CustomCSPPAN
yolo_head: PPYOLOEHead
post_process: ~
ConvNeXt:
arch: 'tiny'
drop_path_rate: 0.4
layer_scale_init_value: 1.0
return_idx: [1, 2, 3]
PPYOLOEHead:
static_assigner_epoch: 12
nms:
nms_top_k: 1000
keep_top_k: 300
score_threshold: 0.01
nms_threshold: 0.7
TrainReader:
batch_size: 16
epoch: 36
LearningRate:
base_lr: 0.0002
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [36]
use_warmup: false
OptimizerBuilder:
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0005


@@ -0,0 +1,58 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../yolox/_base_/yolox_cspdarknet.yml',
'../yolox/_base_/yolox_reader.yml'
]
depth_mult: 0.33
width_mult: 0.50
log_iter: 100
snapshot_epoch: 5
weights: output/yolox_convnext_s_36e_coco/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
YOLOX:
backbone: ConvNeXt
neck: YOLOCSPPAN
head: YOLOXHead
size_stride: 32
size_range: [15, 25] # multi-scale range [480*480 ~ 800*800]
ConvNeXt:
arch: 'tiny'
drop_path_rate: 0.4
layer_scale_init_value: 1.0
return_idx: [1, 2, 3]
TrainReader:
batch_size: 8
mosaic_epoch: 30
YOLOXHead:
l1_epoch: 30
nms:
name: MultiClassNMS
nms_top_k: 10000
keep_top_k: 1000
score_threshold: 0.001
nms_threshold: 0.65
epoch: 36
LearningRate:
base_lr: 0.0002
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [36]
use_warmup: false
OptimizerBuilder:
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0005


@@ -0,0 +1,21 @@
metric: COCO
num_classes: 80
TrainDataset:
name: COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
name: COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
allow_empty: true
TestDataset:
name: ImageFolder
anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'


@@ -0,0 +1,20 @@
metric: COCO
num_classes: 80
TrainDataset:
name: COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
EvalDataset:
name: COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
TestDataset:
name: ImageFolder
anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'


@@ -0,0 +1,28 @@
metric: CULaneMetric
num_classes: 5 # 4 lanes + background
cut_height: &cut_height 270
dataset_dir: &dataset_dir dataset/culane
TrainDataset:
name: CULaneDataSet
dataset_dir: *dataset_dir
list_path: 'list/train_gt.txt'
split: train
cut_height: *cut_height
EvalDataset:
name: CULaneDataSet
dataset_dir: *dataset_dir
list_path: 'list/test.txt'
split: test
cut_height: *cut_height
TestDataset:
name: CULaneDataSet
dataset_dir: *dataset_dir
list_path: 'list/test.txt'
split: test
cut_height: *cut_height


@@ -0,0 +1,21 @@
metric: RBOX
num_classes: 15
TrainDataset:
!COCODataSet
image_dir: trainval1024/images
anno_path: trainval1024/DOTA_trainval1024.json
dataset_dir: dataset/dota/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
EvalDataset:
!COCODataSet
image_dir: trainval1024/images
anno_path: trainval1024/DOTA_trainval1024.json
dataset_dir: dataset/dota/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
TestDataset:
!ImageFolder
anno_path: test1024/DOTA_test1024.json
dataset_dir: dataset/dota/


@@ -0,0 +1,21 @@
metric: RBOX
num_classes: 15
TrainDataset:
!COCODataSet
image_dir: trainval1024/images
anno_path: trainval1024/DOTA_trainval1024.json
dataset_dir: dataset/dota_ms/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
EvalDataset:
!COCODataSet
image_dir: trainval1024/images
anno_path: trainval1024/DOTA_trainval1024.json
dataset_dir: dataset/dota_ms/
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
TestDataset:
!ImageFolder
anno_path: test1024/DOTA_test1024.json
dataset_dir: dataset/dota_ms/


@@ -0,0 +1,25 @@
metric: MCMOT
num_classes: 10
# uses the VisDrone2019 MOT dataset with 10 classes by default; modify it for your needs.
# for MCMOT training
TrainDataset:
!MCMOTDataSet
dataset_dir: dataset/mot
image_lists: ['visdrone_mcmot.train']
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
label_list: label_list.txt
# for MCMOT evaluation
# If you want to change the MCMOT evaluation dataset, please modify 'data_root'
EvalMOTDataset:
!MOTImageFolder
dataset_dir: dataset/mot
data_root: visdrone_mcmot/images/val
keep_ori_im: False # set True to save visualization images or video, or when used in DeepSORT
# for MCMOT video inference
TestMOTDataset:
!MOTImageFolder
dataset_dir: dataset/mot
keep_ori_im: True # set True to save visualization images or video


@@ -0,0 +1,23 @@
metric: MOT
num_classes: 1
# for MOT training
TrainDataset:
!MOTDataSet
dataset_dir: dataset/mot
image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train']
data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
# for MOT evaluation
# If you want to change the MOT evaluation dataset, please modify 'data_root'
EvalMOTDataset:
!MOTImageFolder
dataset_dir: dataset/mot
data_root: MOT16/images/train
keep_ori_im: False # set True to save visualization images or video, or when used in DeepSORT
# for MOT video inference
TestMOTDataset:
!MOTImageFolder
dataset_dir: dataset/mot
keep_ori_im: True # set True to save visualization images or video


@@ -0,0 +1,21 @@
metric: COCO
num_classes: 365
TrainDataset:
!COCODataSet
image_dir: train
anno_path: annotations/zhiyuan_objv2_train.json
dataset_dir: dataset/objects365
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
image_dir: val
anno_path: annotations/zhiyuan_objv2_val.json
dataset_dir: dataset/objects365
allow_empty: true
TestDataset:
!ImageFolder
anno_path: annotations/zhiyuan_objv2_val.json
dataset_dir: dataset/objects365/


@@ -0,0 +1,21 @@
metric: VOC
map_type: integral
num_classes: 4
TrainDataset:
name: VOCDataSet
dataset_dir: dataset/roadsign_voc
anno_path: train.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
name: VOCDataSet
dataset_dir: dataset/roadsign_voc
anno_path: valid.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
name: ImageFolder
anno_path: dataset/roadsign_voc/label_list.txt


@@ -0,0 +1,47 @@
metric: SNIPERCOCO
num_classes: 80
TrainDataset:
!SniperCOCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
allow_empty: true
is_trainset: true
image_target_sizes: [2000, 1000]
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
chip_target_size: 512
chip_target_stride: 200
use_neg_chip: false
max_neg_num_per_im: 8
EvalDataset:
!SniperCOCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
allow_empty: true
is_trainset: false
image_target_sizes: [2000, 1000]
valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
chip_target_size: 512
chip_target_stride: 200
max_per_img: -1
nms_thresh: 0.5
TestDataset:
!SniperCOCODataSet
image_dir: val2017
dataset_dir: dataset/coco
is_trainset: false
image_target_sizes: [2000, 1000]
valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
chip_target_size: 500
chip_target_stride: 200
max_per_img: -1
nms_thresh: 0.5


@@ -0,0 +1,47 @@
metric: SNIPERCOCO
num_classes: 9
TrainDataset:
!SniperCOCODataSet
image_dir: train
anno_path: annotations/train.json
dataset_dir: dataset/VisDrone2019_coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
allow_empty: true
is_trainset: true
image_target_sizes: [8145, 2742]
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
chip_target_size: 1536
chip_target_stride: 1184
use_neg_chip: false
max_neg_num_per_im: 8
EvalDataset:
!SniperCOCODataSet
image_dir: val
anno_path: annotations/val.json
dataset_dir: dataset/VisDrone2019_coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
allow_empty: true
is_trainset: false
image_target_sizes: [8145, 2742]
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
chip_target_size: 1536
chip_target_stride: 1184
max_per_img: -1
nms_thresh: 0.5
TestDataset:
!SniperCOCODataSet
image_dir: val
dataset_dir: dataset/VisDrone2019_coco
is_trainset: false
image_target_sizes: [8145, 2742]
valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
chip_target_size: 1536
chip_target_stride: 1184
max_per_img: -1
nms_thresh: 0.5


@@ -0,0 +1,21 @@
metric: RBOX
num_classes: 9
TrainDataset:
!COCODataSet
image_dir: images
anno_path: annotations/train.json
dataset_dir: dataset/spine_coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
EvalDataset:
!COCODataSet
image_dir: images
anno_path: annotations/valid.json
dataset_dir: dataset/spine_coco
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
TestDataset:
!ImageFolder
anno_path: annotations/valid.json
dataset_dir: dataset/spine_coco


@@ -0,0 +1,22 @@
metric: COCO
num_classes: 10
TrainDataset:
!COCODataSet
image_dir: VisDrone2019-DET-train
anno_path: train.json
dataset_dir: dataset/visdrone
data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
EvalDataset:
!COCODataSet
image_dir: VisDrone2019-DET-val
anno_path: val.json
# image_dir: test_dev
# anno_path: test_dev.json
dataset_dir: dataset/visdrone
TestDataset:
!ImageFolder
anno_path: val.json
dataset_dir: dataset/visdrone


@@ -0,0 +1,21 @@
metric: VOC
map_type: 11point
num_classes: 20
TrainDataset:
name: VOCDataSet
dataset_dir: dataset/voc
anno_path: trainval.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
EvalDataset:
name: VOCDataSet
dataset_dir: dataset/voc
anno_path: test.txt
label_list: label_list.txt
data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
TestDataset:
name: ImageFolder
anno_path: dataset/voc/label_list.txt


@@ -0,0 +1,20 @@
metric: WiderFace
num_classes: 1
TrainDataset:
!WIDERFaceDataSet
dataset_dir: dataset/wider_face
anno_path: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
data_fields: ['image', 'gt_bbox', 'gt_class']
EvalDataset:
!WIDERFaceDataSet
dataset_dir: dataset/wider_face
anno_path: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
data_fields: ['image']
TestDataset:
!ImageFolder
use_default_label: true


@@ -0,0 +1,37 @@
### Deformable ConvNets v2
| Backbone | Model Type | Conv | Images/GPU | Lr Schedule | Inf Time (fps) | Box AP | Mask AP | Download | Config |
| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
| ResNet50-FPN | Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 42.7 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | c3-c5 | 1 | 2x | - | 43.7 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 45.1 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | - | 46.5 | - | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | - | 42.7 | 38.4 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | - | 44.6 | 39.8 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 45.6 | 40.6 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | - | 47.3 | 42.0 | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNet50-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 42.1 | - | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 1 | 1x | - | 48.8 | - | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
**Notes:**
- Deformable convolution v2 (`dcn_v2`) follows the paper [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
- `c3-c5` means `dcn` is added in ResNet stages 3 through 5.
## Citations
```
@inproceedings{dai2017deformable,
title={Deformable Convolutional Networks},
author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
booktitle={Proceedings of the IEEE international conference on computer vision},
year={2017}
}
@article{zhu2018deformable,
title={Deformable ConvNets v2: More Deformable, Better Results},
author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
journal={arXiv preprint arXiv:1811.11168},
year={2018}
}
```


@@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../cascade_rcnn/_base_/optimizer_1x.yml',
'../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
'../cascade_rcnn/_base_/cascade_fpn_reader.yml',
]
weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,16 @@
_BASE_: [
'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'../faster_rcnn/_base_/optimizer_1x.yml',
'../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
'../faster_rcnn/_base_/faster_fpn_reader.yml',
]
weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,15 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,26 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000


@@ -0,0 +1,17 @@
_BASE_: [
'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
groups: 64
base_width: 4
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,15 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,16 @@
_BASE_: [
'../datasets/coco_instance.yml',
'../runtime.yml',
'../mask_rcnn/_base_/optimizer_1x.yml',
'../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
'../mask_rcnn/_base_/mask_fpn_reader.yml',
]
weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
ResNet:
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,26 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 50
variant: d
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000


@@ -0,0 +1,17 @@
_BASE_: [
'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
ResNet:
# for ResNeXt: groups, base_width, base_channels
depth: 101
variant: d
groups: 64
base_width: 4
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
dcn_v2_stages: [1,2,3]


@@ -0,0 +1,36 @@
# Deformable DETR
## Introduction
Deformable DETR is an object detection model based on DETR. We reproduce the model from the paper.
## Model Zoo
| Backbone | Model | Images/GPU | Epochs | Box AP | Config | Log | Download |
|:--------:|:---------------:|:----------:|:------:|:------:|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
| R-50 | Deformable DETR | 2 | 50 | 44.5 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/deformable_detr_r50_1x_coco_44.5.log) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
**Notes:**
- Deformable DETR is trained on the COCO train2017 dataset and evaluated on val2017 with `mAP(IoU=0.5:0.95)`.
- Deformable DETR is trained for 50 epochs on 8 GPUs.
Multi-GPU training:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/deformable_detr/deformable_detr_r50_1x_coco.yml --fleet
```
## Citations
```
@inproceedings{
zhu2021deformable,
title={Deformable DETR: Deformable Transformers for End-to-End Object Detection},
author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai},
booktitle={International Conference on Learning Representations},
year={2021},
url={https://openreview.net/forum?id=gZ9hCDWe6ke}
}
```


@@ -0,0 +1,48 @@
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
hidden_dim: 256
use_focal_loss: True
DETR:
backbone: ResNet
transformer: DeformableTransformer
detr_head: DeformableDETRHead
post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
num_stages: 4
DeformableTransformer:
num_queries: 300
position_embed_type: sine
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 1024
dropout: 0.1
activation: relu
num_feature_levels: 4
num_encoder_points: 4
num_decoder_points: 4
DeformableDETRHead:
num_mlp_layers: 3
DETRLoss:
loss_coeff: {class: 2, bbox: 5, giou: 2}
aux_loss: True
HungarianMatcher:
matcher_coeff: {class: 2, bbox: 5, giou: 2}


@@ -0,0 +1,44 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomFlip: {prob: 0.5}
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
transforms2: [
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
RandomSizeCrop: { min_size: 384, max_size: 600 },
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- NormalizeBox: {}
- BboxXYXY2XYWH: {}
- Permute: {}
batch_transforms:
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 2
shuffle: true
drop_last: true
collate_batch: false
use_shared_memory: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false


@@ -0,0 +1,16 @@
epoch: 50
LearningRate:
base_lr: 0.0002
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [40]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001


@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/deformable_optimizer_1x.yml',
'_base_/deformable_detr_r50.yml',
'_base_/deformable_detr_reader.yml',
]
weights: output/deformable_detr_r50_1x_coco/model_final
find_unused_parameters: True


@@ -0,0 +1,39 @@
# DETR
## Introduction
DETR is a transformer-based object detection model. We reproduce the model from the paper.
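Training DETR hinges on a one-to-one bipartite match between queries and ground-truth boxes before the loss is computed. Below is a minimal sketch with SciPy, assuming the per-pair cost matrices are precomputed; the weights mirror the `matcher_coeff` entries in the configs later in this commit:
```python
import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_match(cost_class, cost_bbox, cost_giou,
                    w_class=1.0, w_bbox=5.0, w_giou=2.0):
    """Optimal one-to-one assignment of predictions to GT boxes.

    Each cost_* is a (num_queries, num_gt) matrix; the weights mirror
    the matcher_coeff values used by the configs in this commit.
    Returns (query_indices, gt_indices) of the optimal assignment.
    """
    cost = w_class * cost_class + w_bbox * cost_bbox + w_giou * cost_giou
    return linear_sum_assignment(cost)

# Usage sketch with random costs for 100 queries and 5 GT boxes:
# rows, cols = hungarian_match(*np.random.rand(3, 100, 5))
```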
## Model Zoo
| Backbone | Model | Images/GPU | Inf time (fps) | Box AP | Config | Download |
|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
| R-50 | DETR | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) |
**Notes:**
- DETR is trained on the COCO train2017 dataset and evaluated on val2017 with `mAP(IoU=0.5:0.95)`.
- DETR is trained for 500 epochs on 8 GPUs.
Multi-GPU training:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/detr/detr_r50_1x_coco.yml --fleet
```
## Citations
```
@inproceedings{detr,
author = {Nicolas Carion and
Francisco Massa and
Gabriel Synnaeve and
Nicolas Usunier and
Alexander Kirillov and
Sergey Zagoruyko},
title = {End-to-End Object Detection with Transformers},
booktitle = {ECCV},
year = {2020}
}
```


@@ -0,0 +1,44 @@
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
hidden_dim: 256
DETR:
backbone: ResNet
transformer: DETRTransformer
detr_head: DETRHead
post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [3]
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
num_stages: 4
DETRTransformer:
num_queries: 100
position_embed_type: sine
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 2048
dropout: 0.1
activation: relu
DETRHead:
num_mlp_layers: 3
DETRLoss:
loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
aux_loss: True
HungarianMatcher:
matcher_coeff: {class: 1, bbox: 5, giou: 2}


@@ -0,0 +1,44 @@
worker_num: 0
TrainReader:
sample_transforms:
- Decode: {}
- RandomFlip: {prob: 0.5}
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
transforms2: [
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
RandomSizeCrop: { min_size: 384, max_size: 600 },
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- NormalizeBox: {}
- BboxXYXY2XYWH: {}
- Permute: {}
batch_transforms:
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 2
shuffle: true
drop_last: true
collate_batch: false
use_shared_memory: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false


@@ -0,0 +1,16 @@
epoch: 500
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [400]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001


@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/detr_r50.yml',
'_base_/detr_reader.yml',
]
weights: output/detr_r50_1x_coco/model_final
find_unused_parameters: True


@@ -0,0 +1,39 @@
# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
## Introduction
[DINO](https://arxiv.org/abs/2203.03605) is an object detection model based on DETR. We reproduce the model from the paper.
## Model Zoo
| Backbone | Model | Epochs | Box AP | Config | Log | Download |
|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:-------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
| R-50 | dino_r50_4scale | 12 | 49.5 | [config](./dino_r50_4scale_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_1x_coco_49.5.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
| R-50 | dino_r50_4scale | 24 | 50.8 | [config](./dino_r50_4scale_2x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_2x_coco_50.8.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
**Notes:**
- DINO is trained on the COCO train2017 dataset and evaluated on val2017; Box AP is `mAP(IoU=0.5:0.95)`.
- DINO is trained with 4 GPUs.
Multi-GPU training:
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --fleet --eval
```
## Custom Operator
- For the multi-scale deformable attention custom operator, see [here](../../ppdet/modeling/transformers/ext_op).
## Citations
```
@misc{zhang2022dino,
title={DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection},
author={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel M. Ni and Heung-Yeung Shum},
year={2022},
eprint={2203.03605},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```

View File

@@ -0,0 +1,45 @@
architecture: DETR
# pretrain_weights: # rewrite in FocalNet.pretrained in ppdet/modeling/backbones/focalnet.py
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
hidden_dim: 256
use_focal_loss: True
DETR:
backbone: FocalNet
transformer: DINOTransformer
detr_head: DINOHead
post_process: DETRPostProcess
FocalNet:
arch: 'focalnet_L_384_22k_fl4'
out_indices: [1, 2, 3]
pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
DINOTransformer:
num_queries: 900
position_embed_type: sine
num_levels: 4
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 2048
dropout: 0.0
activation: relu
pe_temperature: 20
pe_offset: 0.0
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: True
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300

View File

@@ -0,0 +1,49 @@
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
hidden_dim: 256
use_focal_loss: True
DETR:
backbone: ResNet
transformer: DINOTransformer
detr_head: DINOHead
post_process: DETRPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [1, 2, 3]
lr_mult_list: [0.0, 0.1, 0.1, 0.1]
num_stages: 4
DINOTransformer:
num_queries: 900
position_embed_type: sine
num_levels: 4
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 2048
dropout: 0.0
activation: relu
pe_temperature: 20
pe_offset: 0.0
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: True
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300

View File

@@ -0,0 +1,40 @@
worker_num: 4
TrainReader:
sample_transforms:
- Decode: {}
- RandomFlip: {prob: 0.5}
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
transforms2: [
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
RandomSizeCrop: { min_size: 384, max_size: 600 },
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
}
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- NormalizeBox: {}
- BboxXYXY2XYWH: {}
- Permute: {}
batch_transforms:
- PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
batch_size: 4
shuffle: true
drop_last: true
collate_batch: false
use_shared_memory: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
batch_size: 1
TestReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
- Permute: {}
batch_size: 1

View File

@@ -0,0 +1,46 @@
architecture: DETR
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
hidden_dim: 256
use_focal_loss: True
DETR:
backbone: SwinTransformer
transformer: DINOTransformer
detr_head: DINOHead
post_process: DETRPostProcess
SwinTransformer:
arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
ape: false
drop_path_rate: 0.2
patch_norm: true
out_indices: [1, 2, 3]
DINOTransformer:
num_queries: 900
position_embed_type: sine
num_levels: 4
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 2048
dropout: 0.0
activation: relu
pe_temperature: 10000
pe_offset: -0.5
num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0
learnt_init_query: True
DINOHead:
loss:
name: DINOLoss
loss_coeff: {class: 1, bbox: 5, giou: 2}
aux_loss: True
matcher:
name: HungarianMatcher
matcher_coeff: {class: 2, bbox: 5, giou: 2}
DETRPostProcess:
num_top_queries: 300

View File

@@ -0,0 +1,16 @@
epoch: 12
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [11]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001

View File

@@ -0,0 +1,16 @@
epoch: 24
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [20]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001

View File

@@ -0,0 +1,16 @@
epoch: 36
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [33]
use_warmup: false
OptimizerBuilder:
clip_grad_by_norm: 0.1
regularizer: false
optimizer:
type: AdamW
weight_decay: 0.0001

View File

@@ -0,0 +1,11 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_1x.yml',
'_base_/dino_r50.yml',
'_base_/dino_reader.yml',
]
weights: output/dino_r50_4scale_1x_coco/model_final
find_unused_parameters: True
log_iter: 100

View File

@@ -0,0 +1,11 @@
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'_base_/optimizer_2x.yml',
'_base_/dino_r50.yml',
'_base_/dino_reader.yml',
]
weights: output/dino_r50_4scale_2x_coco/model_final
find_unused_parameters: True
log_iter: 100

View File

@@ -0,0 +1,176 @@
# Face Detection Model
## Introduction
`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models as well as classic ones.
![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
## Model Zoo
#### mAP on the WIDER-FACE dataset
| Network structure | Input size | Images/GPU | LR schedule | Easy/Medium/Hard Set | Prediction latency (SD855) | Model size (MB) | Download | Config |
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[Download](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[Download](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
**Notes:**
- We use a multi-scale evaluation strategy to obtain the mAP on the `Easy/Medium/Hard Set`. See [Evaluation on the WIDER-FACE dataset](#evaluation-on-the-wider-face-dataset) for details.
## Quick Start
### Data preparation
We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and testing; the official website provides a detailed introduction to the data.
- WIDER-Face data source:
  Load a dataset of type `wider_face` with the following directory structure:
```
dataset/wider_face/
├── wider_face_split
│ ├── wider_face_train_bbx_gt.txt
│ ├── wider_face_val_bbx_gt.txt
├── WIDER_train
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
├── WIDER_val
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
```
- Manually download the dataset:
  To download the WIDER-FACE dataset, run the following command:
```
cd dataset/wider_face && ./download_wider_face.sh
```
### Parameter configuration
The configuration of the base model is in `configs/face_detection/_base_/blazeface.yml`.
The improved model adds FPN and SSH neck structures; its configuration file is `configs/face_detection/_base_/blazeface_fpn.yml`. FPN and SSH can be configured as required:
```yaml
BlazeNet:
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
  act: hard_swish #activation for BlazeBlock in the backbone: relu in the base model, hard_swish when FPN and SSH are added
BlazeNeck:
  neck_type : fpn_ssh #options: only_fpn, only_ssh, fpn_ssh
in_channel: [96,96]
```
### Training and evaluation
The training and evaluation workflow is the same as for other algorithms; see [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
**Note:** Face detection models currently do not support evaluation during training.
#### Evaluation on the WIDER-FACE dataset
- Step 1: Evaluate and generate the result files:
```shell
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
-o weights=output/blazeface_1000e/model_final \
multi_scale=True
```
Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in txt format will be generated in `output/pred`.
- Step 2: Download the official evaluation script and ground-truth files:
```
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
unzip eval_tools.zip && rm -f eval_tools.zip
```
- Step 3: Run the evaluation
Method 1: Python evaluation:
```
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
cd WiderFace-Evaluation
# Compile
python3 setup.py build_ext --inplace
# Run the evaluation
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
```
Method 2: MATLAB evaluation:
```
# In `eval_tools/wider_eval.m`, set the result path and the legend name for the PR curve:
pred_dir = './pred';
legend_name = 'Paddle-BlazeFace';
# `wider_eval.m` is the main entry point of the evaluation module. Run:
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
```
### Inference with a Python script
To support secondary development, here is an example of running inference via a Python script using the PaddleDetection whl package.
```python
import cv2
import paddle
import numpy as np
from ppdet.core.workspace import load_config
from ppdet.engine import Trainer
from ppdet.metrics import get_infer_results
from ppdet.data.transform.operators import NormalizeImage, Permute
if __name__ == '__main__':
    # Prepare basic parameters
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
cfg = load_config(config_path)
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
cfg.weights = weight_path
bbox_thre = 0.8
paddle.set_device('gpu')
    # Create the trainer and load weights
trainer = Trainer(cfg, mode='test')
trainer.load_weights(cfg.weights)
trainer.model.eval()
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
permuter = Permute()
    # Read the image
im = cv2.imread(infer_img_path)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # Build the input data dict
data_dict = {'image': im}
data_dict = normaler(data_dict)
data_dict = permuter(data_dict)
h, w, c = im.shape
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
    # Run prediction
outs = trainer.model(data_dict)
    # Post-process the outputs to get the final bbox results
for key in ['im_shape', 'scale_factor', 'im_id']:
outs[key] = data_dict[key]
for key, value in outs.items():
outs[key] = value.numpy()
clsid2catid, catid2name = {0: 'face'}, {0: 0}
batch_res = get_infer_results(outs, clsid2catid)
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
print(bbox)
```
## Citations
```
@misc{bazarevsky2019blazeface,
  title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
  author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
  year={2019},
  eprint={1907.05047},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```

View File

@@ -0,0 +1,176 @@
# Face Detection Model
## Introduction
`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models as well as classic ones.
![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
## Model Library
#### mAP on the WIDER-FACE dataset
| Network structure | Input size | Images/GPU | LR schedule | Easy/Medium/Hard Set | Prediction latency (SD855) | Model size (MB) | Download | Configuration File |
|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
| BlazeFace | 640 | 8 | 1000e | 0.885 / 0.855 / 0.731 | - | 0.472 |[link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
| BlazeFace-FPN-SSH | 640 | 8 | 1000e | 0.907 / 0.883 / 0.793 | - | 0.479 |[link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
**Notes:**
- We use a multi-scale evaluation strategy to obtain the mAP on the `Easy/Medium/Hard Set`. Please refer to [Evaluation on the WIDER-FACE dataset](#Evaluated-on-the-WIDER-FACE-Dataset) for details.
## Quick Start
### Data preparation
We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and testing; the official website provides a detailed introduction to the data.
- WIDER-Face data source:
  Load a dataset of type `wider_face` with the following directory structure:
```
dataset/wider_face/
├── wider_face_split
│ ├── wider_face_train_bbx_gt.txt
│ ├── wider_face_val_bbx_gt.txt
├── WIDER_train
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_100.jpg
│ │ │ ├── 0_Parade_marchingband_1_381.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
├── WIDER_val
│ ├── images
│ │ ├── 0--Parade
│ │ │ ├── 0_Parade_marchingband_1_1004.jpg
│ │ │ ├── 0_Parade_marchingband_1_1045.jpg
│ │ │ │ ...
│ │ ├── 10--People_Marching
│ │ │ ...
```
- Manually download the dataset:
To download the WIDER-FACE dataset, run the following command:
```
cd dataset/wider_face && ./download_wider_face.sh
```
### Parameter configuration
The configuration of the base model is in `configs/face_detection/_base_/blazeface.yml`.
The improved model adds FPN and SSH neck structures; its configuration file is `configs/face_detection/_base_/blazeface_fpn.yml`. FPN and SSH can be configured as required:
```yaml
BlazeNet:
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
  act: hard_swish #activation for BlazeBlock in the backbone: relu in the base model, hard_swish when FPN and SSH are added
BlazeNeck:
  neck_type : fpn_ssh #options: only_fpn, only_ssh, fpn_ssh
in_channel: [96,96]
```
### Training and Evaluation
The training and evaluation workflow is the same as for other algorithms; see [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
**Note:** Face detection models currently do not support evaluation during training.
#### Evaluated on the WIDER-FACE Dataset
- Step 1: Evaluate and generate a result file:
```shell
python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
-o weights=output/blazeface_1000e/model_final \
multi_scale=True
```
Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in TXT format will be generated in `output/pred`.
- Step 2: Download the official evaluation script and Ground Truth file:
```
wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
unzip eval_tools.zip && rm -f eval_tools.zip
```
- Step 3: Start the evaluation
Method 1: Python evaluation:
```
git clone https://github.com/wondervictor/WiderFace-Evaluation.git
cd WiderFace-Evaluation
# Compile
python3 setup.py build_ext --inplace
# Run the evaluation
python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
```
Method 2: MatLab evaluation:
```
# In `eval_tools/wider_eval.m`, set the result path and the legend name for the PR curve:
pred_dir = './pred';
legend_name = 'Paddle-BlazeFace';
# `wider_eval.m` is the main entry point of the evaluation module. Run:
matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
```
### Inference with a Python Script
To support secondary development, here is an example of running inference via a Python script using the PaddleDetection whl package.
```python
import cv2
import paddle
import numpy as np
from ppdet.core.workspace import load_config
from ppdet.engine import Trainer
from ppdet.metrics import get_infer_results
from ppdet.data.transform.operators import NormalizeImage, Permute
if __name__ == '__main__':
    # Prepare basic parameters
config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
cfg = load_config(config_path)
weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
cfg.weights = weight_path
bbox_thre = 0.8
paddle.set_device('gpu')
    # Create the trainer and load weights
trainer = Trainer(cfg, mode='test')
trainer.load_weights(cfg.weights)
trainer.model.eval()
normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
permuter = Permute()
    # Read the image
im = cv2.imread(infer_img_path)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    # Build the input data dict
data_dict = {'image': im}
data_dict = normaler(data_dict)
data_dict = permuter(data_dict)
h, w, c = im.shape
data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
    # Run prediction
outs = trainer.model(data_dict)
    # Post-process the outputs to get the final bbox results
for key in ['im_shape', 'scale_factor', 'im_id']:
outs[key] = data_dict[key]
for key, value in outs.items():
outs[key] = value.numpy()
clsid2catid, catid2name = {0: 'face'}, {0: 0}
batch_res = get_infer_results(outs, clsid2catid)
bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
print(bbox)
```
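To eyeball the result, the thresholded detections can be drawn back onto the image. A short follow-up sketch continuing from the script above, assuming the COCO-style `[x, y, w, h]` box layout produced by `get_infer_results`:

```python
# Continuing from the script above: draw `bbox` entries onto the image.
# Assumes COCO-style [x, y, w, h] boxes; adjust if your output differs.
im_vis = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)   # back to BGR for OpenCV
for det in bbox:
    x, y, w, h = map(int, det['bbox'])
    cv2.rectangle(im_vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(im_vis, f"{det['score']:.2f}", (x, max(y - 5, 0)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite('blazeface_vis.jpg', im_vis)
```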
## Citations
```
@misc{bazarevsky2019blazeface,
  title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
  author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
  year={2019},
  eprint={1907.05047},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
```

View File

@@ -0,0 +1,45 @@
architecture: BlazeFace
BlazeFace:
backbone: BlazeNet
neck: BlazeNeck
blaze_head: FaceHead
post_process: BBoxPostProcess
BlazeNet:
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
act: relu
BlazeNeck:
neck_type : None
in_channel: [96,96]
FaceHead:
in_channels: [96,96]
anchor_generator: AnchorGeneratorSSD
loss: SSDLoss
SSDLoss:
overlap_threshold: 0.35
AnchorGeneratorSSD:
steps: [8., 16.]
aspect_ratios: [[1.], [1.]]
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
max_sizes: [[], []]
offset: 0.5
flip: False
min_max_aspect_ratios_order: false
BBoxPostProcess:
decode:
name: SSDBox
nms:
name: MultiClassNMS
keep_top_k: 750
score_threshold: 0.01
nms_threshold: 0.3
nms_top_k: 5000
nms_eta: 1.0
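With the `AnchorGeneratorSSD` settings above (two `min_sizes` at stride 8, six at stride 16, aspect ratio 1, flip disabled), the anchor count for a 640x640 input can be worked out directly. A back-of-the-envelope sketch under those assumptions:

```python
# Anchor count implied by AnchorGeneratorSSD above for a 640x640 input:
# one square prior per min_size (aspect ratio 1, flip off, empty max_sizes).
input_size = 640
steps = [8, 16]
priors_per_cell = [2, 6]        # len(min_sizes[i]) at each stride

total = 0
for step, k in zip(steps, priors_per_cell):
    cells = (input_size // step) ** 2    # 80x80 and 40x40 feature maps
    print(f"stride {step}: {cells} cells x {k} priors = {cells * k}")
    total += cells * k
print("total anchors:", total)           # 12800 + 9600 = 22400
```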

View File

@@ -0,0 +1,45 @@
architecture: BlazeFace
BlazeFace:
backbone: BlazeNet
neck: BlazeNeck
blaze_head: FaceHead
post_process: BBoxPostProcess
BlazeNet:
blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
act: hard_swish
BlazeNeck:
neck_type : fpn_ssh
in_channel: [96,96]
FaceHead:
in_channels: [48, 48]
anchor_generator: AnchorGeneratorSSD
loss: SSDLoss
SSDLoss:
overlap_threshold: 0.35
AnchorGeneratorSSD:
steps: [8., 16.]
aspect_ratios: [[1.], [1.]]
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
max_sizes: [[], []]
offset: 0.5
flip: False
min_max_aspect_ratios_order: false
BBoxPostProcess:
decode:
name: SSDBox
nms:
name: MultiClassNMS
keep_top_k: 750
score_threshold: 0.01
nms_threshold: 0.3
nms_top_k: 5000
nms_eta: 1.0

View File

@@ -0,0 +1,44 @@
worker_num: 2
TrainReader:
inputs_def:
num_max_boxes: 90
sample_transforms:
- Decode: {}
- RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
- RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
- RandomFlip: {}
- CropWithDataAchorSampling: {
anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
batch_sampler: [
[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
],
target_size: 640}
- Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
- NormalizeBox: {}
- PadBox: {num_max_boxes: 90}
batch_transforms:
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 8
shuffle: true
drop_last: true
EvalReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1
TestReader:
sample_transforms:
- Decode: {}
- NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
- Permute: {}
batch_size: 1

View File

@@ -0,0 +1,21 @@
epoch: 1000
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 333
- 800
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSProp
regularizer:
factor: 0.0005
type: L2

View File

@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/wider_face.yml',
'../runtime.yml',
'_base_/optimizer_1000e.yml',
'_base_/blazeface.yml',
'_base_/face_reader.yml',
]
weights: output/blazeface_1000e/model_final
multi_scale_eval: True

View File

@@ -0,0 +1,9 @@
_BASE_: [
'../datasets/wider_face.yml',
'../runtime.yml',
'_base_/optimizer_1000e.yml',
'_base_/blazeface_fpn.yml',
'_base_/face_reader.yml',
]
weights: output/blazeface_fpn_ssh_1000e/model_final
multi_scale_eval: True

View File

@@ -0,0 +1,38 @@
# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
## Model Zoo
| Backbone | Model | Images/GPU | LR schedule | Inference time (fps) | Box AP | Download | Config |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50 | Faster | 1 | 1x | ---- | 36.7 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [Config](./faster_rcnn_r50_1x_coco.yml) |
| ResNet50-vd | Faster | 1 | 1x | ---- | 37.6 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [Config](./faster_rcnn_r50_vd_1x_coco.yml) |
| ResNet101 | Faster | 1 | 1x | ---- | 39.0 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [Config](./faster_rcnn_r101_1x_coco.yml) |
| ResNet34-FPN | Faster | 1 | 1x | ---- | 37.8 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_r34_fpn_1x_coco.yml) |
| ResNet34-FPN-MultiScaleTest | Faster | 1 | 1x | ---- | 38.2 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_multiscaletest_1x_coco.pdparams) | [Config](./faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml) |
| ResNet34-vd-FPN | Faster | 1 | 1x | ---- | 38.5 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_r34_vd_fpn_1x_coco.yml) |
| ResNet50-FPN | Faster | 1 | 1x | ---- | 38.4 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | Faster | 1 | 2x | ---- | 40.0 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_r50_fpn_2x_coco.yml) |
| ResNet50-vd-FPN | Faster | 1 | 1x | ---- | 39.5 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_r50_vd_fpn_1x_coco.yml) |
| ResNet50-vd-FPN | Faster | 1 | 2x | ---- | 40.8 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_r50_vd_fpn_2x_coco.yml) |
| ResNet101-FPN | Faster | 1 | 2x | ---- | 41.4 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_r101_fpn_2x_coco.yml) |
| ResNet101-vd-FPN | Faster | 1 | 1x | ---- | 42.0 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_r101_vd_fpn_1x_coco.yml) |
| ResNet101-vd-FPN | Faster | 1 | 2x | ---- | 43.0 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_r101_vd_fpn_2x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | 1 | 1x | ---- | 43.4 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
| ResNeXt101-vd-FPN | Faster | 1 | 2x | ---- | 44.0 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 1x | ---- | 41.4 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [Config](./faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
| ResNet50-vd-SSLDv2-FPN | Faster | 1 | 2x | ---- | 42.3 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [Config](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
| Swin-Tiny-FPN | Faster | 2 | 1x | ---- | 42.6 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [Config](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
| Swin-Tiny-FPN | Faster | 2 | 2x | ---- | 44.8 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [Config](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
| Swin-Tiny-FPN | Faster | 2 | 3x | ---- | 45.3 | [Download](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [Config](../swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
## Citations
```
@article{Ren_2017,
title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
year={2017},
month={Jun},
}
```

View File

@@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: true
drop_last: true
collate_batch: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false

View File

@@ -0,0 +1,66 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [32, 64, 128, 256, 512]
strides: [16]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
topk_after_collect: False
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
BBoxHead:
head: Res5Head
roi_extractor:
resolution: 14
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
with_pool: true
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
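For intuition, `MultiClassNMS` above first drops boxes scoring below `score_threshold`, then greedily suppresses same-class boxes whose IoU with a kept box exceeds `nms_threshold`, keeping at most `keep_top_k` detections. A simplified single-class sketch of that logic (not ppdet's operator):

```python
# Simplified greedy NMS matching the thresholds above (single class).
import numpy as np

def nms(boxes, scores, score_thr=0.05, iou_thr=0.5, keep_top_k=100):
    order = np.argsort(-scores)                 # best score first
    order = order[scores[order] > score_thr]
    keep = []
    while order.size and len(keep) < keep_top_k:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + areas - inter)
        order = rest[iou <= iou_thr]            # drop heavy overlaps
    return keep

boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30.]])
print(nms(boxes, np.array([0.9, 0.8, 0.7])))    # -> [0, 2]
```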

View File

@@ -0,0 +1,73 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
FasterRCNN:
backbone: ResNet
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
# post process
bbox_post_process: BBoxPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
head: TwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
TwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@@ -0,0 +1,41 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
- RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 2
shuffle: true
drop_last: true
collate_batch: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
inputs_def:
image_shape: [-1, 3, 640, 640]
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: 640, keep_ratio: True}
- Pad: {size: 640}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_size: 1
shuffle: false
drop_last: false

View File

@@ -0,0 +1,70 @@
architecture: FasterRCNN
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
FasterRCNN:
backbone: SwinTransformer
neck: FPN
rpn_head: RPNHead
bbox_head: BBoxHead
bbox_post_process: BBoxPostProcess
SwinTransformer:
arch: 'swin_T_224'
ape: false
drop_path_rate: 0.1
patch_norm: true
out_indices: [0, 1, 2, 3]
pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
FPN:
out_channel: 256
RPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
anchor_sizes: [[32], [64], [128], [256], [512]]
strides: [4, 8, 16, 32, 64]
rpn_target_assign:
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
use_random: True
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 1000
topk_after_collect: True
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
BBoxHead:
head: TwoFCHead
roi_extractor:
resolution: 7
sampling_ratio: 0
aligned: True
bbox_assigner: BBoxAssigner
BBoxAssigner:
batch_size_per_im: 512
bg_thresh: 0.5
fg_thresh: 0.5
fg_fraction: 0.25
use_random: True
TwoFCHead:
out_channel: 1024
BBoxPostProcess:
decode: RCNNBox
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5

View File

@@ -0,0 +1,40 @@
worker_num: 2
TrainReader:
sample_transforms:
- Decode: {}
- RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
- RandomFlip: {prob: 0.5}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: true
drop_last: true
collate_batch: false
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: false
drop_last: false
TestReader:
sample_transforms:
- Decode: {}
- Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- PadBatch: {pad_to_stride: -1}
batch_size: 1
shuffle: false
drop_last: false

View File

@@ -0,0 +1,19 @@
epoch: 12
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2

View File

@@ -0,0 +1,20 @@
epoch: 12
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [8, 11]
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
clip_grad_by_norm: 1.0
optimizer:
type: AdamW
weight_decay: 0.05
param_groups:
- params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
weight_decay: 0.0
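The `param_groups` entry above exempts position-embedding and normalization parameters from weight decay, which is common practice when fine-tuning transformer backbones. A hedged sketch of how such a name-based split could be built (a generic helper, not PaddleDetection's actual builder):

```python
# Sketch: split parameters into decay / no-decay groups by name,
# mirroring the param_groups rule above. Not ppdet's actual builder.
NO_DECAY_KEYS = ('absolute_pos_embed', 'relative_position_bias_table', 'norm')

def build_param_groups(named_params, weight_decay=0.05):
    decay, no_decay = [], []
    for name, p in named_params:
        (no_decay if any(k in name for k in NO_DECAY_KEYS) else decay).append(p)
    return [{'params': decay, 'weight_decay': weight_decay},
            {'params': no_decay, 'weight_decay': 0.0}]

# usage with dummy names standing in for model.named_parameters()
params = [('backbone.norm1.weight', 'p1'), ('head.fc.weight', 'p2'),
          ('backbone.absolute_pos_embed', 'p3')]
print([len(g['params']) for g in build_param_groups(params)])   # [1, 2]
```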

View File

@@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3

View File

@@ -0,0 +1,14 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4

View File

@@ -0,0 +1,25 @@
_BASE_: [
'faster_rcnn_r50_fpn_1x_coco.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
ResNet:
# index 0 stands for res2
depth: 101
norm_type: bn
freeze_at: 0
return_idx: [0,1,2,3]
num_stages: 4
epoch: 24
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [16, 22]
- !LinearWarmup
start_factor: 0.1
steps: 1000

Some files were not shown because too many files have changed in this diff.