更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/cascade_rcnn/README.md
+++ b/paddle_detection/configs/cascade_rcnn/README.md
@@ -0,0 +1,28 @@
+# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
+
+## Model Zoo
+
+| 骨架网络             | 网络类型       | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP |                           下载                          | 配置文件 |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50-FPN         | Cascade Faster         |    1    |   1x    |     ----     |  41.1  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Cascade Mask         |    1    |   1x    |     ----     |  41.8  |    36.3    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster         |    1    |   1x    |     ----     |  44.4  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster         |    1    |   2x    |     ----     |  45.0  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask         |    1    |   1x    |     ----     |  44.9  |    39.1    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask         |    1    |   2x    |     ----     |  45.7  |    39.7    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+
+
+## Citations
+```
+@article{Cai_2019,
+   title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
+   ISSN={1939-3539},
+   url={http://dx.doi.org/10.1109/tpami.2019.2956516},
+   DOI={10.1109/tpami.2019.2956516},
+   journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+   publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+   author={Cai, Zhaowei and Vasconcelos, Nuno},
+   year={2019},
+   pages={1–1}
+}
+```
--- a/paddle_detection/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
+++ b/paddle_detection/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
+++ b/paddle_detection/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml
+++ b/paddle_detection/configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml
@@ -0,0 +1,97 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5
--- a/paddle_detection/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml
+++ b/paddle_detection/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml
@@ -0,0 +1,75 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
--- a/paddle_detection/configs/cascade_rcnn/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/cascade_rcnn/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
--- a/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
--- a/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,18 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
--- a/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [12, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
--- a/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml
@@ -0,0 +1,8 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
--- a/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml
@@ -0,0 +1,18 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
--- a/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml
+++ b/paddle_detection/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml
@@ -0,0 +1,29 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [12, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
--- a/paddle_detection/configs/centernet/README.md
+++ b/paddle_detection/configs/centernet/README.md
@@ -0,0 +1,37 @@
+English | [简体中文](README_cn.md)
+
+# CenterNet (CenterNet: Objects as Points)
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#Model_Zoo)
+- [Citations](#Citations)
+
+## Introduction
+
+[CenterNet](http://arxiv.org/abs/1904.07850) is an Anchor Free detector, which model an object as a single point -- the center point of its bounding box. The detector uses keypoint estimation to find center points and regresses to all other object properties. The center point based approach, CenterNet, is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding box based detectors.
+
+## Model Zoo
+
+### CenterNet Results on COCO-val 2017
+
+| backbone       | input shape | mAP   |    FPS    | download | config |
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |
+| DLA-34(paper)  | 512x512 |  37.4  |     -   |    -   |   -    |
+| DLA-34         | 512x512 |  37.6  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
+| ResNet50 + DLAUp  | 512x512 |  38.9  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
+| MobileNetV1 + DLAUp  | 512x512 |  28.2  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
+| MobileNetV3_small + DLAUp  | 512x512 | 17  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
+| MobileNetV3_large + DLAUp  | 512x512 |  27.1  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
+| ShuffleNetV2 + DLAUp  | 512x512 | 23.8  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
+
+
+## Citations
+```
+@article{zhou2019objects,
+  title={Objects as points},
+  author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
+  journal={arXiv preprint arXiv:1904.07850},
+  year={2019}
+}
+```
--- a/paddle_detection/configs/centernet/README_cn.md
+++ b/paddle_detection/configs/centernet/README_cn.md
@@ -0,0 +1,36 @@
+简体中文 | [English](README.md)
+
+# CenterNet (CenterNet: Objects as Points)
+
+## 内容
+- [简介](#简介)
+- [模型库](#模型库)
+- [引用](#引用)
+
+## 内容
+
+[CenterNet](http://arxiv.org/abs/1904.07850)是Anchor Free检测器，将物体表示为一个目标框中心点。CenterNet使用关键点检测的方式定位中心点并回归物体的其他属性。CenterNet是以中心点为基础的检测方法，是端到端可训练的，并且相较于基于anchor的检测器更加检测高效。
+
+## 模型库
+
+### CenterNet在COCO-val 2017上结果
+
+| 骨干网络       | 输入尺寸 | mAP   |    FPS    | 下载链接 | 配置文件 |
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |
+| DLA-34(paper)  | 512x512 |  37.4  |     -   |    -   |   -    |
+| DLA-34         | 512x512 |  37.6  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [配置文件](./centernet_dla34_140e_coco.yml) |
+| ResNet50 + DLAUp  | 512x512 |  38.9  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [配置文件](./centernet_r50_140e_coco.yml) |
+| MobileNetV1 + DLAUp  | 512x512 |  28.2  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [配置文件](./centernet_mbv1_140e_coco.yml) |
+| MobileNetV3_small + DLAUp  | 512x512 | 17  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [配置文件](./centernet_mbv3_small_140e_coco.yml) |
+| MobileNetV3_large + DLAUp  | 512x512 |  27.1  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [配置文件](./centernet_mbv3_large_140e_coco.yml) |
+| ShuffleNetV2 + DLAUp  | 512x512 | 23.8  |     -   | [下载链接](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [配置文件](./centernet_shufflenetv2_140e_coco.yml) |
+
+## 引用
+```
+@article{zhou2019objects,
+  title={Objects as points},
+  author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
+  journal={arXiv preprint arXiv:1904.07850},
+  year={2019}
+}
+```
--- a/paddle_detection/configs/centernet/_base_/centernet_dla34.yml
+++ b/paddle_detection/configs/centernet/_base_/centernet_dla34.yml
@@ -0,0 +1,22 @@
+architecture: CenterNet
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
+
+CenterNet:
+  backbone: DLA
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+DLA:
+  depth: 34
+
+CenterNetDLAFPN:
+  down_ratio: 4
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False
--- a/paddle_detection/configs/centernet/_base_/centernet_r50.yml
+++ b/paddle_detection/configs/centernet/_base_/centernet_r50.yml
@@ -0,0 +1,34 @@
+architecture: CenterNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+CenterNet:
+  backbone: ResNet
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  return_idx: [0, 1, 2, 3]
+  freeze_at: -1
+  norm_decay: 0.
+  dcn_v2_stages: [3]
+
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 4
+  down_ratio: 4
+  dcn_v2: False
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False
--- a/paddle_detection/configs/centernet/_base_/centernet_reader.yml
+++ b/paddle_detection/configs/centernet/_base_/centernet_reader.yml
@@ -0,0 +1,35 @@
+worker_num: 4
+TrainReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
+    - CenterRandColor: {}
+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
+    - Permute: {}
+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
+  batch_size: 16
+  shuffle: True
+  drop_last: True
+  use_shared_memory: True
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
+    - Permute: {}
+  batch_size: 1
--- a/paddle_detection/configs/centernet/_base_/optimizer_140e.yml
+++ b/paddle_detection/configs/centernet/_base_/optimizer_140e.yml
@@ -0,0 +1,14 @@
+epoch: 140
+
+LearningRate:
+  base_lr: 0.0005
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [90, 120]
+    use_warmup: False
+
+OptimizerBuilder:
+  optimizer:
+    type: Adam
+  regularizer: NULL
--- a/paddle_detection/configs/centernet/centernet_dla34_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_dla34_140e_coco.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_140e.yml',
+  '_base_/centernet_dla34.yml',
+  '_base_/centernet_reader.yml',
+]
+
+weights: output/centernet_dla34_140e_coco/model_final
--- a/paddle_detection/configs/centernet/centernet_mbv1_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_mbv1_140e_coco.yml
@@ -0,0 +1,21 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
+weights: output/centernet_mbv1_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNet
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNet:
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [3, 5, 11, 13]
+
+TrainReader:
+  batch_size: 32
--- a/paddle_detection/configs/centernet/centernet_mbv3_large_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_mbv3_large_140e_coco.yml
@@ -0,0 +1,22 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+weights: output/centernet_mbv3_large_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNetV3
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNetV3:
+  model_name: large
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [4, 7, 13, 16]
+
+TrainReader:
+  batch_size: 32
--- a/paddle_detection/configs/centernet/centernet_mbv3_small_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_mbv3_small_140e_coco.yml
@@ -0,0 +1,28 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+weights: output/centernet_mbv3_small_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNetV3
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNetV3:
+  model_name: small
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [4, 9, 12]
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 3
+  down_ratio: 8
+  dcn_v2: False
+
+TrainReader:
+  batch_size: 32
--- a/paddle_detection/configs/centernet/centernet_r50_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_r50_140e_coco.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_140e.yml',
+  '_base_/centernet_r50.yml',
+  '_base_/centernet_reader.yml',
+]
+
+weights: output/centernet_r50_140e_coco/model_final
--- a/paddle_detection/configs/centernet/centernet_shufflenetv2_140e_coco.yml
+++ b/paddle_detection/configs/centernet/centernet_shufflenetv2_140e_coco.yml
@@ -0,0 +1,33 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ShuffleNetV2_x1_0_pretrained.pdparams
+weights: output/centernet_shufflenetv2_140e_coco/model_final
+
+CenterNet:
+  backbone: ShuffleNetV2
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+ShuffleNetV2:
+  scale: 1.0
+  feature_maps: [5, 13, 17]
+  act: leaky_relu
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 3
+  down_ratio: 8
+  dcn_v2: False
+
+TrainReader:
+  batch_size: 32
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: False, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}
--- a/paddle_detection/configs/clrnet/README.cn.md
+++ b/paddle_detection/configs/clrnet/README.cn.md
@@ -0,0 +1,68 @@
+简体中文 | [English](README.md)
+
+# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
+
+## 目录
+- [简介](#简介)
+- [模型库](#模型库)
+- [引用](#引用)
+
+## 介绍
+
+[CLRNet](https://arxiv.org/abs/2203.10350)是一个车道线检测模型。CLRNet模型设计了车道线检测的直线先验轨迹，车道线iou以及nms方法，融合提取车道线轨迹的上下文高层特征与底层特征，利用FPN多尺度进行refine，在车道线检测相关数据集取得了SOTA的性能。
+
+## 模型库
+
+### CLRNet在CUlane上结果
+
+| 骨架网络       | mF1 | F1@50   |    F1@75    | 下载链接 | 配置文件 |训练日志|
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |:-----: |
+| ResNet-18         | 54.98 |  79.46  |    62.10   | [下载链接](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [配置文件](./clrnet_resnet18_culane.yml) |[训练日志](https://bj.bcebos.com/v1/paddledet/logs/train_clrnet_r18_15_culane.log)|
+
+### 数据集下载
+下载[CULane数据集](https://xingangpan.github.io/projects/CULane.html)并解压到`dataset/culane`目录。
+
+您的数据集目录结构如下：
+```shell
+culane/driver_xx_xxframe    # data folders x6
+culane/laneseg_label_w16    # lane segmentation labels
+culane/list                 # data lists
+```
+如果您使用百度云链接下载，注意确保`driver_23_30frame_part1.tar.gz`和`driver_23_30frame_part2.tar.gz`解压后的文件都在`driver_23_30frame`目录下。
+
+现已将用于测试的小数据集上传到PaddleDetection，可通过运行训练脚本，自动下载并解压数据，如需复现结果请下载链接中的全量数据集训练。
+
+### 训练
+- GPU单卡训练
+```shell
+python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
+```
+- GPU多卡训练
+```shell
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
+```
+
+### 评估
+```shell
+python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
+```
+
+### 预测
+```shell
+python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
+```
+
+注意：预测功能暂不支持模型静态图推理部署。
+
+## 引用
+```
+@InProceedings{Zheng_2022_CVPR,
+    author    = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
+    title     = {CLRNet: Cross Layer Refinement Network for Lane Detection},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month     = {June},
+    year      = {2022},
+    pages     = {898-907}
+}
+```
--- a/paddle_detection/configs/clrnet/README.md
+++ b/paddle_detection/configs/clrnet/README.md
@@ -0,0 +1,68 @@
+English | [简体中文](README_cn.md)
+
+# CLRNet (CLRNet: Cross Layer Refinement Network for Lane Detection)
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#Model_Zoo)
+- [Citations](#Citations)
+
+## Introduction
+
+[CLRNet](https://arxiv.org/abs/2203.10350) is a lane detection model. The CLRNet model is designed with line prior for lane detection, line iou loss as well as nms method, fused to extract contextual high-level features of lane line with low-level features, and refined by FPN multi-scale. Finally, the model achieved SOTA performance in lane detection datasets.
+
+## Model Zoo
+
+### CLRNet Results on CULane dataset
+
+| backbone       | mF1 | F1@50   |    F1@75    | download | config |
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |
+| ResNet-18         | 54.98 |  79.46  |    62.10   | [model](https://paddledet.bj.bcebos.com/models/clrnet_resnet18_culane.pdparams) | [config](./clrnet_resnet18_culane.yml) |
+
+### Download
+Download [CULane](https://xingangpan.github.io/projects/CULane.html). Then extract them to `dataset/culane`.
+
+For CULane, you should have structure like this:
+```shell
+culane/driver_xx_xxframe    # data folders x6
+culane/laneseg_label_w16    # lane segmentation labels
+culane/list                 # data lists
+```
+If you use Baidu Cloud, make sure that images in `driver_23_30frame_part1.tar.gz` and `driver_23_30frame_part2.tar.gz` are located in one folder `driver_23_30frame` instead of two seperate folders after you decompress them.
+
+Now we have uploaded a small subset of CULane dataset to PaddleDetection for code checking. You can simply run the training script below to download it automatically. If you want to implement the results, you need to download the full dataset at th link for training.
+
+### Training
+- single GPU
+```shell
+python tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
+```
+- multi GPU
+```shell
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/clrnet/clr_resnet18_culane.yml
+```
+
+### Evaluation
+```shell
+python tools/eval.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams
+```
+
+### Inference
+```shell
+python tools/infer_culane.py -c configs/clrnet/clr_resnet18_culane.yml -o weights=output/clr_resnet18_culane/model_final.pdparams --infer_img=demo/lane00000.jpg
+```
+
+Notice: The inference phase does not support static model graph deploy at present.
+
+## Citations
+```
+@InProceedings{Zheng_2022_CVPR,
+    author    = {Zheng, Tu and Huang, Yifei and Liu, Yang and Tang, Wenjian and Yang, Zheng and Cai, Deng and He, Xiaofei},
+    title     = {CLRNet: Cross Layer Refinement Network for Lane Detection},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month     = {June},
+    year      = {2022},
+    pages     = {898-907}
+}
+```
--- a/paddle_detection/configs/clrnet/_base_/clrnet_r18_fpn.yml
+++ b/paddle_detection/configs/clrnet/_base_/clrnet_r18_fpn.yml
@@ -0,0 +1,41 @@
+architecture: CLRNet
+
+CLRNet:
+  backbone: CLRResNet
+  neck: CLRFPN
+  clr_head: CLRHead
+
+CLRResNet:
+  resnet: 'resnet18'
+  pretrained: True
+
+CLRFPN:
+  in_channels: [128,256,512]
+  out_channel: 64
+  extra_stage: 0
+
+CLRHead:
+  prior_feat_channels: 64
+  fc_hidden_dim: 64
+  num_priors: 192
+  num_fc: 2
+  refine_layers: 3
+  sample_points: 36
+  loss: CLRNetLoss
+  conf_threshold: 0.4
+  nms_thres: 0.8
+
+CLRNetLoss:
+  cls_loss_weight : 2.0
+  xyt_loss_weight : 0.2
+  iou_loss_weight : 2.0
+  seg_loss_weight : 1.0
+  refine_layers : 3
+  ignore_label: 255
+  bg_weight: 0.4
+
+# for visualize lane detection results
+sample_y:
+  start: 589
+  end: 230
+  step: -20
--- a/paddle_detection/configs/clrnet/_base_/clrnet_reader.yml
+++ b/paddle_detection/configs/clrnet/_base_/clrnet_reader.yml
@@ -0,0 +1,37 @@
+worker_num: 10
+
+img_h: &img_h 320
+img_w: &img_w 800
+ori_img_h: &ori_img_h 590
+ori_img_w: &ori_img_w 1640
+num_points: &num_points 72
+max_lanes: &max_lanes 4
+
+TrainReader:
+  batch_size: 24
+  batch_transforms:
+    - CULaneTrainProcess: {img_h: *img_h, img_w: *img_w}
+    - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+  shuffle: True
+  drop_last: False
+
+
+
+
+EvalReader:
+  batch_size: 24
+  batch_transforms:
+    - CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
+    - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+  shuffle: False
+  drop_last: False
+
+
+
+TestReader:
+  batch_size: 24
+  batch_transforms:
+    - CULaneResize: {prob: 1.0, img_h: *img_h, img_w: *img_w}
+    - CULaneDataProcess: {num_points: *num_points, max_lanes: *max_lanes, img_w: *img_w, img_h: *img_h}
+  shuffle: False
+  drop_last: False
--- a/paddle_detection/configs/clrnet/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/clrnet/_base_/optimizer_1x.yml
@@ -0,0 +1,14 @@
+epoch: 15
+snapshot_epoch: 5
+
+LearningRate:
+  base_lr: 0.6e-3
+  schedulers:
+  - !CosineDecay
+    max_epochs: 15
+    use_warmup: False
+
+OptimizerBuilder:
+  regularizer: False
+  optimizer:
+    type: AdamW
--- a/paddle_detection/configs/clrnet/clrnet_resnet18_culane.yml
+++ b/paddle_detection/configs/clrnet/clrnet_resnet18_culane.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/culane.yml',
+  '_base_/clrnet_reader.yml',
+  '_base_/clrnet_r18_fpn.yml',
+  '_base_/optimizer_1x.yml',
+  '../runtime.yml'
+]
+
+weights: output/clr_resnet18_culane/model_final
--- a/paddle_detection/configs/convnext/README.md
+++ b/paddle_detection/configs/convnext/README.md
@@ -0,0 +1,20 @@
+# ConvNeXt (A ConvNet for the 2020s)
+
+## 模型库
+### ConvNeXt on COCO
+
+| 网络网络                  | 输入尺寸 | 图片数/GPU | 学习率策略 | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) |    下载链接       | 配置文件 |
+| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |
+| PP-YOLOE-ConvNeXt-tiny | 640 |    16      |   36e    |  44.6  |  63.3 |  33.04  |  13.87 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_convnext_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_convnext_tiny_36e_coco.yml) |
+| YOLOX-ConvNeXt-s       | 640 |    8       |   36e    |  44.6  |  65.3 |  36.20  |  27.52 | [下载链接](https://paddledet.bj.bcebos.com/models/yolox_convnext_s_36e_coco.pdparams) | [配置文件](./yolox_convnext_s_36e_coco.yml) |
+
+
+## Citations
+```
+@Article{liu2022convnet,
+  author  = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie},
+  title   = {A ConvNet for the 2020s},
+  journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year    = {2022},
+}
+```
--- a/paddle_detection/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml
+++ b/paddle_detection/configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml
@@ -0,0 +1,55 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../ppyoloe/_base_/ppyoloe_crn.yml',
+  '../ppyoloe/_base_/ppyoloe_reader.yml',
+]
+depth_mult: 0.25
+width_mult: 0.50
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/ppyoloe_convnext_tiny_36e_coco/model_final
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
+
+
+YOLOv3:
+  backbone: ConvNeXt
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+ConvNeXt:
+  arch: 'tiny'
+  drop_path_rate: 0.4
+  layer_scale_init_value: 1.0
+  return_idx: [1, 2, 3]
+
+
+PPYOLOEHead:
+  static_assigner_epoch: 12
+  nms:
+    nms_top_k: 1000
+    keep_top_k: 300
+    score_threshold: 0.01
+    nms_threshold: 0.7
+
+
+TrainReader:
+  batch_size: 16
+
+
+epoch: 36
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [36]
+    use_warmup: false
+
+OptimizerBuilder:
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0005
--- a/paddle_detection/configs/convnext/yolox_convnext_s_36e_coco.yml
+++ b/paddle_detection/configs/convnext/yolox_convnext_s_36e_coco.yml
@@ -0,0 +1,58 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../yolox/_base_/yolox_cspdarknet.yml',
+  '../yolox/_base_/yolox_reader.yml'
+]
+depth_mult: 0.33
+width_mult: 0.50
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/yolox_convnext_s_36e_coco/model_final
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
+
+
+YOLOX:
+  backbone: ConvNeXt
+  neck: YOLOCSPPAN
+  head: YOLOXHead
+  size_stride: 32
+  size_range: [15, 25] # multi-scale range [480*480 ~ 800*800]
+
+ConvNeXt:
+  arch: 'tiny'
+  drop_path_rate: 0.4
+  layer_scale_init_value: 1.0
+  return_idx: [1, 2, 3]
+
+
+TrainReader:
+  batch_size: 8
+  mosaic_epoch: 30
+
+
+YOLOXHead:
+  l1_epoch: 30
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 10000
+    keep_top_k: 1000
+    score_threshold: 0.001
+    nms_threshold: 0.65
+
+
+epoch: 36
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [36]
+    use_warmup: false
+
+OptimizerBuilder:
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0005
--- a/paddle_detection/configs/datasets/coco_detection.yml
+++ b/paddle_detection/configs/datasets/coco_detection.yml
@@ -0,0 +1,21 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+  allow_empty: true
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
--- a/paddle_detection/configs/datasets/coco_instance.yml
+++ b/paddle_detection/configs/datasets/coco_instance.yml
@@ -0,0 +1,20 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  name: COCODataSet
+  image_dir: train2017
+  anno_path: annotations/instances_train2017.json
+  dataset_dir: dataset/coco
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  name: COCODataSet
+  image_dir: val2017
+  anno_path: annotations/instances_val2017.json
+  dataset_dir: dataset/coco
+
+TestDataset:
+  name: ImageFolder
+  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
--- a/paddle_detection/configs/datasets/culane.yml
+++ b/paddle_detection/configs/datasets/culane.yml
@@ -0,0 +1,28 @@
+metric: CULaneMetric
+num_classes: 5 # 4 lanes + background
+
+cut_height: &cut_height 270
+dataset_dir: &dataset_dir dataset/culane
+
+TrainDataset:
+  name: CULaneDataSet
+  dataset_dir: *dataset_dir
+  list_path: 'list/train_gt.txt'
+  split: train
+  cut_height: *cut_height
+
+
+EvalDataset:
+  name: CULaneDataSet
+  dataset_dir: *dataset_dir
+  list_path: 'list/test.txt'
+  split: test
+  cut_height: *cut_height
+
+
+TestDataset:
+  name: CULaneDataSet
+  dataset_dir: *dataset_dir
+  list_path: 'list/test.txt'
+  split: test
+  cut_height: *cut_height
--- a/paddle_detection/configs/datasets/dota.yml
+++ b/paddle_detection/configs/datasets/dota.yml
@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 15
+
+TrainDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: test1024/DOTA_test1024.json
+    dataset_dir: dataset/dota/
--- a/paddle_detection/configs/datasets/dota_ms.yml
+++ b/paddle_detection/configs/datasets/dota_ms.yml
@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 15
+
+TrainDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota_ms/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota_ms/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: test1024/DOTA_test1024.json
+    dataset_dir: dataset/dota_ms/
--- a/paddle_detection/configs/datasets/mcmot.yml
+++ b/paddle_detection/configs/datasets/mcmot.yml
@@ -0,0 +1,25 @@
+metric: MCMOT
+num_classes: 10
+# using VisDrone2019 MOT dataset with 10 classes as default, you can modify it for your needs.
+
+# for MCMOT training
+TrainDataset:
+  !MCMOTDataSet
+    dataset_dir: dataset/mot
+    image_lists: ['visdrone_mcmot.train']
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
+    label_list: label_list.txt
+
+# for MCMOT evaluation
+# If you want to change the MCMOT evaluation dataset, please modify 'data_root'
+EvalMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    data_root: visdrone_mcmot/images/val
+    keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
+
+# for MCMOT video inference
+TestMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    keep_ori_im: True # set True if save visualization images or video
--- a/paddle_detection/configs/datasets/mot.yml
+++ b/paddle_detection/configs/datasets/mot.yml
@@ -0,0 +1,23 @@
+metric: MOT
+num_classes: 1
+
+# for MOT training
+TrainDataset:
+  !MOTDataSet
+    dataset_dir: dataset/mot
+    image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train']
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
+
+# for MOT evaluation
+# If you want to change the MOT evaluation dataset, please modify 'data_root'
+EvalMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    data_root: MOT16/images/train
+    keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
+
+# for MOT video inference
+TestMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    keep_ori_im: True # set True if save visualization images or video
--- a/paddle_detection/configs/datasets/objects365_detection.yml
+++ b/paddle_detection/configs/datasets/objects365_detection.yml
@@ -0,0 +1,21 @@
+metric: COCO
+num_classes: 365
+
+TrainDataset:
+  !COCODataSet
+    image_dir: train
+    anno_path: annotations/zhiyuan_objv2_train.json
+    dataset_dir: dataset/objects365
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val
+    anno_path: annotations/zhiyuan_objv2_val.json
+    dataset_dir: dataset/objects365
+    allow_empty: true
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/zhiyuan_objv2_val.json
+    dataset_dir: dataset/objects365/
--- a/paddle_detection/configs/datasets/roadsign_voc.yml
+++ b/paddle_detection/configs/datasets/roadsign_voc.yml
@@ -0,0 +1,21 @@
+metric: VOC
+map_type: integral
+num_classes: 4
+
+TrainDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/roadsign_voc
+  anno_path: train.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/roadsign_voc
+  anno_path: valid.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+  name: ImageFolder
+  anno_path: dataset/roadsign_voc/label_list.txt
--- a/paddle_detection/configs/datasets/sniper_coco_detection.yml
+++ b/paddle_detection/configs/datasets/sniper_coco_detection.yml
@@ -0,0 +1,47 @@
+metric: SNIPERCOCO
+num_classes: 80
+
+TrainDataset:
+  !SniperCOCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: true
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
+    chip_target_size: 512
+    chip_target_stride: 200
+    use_neg_chip: false
+    max_neg_num_per_im: 8
+
+
+EvalDataset:
+  !SniperCOCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: false
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
+    chip_target_size: 512
+    chip_target_stride: 200
+    max_per_img: -1
+    nms_thresh: 0.5
+
+TestDataset:
+  !SniperCOCODataSet
+    image_dir: val2017
+    dataset_dir: dataset/coco
+    is_trainset: false
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
+    chip_target_size: 500
+    chip_target_stride: 200
+    max_per_img: -1
+    nms_thresh: 0.5
+
+
--- a/paddle_detection/configs/datasets/sniper_visdrone_detection.yml
+++ b/paddle_detection/configs/datasets/sniper_visdrone_detection.yml
@@ -0,0 +1,47 @@
+metric: SNIPERCOCO
+num_classes: 9
+
+TrainDataset:
+  !SniperCOCODataSet
+    image_dir: train
+    anno_path: annotations/train.json
+    dataset_dir: dataset/VisDrone2019_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: true
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    use_neg_chip: false
+    max_neg_num_per_im: 8
+
+
+EvalDataset:
+  !SniperCOCODataSet
+    image_dir: val
+    anno_path: annotations/val.json
+    dataset_dir: dataset/VisDrone2019_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: false
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    max_per_img: -1
+    nms_thresh: 0.5
+
+TestDataset:
+  !SniperCOCODataSet
+    image_dir: val
+    dataset_dir: dataset/VisDrone2019_coco
+    is_trainset: false
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    max_per_img: -1
+    nms_thresh: 0.5
+
+
--- a/paddle_detection/configs/datasets/spine_coco.yml
+++ b/paddle_detection/configs/datasets/spine_coco.yml
@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 9
+
+TrainDataset:
+  !COCODataSet
+    image_dir: images
+    anno_path: annotations/train.json
+    dataset_dir: dataset/spine_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: images
+    anno_path: annotations/valid.json
+    dataset_dir: dataset/spine_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/valid.json
+    dataset_dir: dataset/spine_coco
--- a/paddle_detection/configs/datasets/visdrone_detection.yml
+++ b/paddle_detection/configs/datasets/visdrone_detection.yml
@@ -0,0 +1,22 @@
+metric: COCO
+num_classes: 10
+
+TrainDataset:
+  !COCODataSet
+    image_dir: VisDrone2019-DET-train
+    anno_path: train.json
+    dataset_dir: dataset/visdrone
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: VisDrone2019-DET-val
+    anno_path: val.json
+    # image_dir: test_dev
+    # anno_path: test_dev.json
+    dataset_dir: dataset/visdrone
+
+TestDataset:
+  !ImageFolder
+    anno_path: val.json
+    dataset_dir: dataset/visdrone
--- a/paddle_detection/configs/datasets/voc.yml
+++ b/paddle_detection/configs/datasets/voc.yml
@@ -0,0 +1,21 @@
+metric: VOC
+map_type: 11point
+num_classes: 20
+
+TrainDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/voc
+  anno_path: trainval.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/voc
+  anno_path: test.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+  name: ImageFolder
+  anno_path: dataset/voc/label_list.txt
--- a/paddle_detection/configs/datasets/wider_face.yml
+++ b/paddle_detection/configs/datasets/wider_face.yml
@@ -0,0 +1,20 @@
+metric: WiderFace
+num_classes: 1
+
+TrainDataset:
+  !WIDERFaceDataSet
+    dataset_dir: dataset/wider_face
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
+    image_dir: WIDER_train/images
+    data_fields: ['image', 'gt_bbox', 'gt_class']
+
+EvalDataset:
+  !WIDERFaceDataSet
+    dataset_dir: dataset/wider_face
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
+    image_dir: WIDER_val/images
+    data_fields: ['image']
+
+TestDataset:
+  !ImageFolder
+    use_default_label: true
--- a/paddle_detection/configs/dcn/README.md
+++ b/paddle_detection/configs/dcn/README.md
@@ -0,0 +1,37 @@
+### Deformable ConvNets v2
+
+| 骨架网络             | 网络类型           | 卷积    | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP |                           下载                           | 配置文件 |
+| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
+| ResNet50-FPN         | Faster         | c3-c5   |    1      |   1x    |    -     |  42.1  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         | c3-c5   |    1      |   1x    |    -     |  42.7  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         | c3-c5   |    1      |   2x    |    -     |  43.7  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         | c3-c5   |    1      |   1x    |    -     |  45.1  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         | c3-c5   |    1      |   1x    |    -     |  46.5  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Mask           | c3-c5   |    1      |   1x    |    -     |  42.7  |   38.4   | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Mask           | c3-c5   |    1      |   2x    |    -     |  44.6  |  39.8   | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Mask           | c3-c5   |    1      |   1x    |    -     |  45.6 |  40.6  | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Mask           | c3-c5   |    1      |   1x    |     -    |  47.3 |  42.0  | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Cascade Faster         | c3-c5   |    1      |   1x    |    -     |  42.1  |    -    | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Cascade Faster           | c3-c5   |    1      |   1x    |     -    |  48.8 |  -  | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+
+
+**注意事项:**  
+
+- Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
+- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`.
+
+## Citations
+```
+@inproceedings{dai2017deformable,
+  title={Deformable Convolutional Networks},
+  author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
+  booktitle={Proceedings of the IEEE international conference on computer vision},
+  year={2017}
+}
+@article{zhu2018deformable,
+  title={Deformable ConvNets v2: More Deformable, Better Results},
+  author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
+  journal={arXiv preprint arXiv:1811.11168},
+  year={2018}
+}
+```
--- a/paddle_detection/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../cascade_rcnn/_base_/optimizer_1x.yml',
+  '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
+  '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+  'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 101
+  groups: 64
+  base_width: 4
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../faster_rcnn/_base_/optimizer_1x.yml',
+  '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+  '../faster_rcnn/_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml
+++ b/paddle_detection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
--- a/paddle_detection/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,17 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  # for ResNeXt: groups, base_width, base_channels
+  depth: 101
+  groups: 64
+  base_width: 4
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml
@@ -0,0 +1,15 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml
@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '../mask_rcnn/_base_/optimizer_1x.yml',
+  '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
+  '../mask_rcnn/_base_/mask_fpn_reader.yml',
+]
+weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml
+++ b/paddle_detection/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml
@@ -0,0 +1,26 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
--- a/paddle_detection/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
+++ b/paddle_detection/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
@@ -0,0 +1,17 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  # for ResNeXt: groups, base_width, base_channels
+  depth: 101
+  variant: d
+  groups: 64
+  base_width: 4
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
--- a/paddle_detection/configs/deformable_detr/README.md
+++ b/paddle_detection/configs/deformable_detr/README.md
@@ -0,0 +1,36 @@
+# Deformable DETR
+
+## Introduction
+
+
+Deformable DETR is an object detection model based on DETR. We reproduced the model of the paper.
+
+
+## Model Zoo
+
+| Backbone |      Model      | Images/GPU | Epochs | Box AP |                                                             Config                                                             |                                         Log                                         |                                       Download                                       |
+|:--------:|:---------------:|:----------:|:------:|:------:|:------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------:|
+|   R-50   | Deformable DETR |     2      |   50   |  44.5  | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/deformable_detr_r50_1x_coco_44.5.log) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
+
+**Notes:**
+
+- Deformable DETR is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- Deformable DETR uses 8GPU to train 50 epochs.
+
+GPU multi-card training
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/deformable_detr/deformable_detr_r50_1x_coco.yml --fleet
+```
+
+## Citations
+```
+@inproceedings{
+zhu2021deformable,
+title={Deformable DETR: Deformable Transformers for End-to-End Object Detection},
+author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai},
+booktitle={International Conference on Learning Representations},
+year={2021},
+url={https://openreview.net/forum?id=gZ9hCDWe6ke}
+}
+```
--- a/paddle_detection/configs/deformable_detr/_base_/deformable_detr_r50.yml
+++ b/paddle_detection/configs/deformable_detr/_base_/deformable_detr_r50.yml
@@ -0,0 +1,48 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+
+DETR:
+  backbone: ResNet
+  transformer: DeformableTransformer
+  detr_head: DeformableDETRHead
+  post_process: DETRPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+
+DeformableTransformer:
+  num_queries: 300
+  position_embed_type: sine
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 1024
+  dropout: 0.1
+  activation: relu
+  num_feature_levels: 4
+  num_encoder_points: 4
+  num_decoder_points: 4
+
+
+DeformableDETRHead:
+  num_mlp_layers: 3
+
+
+DETRLoss:
+  loss_coeff: {class: 2, bbox: 5, giou: 2}
+  aux_loss: True
+
+
+HungarianMatcher:
+  matcher_coeff: {class: 2, bbox: 5, giou: 2}
--- a/paddle_detection/configs/deformable_detr/_base_/deformable_detr_reader.yml
+++ b/paddle_detection/configs/deformable_detr/_base_/deformable_detr_reader.yml
@@ -0,0 +1,44 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/deformable_detr/_base_/deformable_optimizer_1x.yml
+++ b/paddle_detection/configs/deformable_detr/_base_/deformable_optimizer_1x.yml
@@ -0,0 +1,16 @@
+epoch: 50
+
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [40]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/deformable_detr/deformable_detr_r50_1x_coco.yml
+++ b/paddle_detection/configs/deformable_detr/deformable_detr_r50_1x_coco.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/deformable_optimizer_1x.yml',
+  '_base_/deformable_detr_r50.yml',
+  '_base_/deformable_detr_reader.yml',
+]
+weights: output/deformable_detr_r50_1x_coco/model_final
+find_unused_parameters: True
--- a/paddle_detection/configs/detr/README.md
+++ b/paddle_detection/configs/detr/README.md
@@ -0,0 +1,39 @@
+# DETR
+
+## Introduction
+
+
+DETR is an object detection model based on transformer. We reproduced the model of the paper.
+
+
+## Model Zoo
+
+| Backbone | Model | Images/GPU  | Inf time (fps) | Box AP | Config | Download |
+|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
+| R-50 | DETR  | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) |
+
+**Notes:**
+
+- DETR is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- DETR uses 8GPU to train 500 epochs.
+
+GPU multi-card training
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/detr/detr_r50_1x_coco.yml --fleet
+```
+
+## Citations
+```
+@inproceedings{detr,
+  author    = {Nicolas Carion and
+               Francisco Massa and
+               Gabriel Synnaeve and
+               Nicolas Usunier and
+               Alexander Kirillov and
+               Sergey Zagoruyko},
+  title     = {End-to-End Object Detection with Transformers},
+  booktitle = {ECCV},
+  year      = {2020}
+}
+```
--- a/paddle_detection/configs/detr/_base_/detr_r50.yml
+++ b/paddle_detection/configs/detr/_base_/detr_r50.yml
@@ -0,0 +1,44 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
+hidden_dim: 256
+
+
+DETR:
+  backbone: ResNet
+  transformer: DETRTransformer
+  detr_head: DETRHead
+  post_process: DETRPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+
+DETRTransformer:
+  num_queries: 100
+  position_embed_type: sine
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.1
+  activation: relu
+
+
+DETRHead:
+  num_mlp_layers: 3
+
+
+DETRLoss:
+  loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1}
+  aux_loss: True
+
+
+HungarianMatcher:
+  matcher_coeff: {class: 1, bbox: 5, giou: 2}
--- a/paddle_detection/configs/detr/_base_/detr_reader.yml
+++ b/paddle_detection/configs/detr/_base_/detr_reader.yml
@@ -0,0 +1,44 @@
+worker_num: 0
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/detr/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/detr/_base_/optimizer_1x.yml
@@ -0,0 +1,16 @@
+epoch: 500
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [400]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/detr/detr_r50_1x_coco.yml
+++ b/paddle_detection/configs/detr/detr_r50_1x_coco.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/detr_r50.yml',
+  '_base_/detr_reader.yml',
+]
+weights: output/detr_r50_1x_coco/model_final
+find_unused_parameters: True
--- a/paddle_detection/configs/dino/README.md
+++ b/paddle_detection/configs/dino/README.md
@@ -0,0 +1,39 @@
+# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
+
+## Introduction
+
+
+[DINO](https://arxiv.org/abs/2203.03605) is an object detection model based on DETR. We reproduced the model of the paper.
+
+
+## Model Zoo
+
+| Backbone |      Model      | Epochs | Box AP |                 Config                  |                                       Log                                       |                                     Download                                     |
+|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:-------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------:|
+| R-50 | dino_r50_4scale |   12   |  49.5  | [config](./dino_r50_4scale_1x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_1x_coco_49.5.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
+| R-50 | dino_r50_4scale |   24   |  50.8  | [config](./dino_r50_4scale_2x_coco.yml) | [log](https://bj.bcebos.com/v1/paddledet/logs/dino_r50_4scale_2x_coco_50.8.log) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
+
+**Notes:**
+
+- DINO is trained on COCO train2017 dataset and evaluated on val2017 results of `mAP(IoU=0.5:0.95)`.
+- DINO uses 4GPU to train.
+
+GPU multi-card training
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --fleet --eval
+```
+
+## Custom Operator
+- Multi-scale deformable attention custom operator see [here](../../ppdet/modeling/transformers/ext_op).
+
+## Citations
+```
+@misc{zhang2022dino,
+      title={DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection},
+      author={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel M. Ni and Heung-Yeung Shum},
+      year={2022},
+      eprint={2203.03605},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
--- a/paddle_detection/configs/dino/_base_/dino_focalnet.yml
+++ b/paddle_detection/configs/dino/_base_/dino_focalnet.yml
@@ -0,0 +1,45 @@
+architecture: DETR
+# pretrain_weights: # rewrite in FocalNet.pretrained in ppdet/modeling/backbones/focalnet.py
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+  backbone: FocalNet
+  transformer: DINOTransformer
+  detr_head: DINOHead
+  post_process: DETRPostProcess
+
+FocalNet:
+  arch: 'focalnet_L_384_22k_fl4'
+  out_indices: [1, 2, 3]
+  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/focalnet_large_lrf_384_fl4_pretrained.pdparams
+
+DINOTransformer:
+  num_queries: 900
+  position_embed_type: sine
+  num_levels: 4
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.0
+  activation: relu
+  pe_temperature: 20
+  pe_offset: 0.0
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: True
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: True
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+  num_top_queries: 300
--- a/paddle_detection/configs/dino/_base_/dino_r50.yml
+++ b/paddle_detection/configs/dino/_base_/dino_r50.yml
@@ -0,0 +1,49 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+
+DETR:
+  backbone: ResNet
+  transformer: DINOTransformer
+  detr_head: DINOHead
+  post_process: DETRPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+DINOTransformer:
+  num_queries: 900
+  position_embed_type: sine
+  num_levels: 4
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.0
+  activation: relu
+  pe_temperature: 20
+  pe_offset: 0.0
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: True
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: True
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+  num_top_queries: 300
--- a/paddle_detection/configs/dino/_base_/dino_reader.yml
+++ b/paddle_detection/configs/dino/_base_/dino_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 4
+TrainReader:
+  sample_transforms:
+    - Decode: {}
+    - RandomFlip: {prob: 0.5}
+    - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                      transforms2: [
+                          RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                          RandomSizeCrop: { min_size: 384, max_size: 600 },
+                          RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+    }
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - NormalizeBox: {}
+    - BboxXYXY2XYWH: {}
+    - Permute: {}
+  batch_transforms:
+    - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [800, 1333], keep_ratio: True}
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - Resize: {target_size: [800, 1333], keep_ratio: True}
+    - NormalizeImage: {is_scale: true, mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225]}
+    - Permute: {}
+  batch_size: 1
--- a/paddle_detection/configs/dino/_base_/dino_swin.yml
+++ b/paddle_detection/configs/dino/_base_/dino_swin.yml
@@ -0,0 +1,46 @@
+architecture: DETR
+# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
+hidden_dim: 256
+use_focal_loss: True
+
+DETR:
+  backbone: SwinTransformer
+  transformer: DINOTransformer
+  detr_head: DINOHead
+  post_process: DETRPostProcess
+
+SwinTransformer:
+  arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
+  ape: false
+  drop_path_rate: 0.2
+  patch_norm: true
+  out_indices: [1, 2, 3]
+
+DINOTransformer:
+  num_queries: 900
+  position_embed_type: sine
+  num_levels: 4
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.0
+  activation: relu
+  pe_temperature: 10000
+  pe_offset: -0.5
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: True
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: True
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRPostProcess:
+  num_top_queries: 300
--- a/paddle_detection/configs/dino/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/dino/_base_/optimizer_1x.yml
@@ -0,0 +1,16 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [11]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/dino/_base_/optimizer_2x.yml
+++ b/paddle_detection/configs/dino/_base_/optimizer_2x.yml
@@ -0,0 +1,16 @@
+epoch: 24
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [20]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/dino/_base_/optimizer_3x.yml
+++ b/paddle_detection/configs/dino/_base_/optimizer_3x.yml
@@ -0,0 +1,16 @@
+epoch: 36
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [33]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/paddle_detection/configs/dino/dino_r50_4scale_1x_coco.yml
+++ b/paddle_detection/configs/dino/dino_r50_4scale_1x_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/dino_r50.yml',
+  '_base_/dino_reader.yml',
+]
+
+weights: output/dino_r50_4scale_1x_coco/model_final
+find_unused_parameters: True
+log_iter: 100
--- a/paddle_detection/configs/dino/dino_r50_4scale_2x_coco.yml
+++ b/paddle_detection/configs/dino/dino_r50_4scale_2x_coco.yml
@@ -0,0 +1,11 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_2x.yml',
+  '_base_/dino_r50.yml',
+  '_base_/dino_reader.yml',
+]
+
+weights: output/dino_r50_4scale_2x_coco/model_final
+find_unused_parameters: True
+log_iter: 100
--- a/paddle_detection/configs/face_detection/README.md
+++ b/paddle_detection/configs/face_detection/README.md
@@ -0,0 +1,176 @@
+# 人脸检测模型
+
+## 简介
+`face_detection`中提供高效、高速的人脸检测解决方案，包括最先进的模型和经典模型。
+
+![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
+
+## 模型库
+
+#### WIDER-FACE数据集上的mAP
+
+| 网络结构 | 输入尺寸 | 图片个数/GPU | 学习率策略 | Easy/Medium/Hard Set  | 预测时延（SD855）| 模型大小(MB) | 下载 | 配置文件 |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace  | 640  |    8    | 1000e     | 0.885 / 0.855 / 0.731 | - | 0.472 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
+| BlazeFace-FPN-SSH  | 640  |    8    | 1000e     | 0.907 / 0.883 / 0.793 | - | 0.479 |[下载链接](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
+
+**注意:**  
+- 我们使用多尺度评估策略得到`Easy/Medium/Hard Set`里的mAP。具体细节请参考[在WIDER-FACE数据集上评估](#在WIDER-FACE数据集上评估)。
+
+## 快速开始
+
+### 数据准备
+我们使用[WIDER-FACE数据集](http://shuoyang1213.me/WIDERFACE/)进行训练和模型测试，官方网站提供了详细的数据介绍。
+- WIDER-Face数据源:  
+使用如下目录结构加载`wider_face`类型的数据集：
+
+  ```
+  dataset/wider_face/
+  ├── wider_face_split
+  │   ├── wider_face_train_bbx_gt.txt
+  │   ├── wider_face_val_bbx_gt.txt
+  ├── WIDER_train
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_100.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_381.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ├── WIDER_val
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_1004.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_1045.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ```
+
+- 手动下载数据集：
+要下载WIDER-FACE数据集，请运行以下命令：
+```
+cd dataset/wider_face && ./download_wider_face.sh
+```
+
+### 参数配置
+基础模型的配置可以参考`configs/face_detection/_base_/blazeface.yml`；
+改进模型增加FPN和SSH的neck结构，配置文件可以参考`configs/face_detection/_base_/blazeface_fpn.yml`，可以根据需求配置FPN和SSH，具体如下：
+```yaml
+BlazeNet:
+   blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+   double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                           [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+   act: hard_swish #配置backbone中BlazeBlock的激活函数，基础模型为relu，增加FPN和SSH时需使用hard_swish
+
+BlazeNeck:
+   neck_type : fpn_ssh #可选only_fpn、only_ssh和fpn_ssh
+   in_channel: [96,96]
+```
+
+
+
+### 训练与评估
+训练流程与评估流程方法与其他算法一致，请参考[GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。  
+**注意:** 人脸检测模型目前不支持边训练边评估。
+
+#### 在WIDER-FACE数据集上评估
+- 步骤一：评估并生成结果文件：
+```shell
+python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
+       -o weights=output/blazeface_1000e/model_final \
+       multi_scale=True
+```
+设置`multi_scale=True`进行多尺度评估，评估完成后，将在`output/pred`中生成txt格式的测试结果。
+
+- 步骤二：下载官方评估脚本和Ground Truth文件：
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+
+- 步骤三：开始评估
+
+方法一：python评估：
+```
+git clone https://github.com/wondervictor/WiderFace-Evaluation.git
+cd WiderFace-Evaluation
+# 编译
+python3 setup.py build_ext --inplace
+# 开始评估
+python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
+```
+
+方法二：MatLab评估：
+```
+# 在`eval_tools/wider_eval.m`中修改保存结果路径和绘制曲线的名称：
+pred_dir = './pred';  
+legend_name = 'Paddle-BlazeFace';
+
+`wider_eval.m` 是评估模块的主要执行程序。运行命令如下：
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+### Python脚本预测
+为了支持二次开发，这里提供通过Python脚本使用Paddle Detection whl包来进行预测的示例。
+```python
+import cv2
+import paddle
+import numpy as np
+from ppdet.core.workspace import load_config
+from ppdet.engine import Trainer
+from ppdet.metrics import get_infer_results
+from ppdet.data.transform.operators import NormalizeImage, Permute
+
+
+if __name__ == '__main__':
+    # 准备基础的参数
+    config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
+    cfg = load_config(config_path)
+    weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
+    infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
+    cfg.weights = weight_path
+    bbox_thre = 0.8
+    paddle.set_device('gpu')
+    # 创建所需的类
+    trainer = Trainer(cfg, mode='test')
+    trainer.load_weights(cfg.weights)
+    trainer.model.eval()
+    normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
+    permuter = Permute()
+    # 进行图片读取
+    im = cv2.imread(infer_img_path)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    # 准备数据字典
+    data_dict = {'image': im}
+    data_dict = normaler(data_dict)
+    data_dict = permuter(data_dict)
+    h, w, c = im.shape
+    data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
+    data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
+    data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
+    data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
+    data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
+    # 进行预测
+    outs = trainer.model(data_dict)
+    # 对预测的数据进行后处理得到最终的bbox信息
+    for key in ['im_shape', 'scale_factor', 'im_id']:
+        outs[key] = data_dict[key]
+    for key, value in outs.items():
+        outs[key] = value.numpy()
+    clsid2catid, catid2name = {0: 'face'}, {0: 0}
+    batch_res = get_infer_results(outs, clsid2catid)
+    bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
+    print(bbox)
+```
+
+## Citations
+
+```
+@article{bazarevsky2019blazeface,
+      title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
+      author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
+      year={2019},
+      eprint={1907.05047},
+      archivePrefix={arXiv},
+```
--- a/paddle_detection/configs/face_detection/README_en.md
+++ b/paddle_detection/configs/face_detection/README_en.md
@@ -0,0 +1,176 @@
+# Face Detection Model
+
+## Introduction
+`face_detection` High efficiency, high speed face detection solutions, including the most advanced models and classic models.
+
+![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
+
+## Model Library
+
+#### A mAP on the WIDERFACE dataset
+
+| Network structure | size | images/GPUs | Learning rate strategy | Easy/Medium/Hard Set  | Prediction delay（SD855）| Model size(MB) | Download | Configuration File |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace  | 640  |    8    | 1000e     | 0.885 / 0.855 / 0.731 | - | 0.472 |[link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
+| BlazeFace-FPN-SSH  | 640  |    8    | 1000e     | 0.907 / 0.883 / 0.793 | - | 0.479 |[link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
+
+**Attention:**  
+- We use a multi-scale evaluation strategy to get the mAP in `Easy/Medium/Hard Set`. Please refer to the [evaluation on the WIDER FACE dataset](#Evaluated-on-the-WIDER-FACE-Dataset) for details.
+
+## Quick Start
+
+### Data preparation
+We use [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and model tests, the official web site provides detailed data is introduced.
+- WIDER-Face data source:  
+- Load a dataset of type `wider_face` using the following directory structure:
+  ```
+  dataset/wider_face/
+  ├── wider_face_split
+  │   ├── wider_face_train_bbx_gt.txt
+  │   ├── wider_face_val_bbx_gt.txt
+  ├── WIDER_train
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_100.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_381.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ├── WIDER_val
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_1004.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_1045.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ```
+
+- Manually download the dataset:
+To download the WIDER-FACE dataset, run the following command:
+```
+cd dataset/wider_face && ./download_wider_face.sh
+```
+
+### Parameter configuration
+The configuration of the base model can be referenced to `configs/face_detection/_base_/blazeface.yml`；
+Improved model to add FPN and SSH neck structure, configuration files can be referenced to `configs/face_detection/_base_/blazeface_fpn.yml`, You can configure FPN and SSH as required
+```yaml
+BlazeNet:
+   blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+   double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                           [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+   act: hard_swish #Configure Blaze Block activation function in Backbone. The basic model is Relu. hard_swish is needed to add FPN and SSH
+
+BlazeNeck:
+   neck_type : fpn_ssh #only_fpn, only_ssh and fpn_ssh
+   in_channel: [96,96]
+```
+
+
+
+### Training and Evaluation
+The training process and evaluation process methods are consistent with other algorithms, please refer to [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md)。  
+**Attention:** Face detection models currently do not support training and evaluation.
+
+#### Evaluated on the WIDER-FACE Dataset
+- Step 1: Evaluate and generate a result file:
+```shell
+python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
+       -o weights=output/blazeface_1000e/model_final \
+       multi_scale=True
+```
+Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in TXT format will be generated in `output/pred`.
+
+- Step 2: Download the official evaluation script and Ground Truth file:
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+
+- Step 3: Start the evaluation
+
+Method 1: Python evaluation:
+```
+git clone https://github.com/wondervictor/WiderFace-Evaluation.git
+cd WiderFace-Evaluation
+# compile
+python3 setup.py build_ext --inplace
+# Begin to assess
+python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
+```
+
+Method 2: MatLab evaluation:
+```
+# Change the name of save result path and draw curve in `eval_tools/wider_eval.m`:
+pred_dir = './pred';  
+legend_name = 'Paddle-BlazeFace';
+
+`wider_eval.m` is the main implementation of the evaluation module. Run the following command:
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+### Use by Python Code
+In order to support development, here is an example of using the Paddle Detection whl package to make predictions through Python code.
+```python
+import cv2
+import paddle
+import numpy as np
+from ppdet.core.workspace import load_config
+from ppdet.engine import Trainer
+from ppdet.metrics import get_infer_results
+from ppdet.data.transform.operators import NormalizeImage, Permute
+
+
+if __name__ == '__main__':
+    # prepare for the parameters
+    config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
+    cfg = load_config(config_path)
+    weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
+    infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
+    cfg.weights = weight_path
+    bbox_thre = 0.8
+    paddle.set_device('gpu')
+    # create the class object
+    trainer = Trainer(cfg, mode='test')
+    trainer.load_weights(cfg.weights)
+    trainer.model.eval()
+    normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
+    permuter = Permute()
+    # read the image file
+    im = cv2.imread(infer_img_path)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    # prepare for the data dict
+    data_dict = {'image': im}
+    data_dict = normaler(data_dict)
+    data_dict = permuter(data_dict)
+    h, w, c = im.shape
+    data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
+    data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
+    data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
+    data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
+    data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
+    # do the prediction
+    outs = trainer.model(data_dict)
+    # to do the postprocess to get the final bbox info
+    for key in ['im_shape', 'scale_factor', 'im_id']:
+        outs[key] = data_dict[key]
+    for key, value in outs.items():
+        outs[key] = value.numpy()
+    clsid2catid, catid2name = {0: 'face'}, {0: 0}
+    batch_res = get_infer_results(outs, clsid2catid)
+    bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
+    print(bbox)
+```
+
+
+## Citations
+
+```
+@article{bazarevsky2019blazeface,
+      title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
+      author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
+      year={2019},
+      eprint={1907.05047},
+      archivePrefix={arXiv},
+```
--- a/paddle_detection/configs/face_detection/_base_/blazeface.yml
+++ b/paddle_detection/configs/face_detection/_base_/blazeface.yml
@@ -0,0 +1,45 @@
+architecture: BlazeFace
+
+BlazeFace:
+  backbone: BlazeNet
+  neck: BlazeNeck
+  blaze_head: FaceHead
+  post_process: BBoxPostProcess
+
+BlazeNet:
+  blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+  double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                          [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+  act: relu
+
+BlazeNeck:
+  neck_type : None
+  in_channel: [96,96]
+
+FaceHead:
+  in_channels: [96,96]
+  anchor_generator: AnchorGeneratorSSD
+  loss: SSDLoss
+
+SSDLoss:
+  overlap_threshold: 0.35
+
+AnchorGeneratorSSD:
+  steps: [8., 16.]
+  aspect_ratios: [[1.], [1.]]
+  min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
+  max_sizes: [[], []]
+  offset: 0.5
+  flip: False
+  min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+  decode:
+    name: SSDBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 750
+    score_threshold: 0.01
+    nms_threshold: 0.3
+    nms_top_k: 5000
+    nms_eta: 1.0
--- a/paddle_detection/configs/face_detection/_base_/blazeface_fpn.yml
+++ b/paddle_detection/configs/face_detection/_base_/blazeface_fpn.yml
@@ -0,0 +1,45 @@
+architecture: BlazeFace
+
+BlazeFace:
+  backbone: BlazeNet
+  neck: BlazeNeck
+  blaze_head: FaceHead
+  post_process: BBoxPostProcess
+
+BlazeNet:
+  blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+  double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                          [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+  act: hard_swish
+
+BlazeNeck:
+  neck_type : fpn_ssh
+  in_channel: [96,96]
+
+FaceHead:
+  in_channels: [48, 48]
+  anchor_generator: AnchorGeneratorSSD
+  loss: SSDLoss
+
+SSDLoss:
+  overlap_threshold: 0.35
+
+AnchorGeneratorSSD:
+  steps: [8., 16.]
+  aspect_ratios: [[1.], [1.]]
+  min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
+  max_sizes: [[], []]
+  offset: 0.5
+  flip: False
+  min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+  decode:
+    name: SSDBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 750
+    score_threshold: 0.01
+    nms_threshold: 0.3
+    nms_top_k: 5000
+    nms_eta: 1.0
--- a/paddle_detection/configs/face_detection/_base_/face_reader.yml
+++ b/paddle_detection/configs/face_detection/_base_/face_reader.yml
@@ -0,0 +1,44 @@
+worker_num: 2
+TrainReader:
+  inputs_def:
+    num_max_boxes: 90
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomFlip: {}
+    - CropWithDataAchorSampling: {
+      anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
+      batch_sampler: [
+        [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+      ],
+      target_size: 640}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
+    - NormalizeBox: {}
+    - PadBox: {num_max_boxes: 90}
+  batch_transforms:
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 1
--- a/paddle_detection/configs/face_detection/_base_/optimizer_1000e.yml
+++ b/paddle_detection/configs/face_detection/_base_/optimizer_1000e.yml
@@ -0,0 +1,21 @@
+epoch: 1000
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 333
+    - 800
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.0
+    type: RMSProp
+  regularizer:
+    factor: 0.0005
+    type: L2
--- a/paddle_detection/configs/face_detection/blazeface_1000e.yml
+++ b/paddle_detection/configs/face_detection/blazeface_1000e.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/wider_face.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1000e.yml',
+  '_base_/blazeface.yml',
+  '_base_/face_reader.yml',
+]
+weights: output/blazeface_1000e/model_final
+multi_scale_eval: True
--- a/paddle_detection/configs/face_detection/blazeface_fpn_ssh_1000e.yml
+++ b/paddle_detection/configs/face_detection/blazeface_fpn_ssh_1000e.yml
@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/wider_face.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1000e.yml',
+  '_base_/blazeface_fpn.yml',
+  '_base_/face_reader.yml',
+]
+weights: output/blazeface_fpn_ssh_1000e/model_final
+multi_scale_eval: True
--- a/paddle_detection/configs/faster_rcnn/README.md
+++ b/paddle_detection/configs/faster_rcnn/README.md
@@ -0,0 +1,38 @@
+# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
+
+## Model Zoo
+
+| 骨架网络             | 网络类型       | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP |                           下载                          | 配置文件 |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50             | Faster         |    1    |   1x    |     ----     |  36.7  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_1x_coco.yml) |
+| ResNet50-vd          | Faster         |    1    |   1x    |     ----     |  37.6  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_1x_coco.yml) |
+| ResNet101            | Faster         |    1    |   1x    |     ----     |  39.0  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_1x_coco.yml) |
+| ResNet34-FPN         | Faster         |    1    |   1x    |     ----     |  37.8  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_1x_coco.yml) |
+| ResNet34-FPN-MultiScaleTest | Faster  |    1    |   1x    |     ----     |  38.2  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_multiscaletest_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml) |
+| ResNet34-vd-FPN      | Faster         |    1    |   1x    |     ----     |  38.5  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r34_vd_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Faster         |    1    |   1x    |     ----     |  38.4  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Faster         |    1    |   2x    |     ----     |  40.0  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_fpn_2x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         |    1    |   1x    |     ----     |  39.5  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         |    1    |   2x    |     ----     |  40.8  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-FPN        | Faster         |    1    |   2x    |     ----     |  41.4  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         |    1    |   1x    |     ----     |  42.0  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_1x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         |    1    |   2x    |     ----     |  43.0  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_r101_vd_fpn_2x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         |    1    |   1x    |     ----     |  43.4  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         |    1    |   2x    |     ----     |  44.0  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster       |    1    |   1x    |     ----     |  41.4  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster       |    1    |   2x    |     ----     |  42.3  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [配置文件](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   1x    |     ----     |  42.6  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   2x    |     ----     |  44.8  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   3x    |     ----     |  45.3  | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](../swin/faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
+
+## Citations
+```
+@article{Ren_2017,
+   title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
+   journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+   publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+   author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
+   year={2017},
+   month={Jun},
+}
+```
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_fpn_reader.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_fpn_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_r50.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_r50.yml
@@ -0,0 +1,66 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [2]
+  num_stages: 3
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [32, 64, 128, 256, 512]
+    strides: [16]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+    topk_after_collect: False
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: Res5Head
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+  with_pool: true
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml
@@ -0,0 +1,73 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_swin_reader.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_swin_reader.yml
@@ -0,0 +1,41 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
+  - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [-1, 3, 640, 640]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: 640, keep_ratio: True}
+  - Pad: {size: 640}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml
@@ -0,0 +1,70 @@
+architecture: FasterRCNN
+# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py
+
+FasterRCNN:
+  backbone: SwinTransformer
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  bbox_post_process: BBoxPostProcess
+
+SwinTransformer:
+  arch: 'swin_T_224'
+  ape: false
+  drop_path_rate: 0.1
+  patch_norm: true
+  out_indices: [0, 1, 2, 3]
+  pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
--- a/paddle_detection/configs/faster_rcnn/_base_/faster_reader.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/faster_reader.yml
@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
--- a/paddle_detection/configs/faster_rcnn/_base_/optimizer_1x.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/optimizer_1x.yml
@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
--- a/paddle_detection/configs/faster_rcnn/_base_/optimizer_swin_1x.yml
+++ b/paddle_detection/configs/faster_rcnn/_base_/optimizer_swin_1x.yml
@@ -0,0 +1,20 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 1.0
+  optimizer:
+    type: AdamW
+    weight_decay: 0.05
+    param_groups:
+      - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
+        weight_decay: 0.0
--- a/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml
+++ b/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+  'faster_rcnn_r50_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [2]
+  num_stages: 3
--- a/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml
+++ b/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml
@@ -0,0 +1,14 @@
+_BASE_: [
+  'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
--- a/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml
+++ b/paddle_detection/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml
@@ -0,0 +1,25 @@
+_BASE_: [
+  'faster_rcnn_r50_fpn_1x_coco.yml',
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
+weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
--- a/Show More
+++ b/Show More