更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,49 @@
# Group DETR: Fast DETR training with group-wise one-to-many assignment
# Group DETR v2: Strong object detector with encoder-decoder pretraining
## Introduction
[Group DETR](https://arxiv.org/pdf/2207.13085.pdf) is an object detection model based on DETR. We reproduced the model of the paper.
[Group DETR v2](https://arxiv.org/pdf/2211.03594.pdf) is a strong object detection model based on DINO and Group DETR. We reproduced the model of the paper.
## Model Zoo
| Backbone | Model | Epochs | Resolution |Box AP | Config | Download |
|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:--------------------------------------------------------------------------------:|:------:|
| R-50 | dino_r50_4scale | 12 | (800, 1333) | 49.6 | [config](./group_dino_r50_4scale_1x_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/group_dino_r50_4scale_1x_coco.pdparams) |
| ViT-Huge | dino_vit_huge_4scale | 12 | (1184, 2000) | 63.3 | [config](./group_dino_vit_huge_4scale_1x_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/group_dino_vit_huge_4scale_1x_coco.pdparams) |
**Notes:**
- Group DETR is trained on the COCO train2017 dataset and evaluated on val2017; the reported metric is `mAP(IoU=0.5:0.95)`.
- Group DETRv2 requires a ViT-Huge encoder pre-trained and fine-tuned on ImageNet-1K in a self-supervised manner and a detector pre-trained on Objects365; it is finally fine-tuned on COCO train2017. Group DETRv2 is also evaluated on val2017, and the reported metric is `mAP(IoU=0.5:0.95)`.
- Group DETR and Group DETRv2 are both trained using 4 GPUs.
GPU multi-card training
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/group_detr/group_dino_r50_4scale_1x_coco.yml --fleet --eval
```
```bash
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/group_detr/group_dino_vit_huge_4scale_1x_coco.yml --fleet --eval
```
## Citations
```
@article{chen2022group,
title={Group DETR: Fast DETR training with group-wise one-to-many assignment},
author={Chen, Qiang and Chen, Xiaokang and Wang, Jian and Feng, Haocheng and Han, Junyu and Ding, Errui and Zeng, Gang and Wang, Jingdong},
journal={arXiv preprint arXiv:2207.13085},
volume={1},
number={2},
year={2022}
}
@article{chen2022groupv2,
title={Group DETR v2: Strong object detector with encoder-decoder pretraining},
author={Chen, Qiang and Wang, Jian and Han, Chuchu and Zhang, Shan and Li, Zexian and Chen, Xiaokang and Chen, Jiahui and Wang, Xiaodi and Han, Shuming and Zhang, Gang and others},
journal={arXiv preprint arXiv:2211.03594},
year={2022}
}
```

View File

@@ -0,0 +1,44 @@
# Data reader settings for the high-resolution pipeline: training short sides
# range from 480 to 1184 with the long side capped at 2000, and evaluation /
# test images are resized with target_size [1184, 2000].
worker_num: 2

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomFlip: {prob: 0.5}
    # Two alternative augmentation branches (plain multi-scale resize vs.
    # resize -> crop -> resize). NOTE(review): presumably one branch is chosen
    # per sample -- confirm against the RandomSelect operator.
    - RandomSelect:
        transforms1:
          - RandomShortSideResize:
              short_side_sizes: [
                480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832,
                864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184
              ]
              max_size: 2000
        transforms2:
          - RandomShortSideResize:
              short_side_sizes: [400, 500, 600, 700, 800, 900]
          - RandomSizeCrop:
              min_size: 384
              max_size: 900
          - RandomShortSideResize:
              short_side_sizes: [
                480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832,
                864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184
              ]
              max_size: 2000
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_transforms:
    - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
  batch_size: 2
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: false

# EvalReader and TestReader use the same transform chain and batch settings.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [1184, 2000], keep_ratio: true}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [1184, 2000], keep_ratio: true}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

View File

@@ -0,0 +1,44 @@
# Data reader settings for the standard-resolution pipeline: training short
# sides range from 480 to 800 with the long side capped at 1333, and
# evaluation / test images are resized with target_size [800, 1333].
worker_num: 2

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomFlip: {prob: 0.5}
    # Two alternative augmentation branches (plain multi-scale resize vs.
    # resize -> crop -> resize). NOTE(review): presumably one branch is chosen
    # per sample -- confirm against the RandomSelect operator.
    - RandomSelect:
        transforms1:
          - RandomShortSideResize:
              short_side_sizes: [
                480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800
              ]
              max_size: 1333
        transforms2:
          - RandomShortSideResize:
              short_side_sizes: [400, 500, 600]
          - RandomSizeCrop:
              min_size: 384
              max_size: 600
          - RandomShortSideResize:
              short_side_sizes: [
                480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800
              ]
              max_size: 1333
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_transforms:
    - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
  batch_size: 2
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: false

# EvalReader and TestReader use the same transform chain and batch settings.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [800, 1333], keep_ratio: true}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [800, 1333], keep_ratio: true}
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

View File

@@ -0,0 +1,53 @@
# Model definition: DETR architecture wired with a ResNet (depth 50) backbone,
# a GroupDINOTransformer, a DINOHead and DETRPostProcess.
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
hidden_dim: 256
use_focal_loss: true

# Component selection; each name refers to the section of the same name below.
DETR:
  backbone: ResNet
  transformer: GroupDINOTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [1, 2, 3]
  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
  num_stages: 4

GroupDINOTransformer:
  num_queries: 900
  position_embed_type: sine
  num_levels: 4
  nhead: 8
  num_encoder_layers: 6
  num_decoder_layers: 6
  dim_feedforward: 2048
  dropout: 0.0
  activation: relu
  pe_temperature: 20
  pe_offset: 0.0
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: true
  # dual_queries / dual_groups are set to the same values in DETRPostProcess.
  dual_queries: true
  dual_groups: 10

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300
  dual_queries: true
  dual_groups: 10

View File

@@ -0,0 +1,68 @@
# Model definition: DETR architecture wired with a VisionTransformer2D
# backbone, a SimpleFeaturePyramid neck, a GroupDINOTransformer, a DINOHead
# and DETRPostProcess.
architecture: DETR
# NOTE(review): the weight file name mentions "patch14" while
# VisionTransformer2D.patch_size below is 16 -- confirm this is intended.
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_huge_mae_patch14_dec512d8b_pretrained.pdparams
hidden_dim: 256
use_focal_loss: true

# Component selection; each name refers to the section of the same name below.
DETR:
  backbone: VisionTransformer2D
  neck: SimpleFeaturePyramid
  transformer: GroupDINOTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

VisionTransformer2D:
  patch_size: 16
  embed_dim: 1280
  depth: 32
  num_heads: 16
  mlp_ratio: 4
  attn_bias: true
  drop_rate: 0.0
  drop_path_rate: 0.1
  lr_decay_rate: 0.7
  global_attn_indexes: [7, 15, 23, 31]
  use_abs_pos: false
  use_rel_pos: true
  rel_pos_zero_init: true
  window_size: 14
  out_indices: [31]

SimpleFeaturePyramid:
  out_channels: 256
  num_levels: 4

GroupDINOTransformer:
  num_queries: 900
  position_embed_type: sine
  pe_temperature: 20
  pe_offset: 0.0
  num_levels: 4
  nhead: 8
  num_encoder_layers: 6
  num_decoder_layers: 6
  dim_feedforward: 2048
  use_input_proj: false
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: true
  # dual_queries / dual_groups are set to the same values in DETRPostProcess.
  dual_queries: true
  dual_groups: 10

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300
  dual_queries: true
  dual_groups: 10

View File

@@ -0,0 +1,16 @@
# Training schedule: 12 epochs, AdamW, one piecewise learning-rate decay.
epoch: 12

LearningRate:
  base_lr: 0.0001
  schedulers:
    # Single milestone at 11 with gamma 0.1; warmup is disabled.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [11]
      use_warmup: false

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

View File

@@ -0,0 +1,11 @@
# Top-level config for the ResNet-50 variant: lists the base config files
# (dataset, runtime, optimizer, model, reader) and sets run-level options.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/group_dino_r50.yml'
  - '_base_/dino_reader.yml'

weights: output/group_dino_r50_4scale_1x_coco/model_final
find_unused_parameters: true
log_iter: 100

View File

@@ -0,0 +1,11 @@
# Top-level config for the ViT-Huge variant: lists the base config files
# (dataset, runtime, optimizer, model, 2000-px reader) and sets run-level
# options.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/optimizer_1x.yml'
  - '_base_/group_dino_vit_huge.yml'
  - '_base_/dino_2000_reader.yml'

weights: output/group_dino_vit_huge_4scale_1x_coco/model_final
find_unused_parameters: true
log_iter: 100