更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,27 @@
# Enhanced Training of Query-Based Object Detection via Selective Query Recollection
## Introduction
This paper investigates a phenomenon where query-based object detectors mispredict at the last decoding stage while predicting correctly at an intermediate stage. It designs and presents Selective Query Recollection (SQR), a simple and effective training strategy for query-based object detectors. SQR cumulatively collects intermediate queries as decoding stages go deeper and selectively forwards the queries to the downstream stages aside from the sequential structure.
## Model Zoo
| Backbone | Model | Images/GPU | GPUs | Epochs | Box AP | Config | Download |
|:--------:|:-------------------:|:----------:|:----:|:------:|:------:|:------------------------------------------------:|:---------:|
| R-50 | Deformable DETR SQR | 1 | 4 | 12 | 32.9 | [config](./deformable_detr_sqr_r50_12e_coco.yml) |[model](https://bj.bcebos.com/v1/paddledet/models/deformable_detr_sqr_r50_12e_coco.pdparams) |
> We did not find a config for the 12-epoch experiment in the paper, so we wrote one ourselves with reference to the standard 12-epoch config in mmdetection. With this [config](./deformable_detr_sqr_r50_12e_coco.yml), the same accuracy was obtained in the official project and in this project. <br> We have not finished validating the 50-epoch experiment yet; if you need that config, please refer to [here](https://pan.baidu.com/s/1eWavnAiRoFXm3mMlpn9WPw?pwd=3z6m).
## Citations
```
@InProceedings{Chen_2023_CVPR,
author = {Chen, Fangyi and Zhang, Han and Hu, Kai and Huang, Yu-Kai and Zhu, Chenchen and Savvides, Marios},
title = {Enhanced Training of Query-Based Object Detection via Selective Query Recollection},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2023},
pages = {23756-23765}
}
```

View File

@@ -0,0 +1,50 @@
# Model definition: a DETR-style detector whose transformer is the
# query-recollection (QR) deformable variant.
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
# Top-level values; presumably shared by several components below -
# TODO confirm which components read them.
hidden_dim: 256
use_focal_loss: true

# Components plugged into the DETR architecture. ResNet,
# QRDeformableTransformer and DeformableDETRHead are configured in the
# top-level sections of the same names.
DETR:
  backbone: ResNet
  transformer: QRDeformableTransformer
  detr_head: DeformableDETRHead
  post_process: DETRPostProcess
# Backbone settings for the ResNet component.
ResNet:
  # Stage indices used in this section: index 0 stands for res2.
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [1, 2, 3]
  # One entry per stage (4 entries, matching num_stages); presumably
  # per-stage learning-rate multipliers - TODO confirm.
  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
  num_stages: 4
# Settings for the QRDeformableTransformer component (6 encoder and
# 6 decoder layers).
QRDeformableTransformer:
  num_queries: 300
  position_embed_type: sine
  nhead: 8
  num_encoder_layers: 6
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.1
  activation: relu
  num_feature_levels: 4
  num_encoder_points: 4
  num_decoder_points: 4
  # Two parallel 7-element lists. Presumably the start/end bounds of
  # the query groups selected for recollection at each decoding step -
  # TODO confirm against the QRDeformableTransformer implementation.
  start_q: [0, 0, 1, 2, 4, 7, 12]
  end_q: [1, 2, 4, 7, 12, 20, 33]
# Detection head settings.
DeformableDETRHead:
  num_mlp_layers: 3

# Loss settings. The coefficients here are identical to
# HungarianMatcher.matcher_coeff below; keep the two in sync if that is
# intentional - TODO confirm.
DETRLoss:
  loss_coeff: {class: 2, bbox: 5, giou: 2}
  aux_loss: true

# Matcher settings (same class/bbox/giou coefficients as
# DETRLoss.loss_coeff).
HungarianMatcher:
  matcher_coeff: {class: 2, bbox: 5, giou: 2}

View File

@@ -0,0 +1,44 @@
# Data-loading settings for training.
worker_num: 4

TrainReader:
  # Per-sample transforms, listed in the order they appear in the config.
  sample_transforms:
    - Decode: {}
    - RandomFlip:
        prob: 0.5
    # Two alternative transform lists; presumably one of them is picked
    # at random per sample - TODO confirm against RandomSelect.
    - RandomSelect:
        transforms1:
          - RandomShortSideResize:
              short_side_sizes: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
              max_size: 1333
        transforms2:
          - RandomShortSideResize:
              short_side_sizes: [400, 500, 600]
          - RandomSizeCrop:
              min_size: 384
              max_size: 600
          - RandomShortSideResize:
              short_side_sizes: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
              max_size: 1333
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  # Transforms applied to a whole batch.
  batch_transforms:
    - PadMaskBatch:
        pad_to_stride: -1
        return_pad_mask: true
  batch_size: 4
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: false
# Data-loading settings for evaluation: no random augmentation, one
# image per batch, fixed order.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [800, 1333]
        keep_ratio: true
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false
# Data-loading settings for the test reader: no random augmentation,
# one image per batch, fixed order.
TestReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        target_size: [800, 1333]
        keep_ratio: true
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

View File

@@ -0,0 +1,16 @@
# Optimizer and learning-rate schedule for a 50-epoch run.
epoch: 50

LearningRate:
  base_lr: 0.0002
  schedulers:
    # Single decay milestone at epoch 40 with factor gamma = 0.1.
    # use_warmup is assumed to be an argument of PiecewiseDecay rather
    # than of LearningRate - TODO confirm.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [40]
      use_warmup: false

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

View File

@@ -0,0 +1,27 @@
# 12-epoch experiment config. The schedule and optimizer are defined
# inline here; no optimizer file is listed under _BASE_.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '_base_/deformable_detr_sqr_r50.yml'
  - '_base_/deformable_detr_sqr_reader.yml'

weights: output/deformable_detr_sqr_r50_12e_coco/model_final
find_unused_parameters: true

# a standard 1x schedule
epoch: 12

LearningRate:
  base_lr: 0.0002
  schedulers:
    # Decay milestones at epochs 8 and 11 with factor gamma = 0.1.
    # use_warmup is assumed to be an argument of PiecewiseDecay rather
    # than of LearningRate - TODO confirm.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [8, 11]
      use_warmup: false

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001