移动paddle_detection

This commit is contained in:
2024-09-24 17:02:56 +08:00
parent 90a6d5ec75
commit 3438cf6e0e
2025 changed files with 11 additions and 11 deletions

View File

@@ -0,0 +1,26 @@
# Swin Transformer
## COCO Model Zoo
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 | 推理时间(fps) | mAP<sup>val<br>0.5:0.95</sup> | 下载 | 配置文件 |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| swin_T_224 | Faster R-CNN | 2 | 36e | ---- | 45.3 | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [配置文件](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
| swin_T_224 | PP-YOLOE+ | 8 | 36e | ---- | 44.7 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_swin_tiny_36e_coco.pdparams) | [配置文件](./ppyoloe_plus_swin_tiny_36e_coco.yml) |
## Citations
```
@article{liu2021Swin,
title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows},
author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
journal={arXiv preprint arXiv:2103.14030},
year={2021}
}
@inproceedings{liu2021swinv2,
title={Swin Transformer V2: Scaling Up Capacity and Resolution},
author={Ze Liu and Han Hu and Yutong Lin and Zhuliang Yao and Zhenda Xie and Yixuan Wei and Jia Ning and Yue Cao and Zheng Zhang and Li Dong and Furu Wei and Baining Guo},
booktitle={International Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2022}
}
```

View File

@@ -0,0 +1,82 @@
# Base configs this file builds on (dataset, runtime, Faster R-CNN FPN model
# and reader). Paths are presumably resolved relative to this file -- confirm
# against the config loader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml'
  - '../faster_rcnn/_base_/faster_fpn_reader.yml'

# Path prefix of the final trained model.
weights: 'output/faster_rcnn_swin_tiny_fpn_3x_coco/model_final'
# Pretrained Swin-Tiny checkpoint (patch 4, window 7, 224 input, per the file name).
pretrain_weights: 'https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams'
# Component wiring for the FasterRCNN architecture: each key is a slot and
# each value names the component class plugged into it. The children must be
# nested under FasterRCNN; at column 0 they would become unrelated top-level
# keys and FasterRCNN itself would be null.
FasterRCNN:
  backbone: SwinTransformer
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  bbox_post_process: BBoxPostProcess
# Settings for the SwinTransformer backbone selected by FasterRCNN.backbone.
SwinTransformer:
  # Supported values: 'swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224',
  # 'swin_B_384', 'swin_L_384'.
  arch: 'swin_T_224'
  # Presumably toggles the absolute position embedding -- confirm in the backbone.
  ape: false
  drop_path_rate: 0.1
  patch_norm: true
  # All four stage outputs (0-3) are exported, presumably for the FPN neck.
  out_indices: [0, 1, 2, 3]
# Reader worker count. Kept as a top-level key; NOTE(review): the flattened
# source cannot confirm its nesting level -- verify against the base reader config.
worker_num: 2

# Training data pipeline. Per-sample transforms are listed in order, followed
# by per-batch transforms and loader options.
TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomResizeCrop:
        resizes: [400, 500, 600]
        cropsizes: [[384, 600]]
        prob: 0.5
    # Multi-scale training: one [short, long] target is picked from this list
    # (selection rule presumably random -- confirm in the transform).
    - RandomResize:
        target_size:
          - [480, 1333]
          - [512, 1333]
          - [544, 1333]
          - [576, 1333]
          - [608, 1333]
          - [640, 1333]
          - [672, 1333]
          - [704, 1333]
          - [736, 1333]
          - [768, 1333]
          - [800, 1333]
        keep_ratio: true
        interp: 2
    - RandomFlip:
        prob: 0.5
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch:
        pad_to_stride: 32
  batch_size: 2
  shuffle: true
  drop_last: true
  collate_batch: false
# Evaluation data pipeline: a single fixed resize target (no augmentation),
# with the same normalization constants as the training reader.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize:
        interp: 2
        target_size: [800, 1333]
        keep_ratio: true
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_transforms:
    - PadBatch:
        pad_to_stride: 32
  batch_size: 1
# Inference data pipeline. The declared input shape, the Resize target and the
# Pad size all use 640, so every image reaches the model as 640x640.
TestReader:
  inputs_def:
    # TODO deploy: a fixed shape is set for now.
    image_shape: [-1, 3, 640, 640]
  sample_transforms:
    - Decode: {}
    - Resize:
        interp: 2
        target_size: 640
        keep_ratio: true
    - Pad:
        size: 640
    - NormalizeImage:
        is_scale: true
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
    - Permute: {}
  batch_size: 1
# Training length; the milestones below are presumably expressed in the same
# unit (epochs) -- confirm in the scheduler implementation.
epoch: 36

# Learning-rate schedule: base rate plus a list of tagged scheduler entries.
# Each scheduler's parameters must be nested under its own list item, otherwise
# the tagged item is empty and the file does not parse.
LearningRate:
  base_lr: 0.0001
  schedulers:
    # Scales the rate by gamma at each milestone.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [24, 33]
    # Warmup over the first 1000 steps, starting from start_factor.
    - !LinearWarmup
      start_factor: 0.1
      steps: 1000
# Optimizer setup: AdamW with gradient-norm clipping and one parameter group
# that overrides the default weight decay.
OptimizerBuilder:
  clip_grad_by_norm: 1.0
  optimizer:
    type: AdamW
    weight_decay: 0.05
    param_groups:
      # Parameters selected by these name fragments get weight_decay 0.0
      # instead of the default 0.05 (matching rule presumably substring-based
      # -- confirm in the optimizer builder).
      - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
        weight_decay: 0.0

View File

@@ -0,0 +1,67 @@
# Base configs this file builds on (dataset, runtime, PP-YOLOE+ model and
# reader). Paths are presumably resolved relative to this file -- confirm
# against the config loader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - '../ppyoloe/_base_/ppyoloe_plus_crn.yml'
  - '../ppyoloe/_base_/ppyoloe_plus_reader.yml'

# Depth/width multipliers of the "s" version.
depth_mult: 0.33
width_mult: 0.50

log_iter: 50
snapshot_epoch: 4
# Path prefix of the final trained model.
weights: 'output/ppyoloe_plus_swin_tiny_36e_coco/model_final'
# Pretrained Swin-Tiny checkpoint (patch 4, window 7, 224 input, per the file name).
pretrain_weights: 'https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams'

architecture: PPYOLOE
norm_type: sync_bn
use_ema: true
ema_decay: 0.9998
# Names listed here are presumably excluded from EMA / mixed-precision
# handling respectively -- confirm against the trainer.
ema_black_list: ['proj_conv.weight']
custom_black_list: ['reduce_mean']
# Component wiring for the PPYOLOE architecture: each key is a slot and each
# value names the component class plugged into it. The children must be nested
# under PPYOLOE; at column 0 they would become unrelated top-level keys.
PPYOLOE:
  backbone: SwinTransformer
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  # Explicit null: no separate post-process component is configured here.
  post_process: null
# Settings for the SwinTransformer backbone selected by PPYOLOE.backbone.
SwinTransformer:
  # Supported values: 'swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224',
  # 'swin_B_384', 'swin_L_384'.
  arch: 'swin_T_224'
  # Presumably toggles the absolute position embedding -- confirm in the backbone.
  ape: false
  drop_path_rate: 0.1
  patch_norm: true
  # Only stage outputs 1-3 are exported (stage 0 is not used by this config).
  out_indices: [1, 2, 3]
# Detection head overrides. The NMS thresholds belong to the nested `nms`
# mapping; at column 0 `nms` would be null and its settings lost.
PPYOLOEHead:
  static_assigner_epoch: 12
  nms:
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7
# Overrides only the training batch size; the rest of the reader comes from
# the base reader config listed in _BASE_.
TrainReader:
  batch_size: 8
# Training length; the milestones below are presumably expressed in the same
# unit (epochs) -- confirm in the scheduler implementation.
epoch: 36

# Learning-rate schedule: base rate plus a list of tagged scheduler entries.
# Each scheduler's parameters must be nested under its own list item, otherwise
# the tagged item is empty and the file does not parse.
LearningRate:
  base_lr: 0.0001
  schedulers:
    # Scales the rate by gamma at each milestone.
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [24, 33]
    # Warmup over the first 1000 steps, starting from start_factor.
    - !LinearWarmup
      start_factor: 0.1
      steps: 1000
# Optimizer setup: AdamW with gradient-norm clipping and one parameter group
# that overrides the default weight decay.
OptimizerBuilder:
  clip_grad_by_norm: 1.0
  optimizer:
    type: AdamW
    weight_decay: 0.05
    param_groups:
      # Parameters selected by these name fragments get weight_decay 0.0
      # instead of the default 0.05 (matching rule presumably substring-based
      # -- confirm in the optimizer builder).
      - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
        weight_decay: 0.0