Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,76 @@
# Co-tuning for Transfer Learning <br />Supervised Contrastive Learning
## Data preparation
Taking the [Kaggle dataset](https://www.kaggle.com/andrewmvd/road-sign-detection) competition data as an example, this section describes how to prepare custom few-shot data.
The [road-sign-detection](https://www.kaggle.com/andrewmvd/road-sign-detection) competition data on Kaggle contains 877 images covering 4 classes: crosswalk, speedlimit, stop and trafficlight.
It can be downloaded from Kaggle, or from this [download link](https://fsdet-dataset.bj.bcebos.com/roadsign_coco.tar.gz).
From the original dataset, select the same number of samples for each class, e.g. 10 shots, i.e. ten training images per class, and train on that subset (a sketch of building such a split is shown below).<br />
For the industrial case we use PKU-Market-PCB, a dataset for defect detection on printed circuit boards (PCB) that covers 6 common PCB defect types: [download link](https://fsdet-dataset.bj.bcebos.com/pcb.tar.gz)
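
The `train_shots10.json` / `train_shots30.json` annotation files used by the configs below are simply COCO-format annotations restricted to K images per class. A minimal sketch of how such a split could be built (the paths and the `make_kshot_split` helper are illustrative, not part of PaddleDetection):
```
import json
import random
from collections import defaultdict

def make_kshot_split(src_json, dst_json, k=10, seed=0):
    """Keep up to k images per category from a COCO-format annotation file.

    Note: an image kept for one class may also contain boxes of other classes,
    so some classes can end up with slightly more than k images.
    """
    with open(src_json) as f:
        data = json.load(f)

    imgs_per_cat = defaultdict(set)
    for ann in data["annotations"]:
        imgs_per_cat[ann["category_id"]].add(ann["image_id"])

    random.seed(seed)
    keep = set()
    for img_ids in imgs_per_cat.values():
        ids = sorted(img_ids)
        keep.update(random.sample(ids, min(k, len(ids))))

    data["images"] = [im for im in data["images"] if im["id"] in keep]
    data["annotations"] = [a for a in data["annotations"] if a["image_id"] in keep]
    with open(dst_json, "w") as f:
        json.dump(data, f)

# e.g. make_kshot_split("annotations/train.json", "annotations/train_shots10.json", k=10)
```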
## Model Zoo
| Backbone | Architecture | Images/GPU | Shots per class | Box AP | Download | Config |
| :------------------- | :------------- | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-vd | Faster | 1 | 10 | 60.1 | [download](https://bj.bcebos.com/v1/paddledet/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml) |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | 17.8 | [download](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_s_80e_contrast_pcb.pdparams) | [config](./ppyoloe_plus_crn_s_80e_contrast_pcb.yml) |
## Co-tuning comparison
| Backbone | Architecture | Images/GPU | Shots per class | Co-tuning | Box AP |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: |
| ResNet50-vd | Faster | 1 | 10 | False | 56.7 |
| ResNet50-vd | Faster | 1 | 10 | True | 60.1 |
## Contrastive learning comparison
| Backbone | Architecture | Images/GPU | Shots per class | Contrast | Box AP |
| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | False | 15.4 |
| PPYOLOE_crn_s | PPYOLOE | 1 | 30 | True | 17.8 |
## Training & Evaluation & Inference
### 1. Training
```
# -c specifies which config file to use
# --eval evaluates while training; the checkpoint with the best validation result is saved
python tools/train.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml --eval
```
### 2. Evaluation
```
# -c specifies which config file to use
# -o overrides global variables defined in the config file
python tools/eval.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml \
-o weights=output/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign/best_model
```
### 3. Inference
```
# -c specifies which config file to use
# --infer_img specifies the path of the image to predict
python tools/infer.py -c configs/few-shot/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign.yml \
--infer_img=demo/road554.png
```
## Citations
```
@article{you2020co,
title={Co-tuning for transfer learning},
author={You, Kaichao and Kou, Zhi and Long, Mingsheng and Wang, Jianmin},
journal={Advances in Neural Information Processing Systems},
volume={33},
pages={17236--17246},
year={2020}
}
@article{khosla2020supervised,
title={Supervised contrastive learning},
author={Khosla, Prannay and Teterwak, Piotr and Wang, Chen and Sarna, Aaron and Tian, Yonglong and Isola, Phillip and Maschinot, Aaron and Liu, Ce and Krishnan, Dilip},
journal={Advances in Neural Information Processing Systems},
volume={33},
pages={18661--18673},
year={2020}
}
```


@@ -0,0 +1,40 @@
worker_num: 2

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: true
  drop_last: true
  collate_batch: false

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false
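
For intuition, the Eval/Test pipeline above (Decode → keep-ratio Resize → NormalizeImage → Permute → PadBatch) corresponds roughly to the following standalone preprocessing; this is a plain NumPy/Pillow sketch for illustration, not the PaddleDetection transforms themselves:
```
import numpy as np
from PIL import Image

def preprocess(path, target=(800, 1333), pad_to_stride=32,
               mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Decode -> keep-ratio Resize -> NormalizeImage -> Permute -> PadBatch (single image)."""
    img = Image.open(path).convert("RGB")                      # Decode
    w, h = img.size
    # keep_ratio: short side -> 800, long side capped at 1333
    scale = min(min(target) / min(h, w), max(target) / max(h, w))
    img = img.resize((int(round(w * scale)), int(round(h * scale))), Image.BICUBIC)  # interp: 2 ~ cubic
    x = np.asarray(img, dtype=np.float32) / 255.0              # is_scale: true
    x = (x - np.array(mean)) / np.array(std)                   # NormalizeImage
    x = x.transpose(2, 0, 1)                                   # Permute -> CHW
    ph, pw = [(d + pad_to_stride - 1) // pad_to_stride * pad_to_stride for d in x.shape[1:]]
    out = np.zeros((3, ph, pw), dtype=np.float32)              # PadBatch to a stride multiple
    out[:, : x.shape[1], : x.shape[2]] = x
    return out
```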


@@ -0,0 +1,66 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams

FasterRCNN:
  backbone: ResNet
  rpn_head: RPNHead
  bbox_head: BBoxHead
  # post process
  bbox_post_process: BBoxPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [2]
  num_stages: 3

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [32, 64, 128, 256, 512]
    strides: [16]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 12000
    post_nms_top_n: 2000
    topk_after_collect: False
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 6000
    post_nms_top_n: 1000

BBoxHead:
  head: Res5Head
  roi_extractor:
    resolution: 14
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner
  with_pool: true

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5


@@ -0,0 +1,73 @@
architecture: FasterRCNN
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams

FasterRCNN:
  backbone: ResNet
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  # post process
  bbox_post_process: BBoxPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: True
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000

BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

TwoFCHead:
  out_channel: 1024

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5
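
As a sanity check on the `anchor_generator` settings above, the base anchor shapes per FPN level can be derived directly; the snippet below is an illustrative derivation (one anchor size per stride, aspect ratio interpreted as height/width), not the RPN code itself:
```
import math

def base_anchors(size, ratios=(0.5, 1.0, 2.0)):
    """(width, height) of the base anchors for one level, all with area size**2."""
    out = []
    for r in ratios:                      # r is interpreted as height / width
        w = math.sqrt(size * size / r)
        out.append((w, w * r))
    return out

for size, stride in zip([32, 64, 128, 256, 512], [4, 8, 16, 32, 64]):
    shapes = [(round(w, 1), round(h, 1)) for w, h in base_anchors(size)]
    print(f"stride {stride:>2}: {shapes}")
```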


@@ -0,0 +1,40 @@
worker_num: 2

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: true
  drop_last: true
  collate_batch: false

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: -1}
  batch_size: 1
  shuffle: false
  drop_last: false


@@ -0,0 +1,19 @@
epoch: 12

LearningRate:
  base_lr: 0.01
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2
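
The schedule defined above is a linear warmup over the first 1000 iterations followed by step decay (×0.1) at epochs 8 and 11. A small sketch of the resulting learning rate, assuming a user-supplied `iters_per_epoch` (illustrative, not the scheduler implementation):
```
def lr_at(it, iters_per_epoch, base_lr=0.01, warmup_steps=1000,
          start_factor=0.1, milestones=(8, 11), gamma=0.1):
    """Learning rate at iteration `it` under LinearWarmup + PiecewiseDecay."""
    lr = base_lr
    for m in milestones:                       # decay once for each passed milestone epoch
        if it / iters_per_epoch >= m:
            lr *= gamma
    if it < warmup_steps:                      # linear ramp from start_factor * lr
        lr *= start_factor + (1.0 - start_factor) * it / warmup_steps
    return lr

# e.g. with 500 iterations per epoch:
# lr_at(0, 500) -> 0.001, lr_at(1000, 500) -> 0.01, lr_at(9 * 500, 500) -> 0.001
```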


@@ -0,0 +1,18 @@
epoch: 80

LearningRate:
  base_lr: 0.001
  schedulers:
  - !CosineDecay
    max_epochs: 96
  - !LinearWarmup
    start_factor: 0.
    epochs: 5

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0005
    type: L2


@@ -0,0 +1,49 @@
architecture: YOLOv3
norm_type: sync_bn
use_ema: true
use_cot: False
ema_decay: 0.9998
ema_black_list: ['proj_conv.weight']
custom_black_list: ['reduce_mean']

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  post_process: ~

CSPResNet:
  layers: [3, 6, 6, 3]
  channels: [64, 128, 256, 512, 1024]
  return_idx: [1, 2, 3]
  use_large_stem: True
  use_alpha: True

CustomCSPPAN:
  out_channels: [768, 384, 192]
  stage_num: 1
  block_num: 3
  act: 'swish'
  spp: true

PPYOLOEHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 30
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7
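
For reference, PPYOLOE-style heads predict at feature-map cell centers; with `grid_cell_offset: 0.5` and `fpn_strides: [32, 16, 8]`, the anchor points in input-image coordinates can be sketched as below (illustrative only, not the PPYOLOEHead implementation):
```
import numpy as np

def anchor_points(feat_hw, strides=(32, 16, 8), offset=0.5):
    """Cell-center anchor points for each FPN level, in input-image pixel coordinates."""
    points = []
    for (h, w), s in zip(feat_hw, strides):
        ys, xs = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
        pts = np.stack([(xs + offset) * s, (ys + offset) * s], axis=-1)  # (x, y) per cell
        points.append(pts.reshape(-1, 2))
    return np.concatenate(points)

# For a 640x640 input, strides 32/16/8 give 20x20, 40x40 and 80x80 feature maps:
print(anchor_points([(20, 20), (40, 40), (80, 80)]).shape)  # (8400, 2)
```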


@@ -0,0 +1,40 @@
worker_num: 4
eval_height: &eval_height 640
eval_width: &eval_width 640
eval_size: &eval_size [*eval_height, *eval_width]

TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomDistort: {}
  - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
  - RandomCrop: {}
  - RandomFlip: {}
  batch_transforms:
  - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  - PadGT: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  use_shared_memory: true
  collate_batch: true

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  batch_size: 2

TestReader:
  inputs_def:
    image_shape: [3, *eval_height, *eval_width]
  sample_transforms:
  - Decode: {}
  - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2}
  - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
  - Permute: {}
  batch_size: 1


@@ -0,0 +1,67 @@
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_1x.yml',
  '_base_/faster_rcnn_r50_fpn.yml',
  '_base_/faster_fpn_reader.yml',
]
pretrain_weights: https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams
weights: output/faster_rcnn_r50_vd_fpn_1x_coco_cotuning_roadsign/model_final
snapshot_epoch: 5

ResNet:
  # index 0 stands for res2
  depth: 50
  variant: d
  norm_type: bn
  freeze_at: 0
  return_idx: [0,1,2,3]
  num_stages: 4

epoch: 30
LearningRate:
  base_lr: 0.001
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000

use_cot: True

BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner
  cot_classes: 80
  loss_cot:
    name: COTLoss
    cot_lambda: 1
    cot_scale: 1

num_classes: 4
metric: COCO
map_type: integral

TrainDataset:
  !COCODataSet
    image_dir: images
    anno_path: annotations/train_shots10.json
    dataset_dir: dataset/roadsign_coco
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  !COCODataSet
    image_dir: images
    anno_path: annotations/roadsign_valid.json
    dataset_dir: dataset/roadsign_coco

TestDataset:
  !ImageFolder
    anno_path: annotations/roadsign_valid.json
    dataset_dir: dataset/roadsign_coco
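
For intuition, co-tuning (You et al., 2020) keeps the pretrained source classifier in play during fine-tuning (here `cot_classes: 80`, the COCO categories): each target label is mapped to a soft distribution over source categories, and an extra cross-entropy term on the source logits is added, weighted by `cot_lambda`. The sketch below illustrates that idea only; it is not the `COTLoss` implementation in PaddleDetection, and the `label_relation` matrix is an assumed input:
```
import paddle
import paddle.nn.functional as F

def cotuning_loss(target_logits, source_logits, target_labels, label_relation,
                  cot_lambda=1.0):
    """Illustrative co-tuning objective, not PaddleDetection's COTLoss.

    target_logits:  [N, num_target_classes] logits of the new detection head
    source_logits:  [N, cot_classes]        logits of the retained pretrained head
    target_labels:  [N]                     ground-truth target class ids
    label_relation: [num_target_classes, cot_classes] assumed p(source | target),
                    e.g. estimated by averaging pretrained softmax outputs per class
    """
    ce = F.cross_entropy(target_logits, target_labels)                 # usual target-class loss
    soft = paddle.index_select(label_relation, target_labels, axis=0)  # [N, cot_classes]
    log_p = F.log_softmax(source_logits, axis=-1)
    cot = -(soft * log_p).sum(axis=-1).mean()                          # soft cross-entropy on source head
    return ce + cot_lambda * cot
```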


@@ -0,0 +1,81 @@
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  './_base_/optimizer_80e.yml',
  './_base_/ppyoloe_plus_crn.yml',
  './_base_/ppyoloe_plus_reader.yml',
]
log_iter: 100
snapshot_epoch: 10
weights: output/ppyoloe_plus_crn_s_80e_contrast_pcb/model_final
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams
depth_mult: 0.33
width_mult: 0.50

epoch: 80
LearningRate:
  base_lr: 0.0001
  schedulers:
  - !CosineDecay
    max_epochs: 96
  - !LinearWarmup
    start_factor: 0.
    epochs: 5

YOLOv3:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEContrastHead
  post_process: ~

PPYOLOEContrastHead:
  fpn_strides: [32, 16, 8]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 100
  use_varifocal_loss: True
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5, contrast: 0.2}
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  contrast_loss:
    name: SupContrast
    temperature: 100
    sample_num: 2048
    thresh: 0.75
  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7

num_classes: 6
metric: COCO
map_type: integral

TrainDataset:
  !COCODataSet
    image_dir: images
    anno_path: pcb_cocoanno/train_shots30.json
    dataset_dir: dataset/pcb
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  !COCODataSet
    image_dir: images
    anno_path: pcb_cocoanno/val.json
    dataset_dir: dataset/pcb

TestDataset:
  !ImageFolder
    anno_path: pcb_cocoanno/val.json
    dataset_dir: dataset/pcb
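
The `contrast_loss` entry above adds a supervised contrastive term (Khosla et al., 2020) over sampled head features, controlled by `temperature`, `sample_num` and `thresh`, and weighted by `contrast: 0.2` in `loss_weight`. A minimal illustrative sketch of the loss itself (one view per sample; the `sample_num` subsampling and `thresh` filtering are omitted, and this is not the `SupContrast` module in PaddleDetection):
```
import paddle
import paddle.nn.functional as F

def sup_contrast(features, labels, temperature=100.0):
    """Supervised contrastive loss over one view per sample (illustrative sketch).

    features: [N, D] embeddings sampled from the head, labels: [N] class ids.
    """
    feats = F.normalize(features, axis=1)
    sim = paddle.matmul(feats, feats, transpose_y=True) / temperature    # [N, N] similarities
    n = feats.shape[0]
    not_self = 1.0 - paddle.eye(n)                                       # exclude i == j pairs
    pos = (labels.unsqueeze(0) == labels.unsqueeze(1)).astype('float32') * not_self

    exp_sim = paddle.exp(sim) * not_self
    log_prob = sim - paddle.log(exp_sim.sum(axis=1, keepdim=True) + 1e-12)

    pos_cnt = paddle.clip(pos.sum(axis=1), min=1.0)
    loss = -(pos * log_prob).sum(axis=1) / pos_cnt                       # mean log-prob over positives
    return loss.mean()
```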