移动paddle_detection
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
# QueryInst: Instances as Queries
|
||||
|
||||
## Introduction
|
||||
|
||||
QueryInst is a multi-stage end-to-end system that treats instances of interest as learnable queries, enabling query
|
||||
based object detectors, e.g., Sparse R-CNN, to have strong instance segmentation performance. The attributes of
|
||||
instances such as categories, bounding boxes, instance masks, and instance association embeddings are represented by
|
||||
queries in a unified manner. In QueryInst, a query is shared by both detection and segmentation via dynamic convolutions
|
||||
and driven by parallelly-supervised multi-stage learning.
|
||||
|
||||
## Model Zoo
|
||||
|
||||
| Backbone | Lr schd | Proposals | MultiScale | RandomCrop | bbox AP | mask AP | Download | Config |
|
||||
|:------------:|:-------:|:---------:|:----------:|:----------:|:-------:|:-------:|------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
|
||||
| ResNet50-FPN | 1x | 100 | × | × | 42.1 | 37.8 | [model](https://bj.bcebos.com/v1/paddledet/models/queryinst_r50_fpn_1x_pro100_coco.pdparams) | [config](./queryinst_r50_fpn_1x_pro100_coco.yml) |
|
||||
| ResNet50-FPN | 3x | 300 | √ | √ | 47.9 | 42.1 | [model](https://bj.bcebos.com/v1/paddledet/models/queryinst_r50_fpn_ms_crop_3x_pro300_coco.pdparams) | [config](./queryinst_r50_fpn_ms_crop_3x_pro300_coco.yml) |
|
||||
|
||||
- COCO val-set evaluation results.
|
||||
- These configurations are for 4-card training.
|
||||
|
||||
If your training environment differs (e.g., a different number of GPUs), adjust the following parameters as appropriate:
|
||||
|
||||
```yaml
|
||||
worker_num: 4
|
||||
TrainReader:
|
||||
  use_shared_memory: true
|
||||
find_unused_parameters: true
|
||||
```
|
||||
|
||||
## Citations
|
||||
|
||||
```bibtex
|
||||
@InProceedings{Fang_2021_ICCV,
|
||||
author = {Fang, Yuxin and Yang, Shusheng and Wang, Xinggang and Li, Yu and Fang, Chen and Shan, Ying and Feng, Bin and Liu, Wenyu},
|
||||
title = {Instances As Queries},
|
||||
booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
|
||||
month = {October},
|
||||
year = {2021},
|
||||
pages = {6910-6919}
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,17 @@
|
||||
epoch: 12
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [8, 11]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.001
|
||||
steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
optimizer:
|
||||
type: AdamW
|
||||
weight_decay: 0.0001
|
||||
@@ -0,0 +1,74 @@
|
||||
num_proposals: &num_proposals 100
|
||||
proposal_embedding_dim: &proposal_embedding_dim 256
|
||||
bbox_resolution: &bbox_resolution 7
|
||||
mask_resolution: &mask_resolution 14
|
||||
|
||||
architecture: QueryInst
|
||||
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
|
||||
|
||||
QueryInst:
|
||||
backbone: ResNet
|
||||
neck: FPN
|
||||
rpn_head: EmbeddingRPNHead
|
||||
roi_head: SparseRoIHead
|
||||
post_process: SparsePostProcess
|
||||
|
||||
ResNet:
|
||||
depth: 50
|
||||
norm_type: bn
|
||||
freeze_at: 0
|
||||
return_idx: [ 0, 1, 2, 3 ]
|
||||
num_stages: 4
|
||||
lr_mult_list: [ 0.1, 0.1, 0.1, 0.1 ]
|
||||
|
||||
FPN:
|
||||
out_channel: *proposal_embedding_dim
|
||||
extra_stage: 0
|
||||
|
||||
EmbeddingRPNHead:
|
||||
num_proposals: *num_proposals
|
||||
|
||||
SparseRoIHead:
|
||||
num_stages: 6
|
||||
bbox_roi_extractor:
|
||||
resolution: *bbox_resolution
|
||||
sampling_ratio: 2
|
||||
aligned: True
|
||||
mask_roi_extractor:
|
||||
resolution: *mask_resolution
|
||||
sampling_ratio: 2
|
||||
aligned: True
|
||||
bbox_head: DIIHead
|
||||
mask_head: DynamicMaskHead
|
||||
loss_func: QueryInstLoss
|
||||
|
||||
DIIHead:
|
||||
feedforward_channels: 2048
|
||||
dynamic_feature_channels: 64
|
||||
roi_resolution: *bbox_resolution
|
||||
num_attn_heads: 8
|
||||
dropout: 0.0
|
||||
num_ffn_fcs: 2
|
||||
num_cls_fcs: 1
|
||||
num_reg_fcs: 3
|
||||
|
||||
DynamicMaskHead:
|
||||
dynamic_feature_channels: 64
|
||||
roi_resolution: *mask_resolution
|
||||
num_convs: 4
|
||||
conv_kernel_size: 3
|
||||
conv_channels: 256
|
||||
upsample_method: 'deconv'
|
||||
upsample_scale_factor: 2
|
||||
|
||||
QueryInstLoss:
|
||||
focal_loss_alpha: 0.25
|
||||
focal_loss_gamma: 2.0
|
||||
class_weight: 2.0
|
||||
l1_weight: 5.0
|
||||
giou_weight: 2.0
|
||||
mask_weight: 8.0
|
||||
|
||||
SparsePostProcess:
|
||||
num_proposals: *num_proposals
|
||||
binary_thresh: 0.5
|
||||
@@ -0,0 +1,43 @@
|
||||
worker_num: 4
|
||||
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Poly2Mask: {del_poly: True}
|
||||
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
- Gt2SparseTarget: {}
|
||||
batch_size: 4
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: true
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
- Gt2SparseTarget: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
drop_last: false
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
|
||||
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
- Gt2SparseTarget: {}
|
||||
batch_size: 1
|
||||
shuffle: false
|
||||
@@ -0,0 +1,12 @@
|
||||
_BASE_: [
|
||||
'../datasets/coco_instance.yml',
|
||||
'../runtime.yml',
|
||||
'_base_/optimizer_1x.yml',
|
||||
'_base_/queryinst_r50_fpn.yml',
|
||||
'_base_/queryinst_reader.yml',
|
||||
]
|
||||
|
||||
log_iter: 50
|
||||
find_unused_parameters: true
|
||||
|
||||
weights: output/queryinst_r50_fpn_1x_pro100_coco/model_final
|
||||
@@ -0,0 +1,45 @@
|
||||
_BASE_: [
|
||||
'./queryinst_r50_fpn_1x_pro100_coco.yml',
|
||||
]
|
||||
|
||||
weights: output/queryinst_r50_fpn_ms_crop_3x_pro300_coco/model_final
|
||||
|
||||
EmbeddingRPNHead:
|
||||
num_proposals: 300
|
||||
|
||||
SparsePostProcess:
|
||||
num_proposals: 300
|
||||
|
||||
epoch: 36
|
||||
|
||||
LearningRate:
|
||||
base_lr: 0.0001
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
gamma: 0.1
|
||||
milestones: [27, 33]
|
||||
- !LinearWarmup
|
||||
start_factor: 0.001
|
||||
steps: 1000
|
||||
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- Poly2Mask: {del_poly: True}
|
||||
- RandomFlip: {prob: 0.5}
|
||||
- RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ], max_size: 1333 },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600, keep_empty: true },
|
||||
RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
||||
}
|
||||
- NormalizeImage: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }
|
||||
- Permute: {}
|
||||
batch_transforms:
|
||||
- PadBatch: {pad_to_stride: 32}
|
||||
- Gt2SparseTarget: {}
|
||||
batch_size: 4
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
collate_batch: false
|
||||
use_shared_memory: true
|
||||
Reference in New Issue
Block a user