移动paddle_detection

2024-09-24 17:02:56 +08:00
parent 90a6d5ec75
commit 3438cf6e0e
2025 changed files with 11 additions and 11 deletions
--- a/services/paddle_services/paddle_detection/configs/queryinst/README.md
+++ b/services/paddle_services/paddle_detection/configs/queryinst/README.md
@@ -0,0 +1,41 @@
+# QueryInst: Instances as Queries
+
+## Introduction
+
+QueryInst is a multi-stage end-to-end system that treats instances of interest as learnable queries, enabling
+query-based object detectors, e.g., Sparse R-CNN, to have strong instance segmentation performance. The attributes of
+instances such as categories, bounding boxes, instance masks, and instance association embeddings are represented by
+queries in a unified manner. In QueryInst, a query is shared by both detection and segmentation via dynamic convolutions
+and driven by parallelly-supervised multi-stage learning.
+
+## Model Zoo
+
+|   Backbone   | Lr schd | Proposals | MultiScale | RandomCrop | bbox AP | mask AP | Download                                                                                             | Config                                                   |
+|:------------:|:-------:|:---------:|:----------:|:----------:|:-------:|:-------:|------------------------------------------------------------------------------------------------------|----------------------------------------------------------|
+| ResNet50-FPN |   1x    |    100    |     ×      |     ×      |  42.1   |  37.8   | [model](https://bj.bcebos.com/v1/paddledet/models/queryinst_r50_fpn_1x_pro100_coco.pdparams)         | [config](./queryinst_r50_fpn_1x_pro100_coco.yml)         |
+| ResNet50-FPN |   3x    |    300    |     √      |     √      |  47.9   |  42.1   | [model](https://bj.bcebos.com/v1/paddledet/models/queryinst_r50_fpn_ms_crop_3x_pro300_coco.pdparams) | [config](./queryinst_r50_fpn_ms_crop_3x_pro300_coco.yml) |
+
+- COCO val-set evaluation results.
+- These configurations are intended for training on 4 GPUs (cards).
+
+If your training environment differs, adjust the following parameters as appropriate:
+
+```yaml
+worker_num: 4
+TrainReader:
+  use_shared_memory: true
+find_unused_parameters: true
+```
+
+## Citations
+
+```bibtex
+@InProceedings{Fang_2021_ICCV,
+    author    = {Fang, Yuxin and Yang, Shusheng and Wang, Xinggang and Li, Yu and Fang, Chen and Shan, Ying and Feng, Bin and Liu, Wenyu},
+    title     = {Instances As Queries},
+    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
+    month     = {October},
+    year      = {2021},
+    pages     = {6910-6919}
+}
+```
--- a/services/paddle_services/paddle_detection/configs/queryinst/_base_/optimizer_1x.yml
+++ b/services/paddle_services/paddle_detection/configs/queryinst/_base_/optimizer_1x.yml
@@ -0,0 +1,17 @@
+epoch: 12  # training length of the "1x" schedule defined by this file
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # presumably the LR is multiplied by this factor at each milestone — confirm
+    milestones: [8, 11]  # both fall inside the 12 epochs above; presumably epoch indices — confirm
+  - !LinearWarmup
+    start_factor: 0.001  # presumably warmup starts at start_factor * base_lr — confirm
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1  # gradient clipping setting; exact semantics live in OptimizerBuilder — confirm
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
--- a/services/paddle_services/paddle_detection/configs/queryinst/_base_/queryinst_r50_fpn.yml
+++ b/services/paddle_services/paddle_detection/configs/queryinst/_base_/queryinst_r50_fpn.yml
@@ -0,0 +1,74 @@
+num_proposals: &num_proposals 100  # anchor reused by EmbeddingRPNHead and SparsePostProcess below
+proposal_embedding_dim: &proposal_embedding_dim 256  # anchor reused as FPN.out_channel below
+bbox_resolution: &bbox_resolution 7  # anchor reused by SparseRoIHead.bbox_roi_extractor and DIIHead below
+mask_resolution: &mask_resolution 14  # anchor reused by SparseRoIHead.mask_roi_extractor and DynamicMaskHead below
+
+architecture: QueryInst
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+QueryInst:  # each value names a component that has its own same-named section in this file
+  backbone: ResNet
+  neck: FPN
+  rpn_head: EmbeddingRPNHead
+  roi_head: SparseRoIHead
+  post_process: SparsePostProcess
+
+ResNet:  # backbone selected by QueryInst.backbone
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [ 0, 1, 2, 3 ]  # four indices, matching num_stages: 4; presumably all stage outputs are returned — confirm
+  num_stages: 4
+  lr_mult_list: [ 0.1, 0.1, 0.1, 0.1 ]  # one entry per stage; presumably a per-stage LR multiplier — confirm
+
+FPN:  # neck selected by QueryInst.neck
+  out_channel: *proposal_embedding_dim  # resolves to 256 through the proposal_embedding_dim anchor
+  extra_stage: 0
+
+EmbeddingRPNHead:  # rpn_head selected by QueryInst.rpn_head
+  num_proposals: *num_proposals  # resolves to 100 through the num_proposals anchor
+
+SparseRoIHead:  # roi_head selected by QueryInst.roi_head
+  num_stages: 6
+  bbox_roi_extractor:
+    resolution: *bbox_resolution  # 7 via anchor; the same anchor feeds DIIHead.roi_resolution
+    sampling_ratio: 2
+    aligned: True
+  mask_roi_extractor:
+    resolution: *mask_resolution  # 14 via anchor; the same anchor feeds DynamicMaskHead.roi_resolution
+    sampling_ratio: 2
+    aligned: True
+  bbox_head: DIIHead  # configured in the DIIHead section of this file
+  mask_head: DynamicMaskHead  # configured in the DynamicMaskHead section of this file
+  loss_func: QueryInstLoss  # configured in the QueryInstLoss section of this file
+
+DIIHead:  # referenced by SparseRoIHead.bbox_head
+  feedforward_channels: 2048
+  dynamic_feature_channels: 64
+  roi_resolution: *bbox_resolution  # 7 via anchor; same value as SparseRoIHead.bbox_roi_extractor.resolution
+  num_attn_heads: 8
+  dropout: 0.0
+  num_ffn_fcs: 2
+  num_cls_fcs: 1
+  num_reg_fcs: 3
+
+DynamicMaskHead:  # referenced by SparseRoIHead.mask_head
+  dynamic_feature_channels: 64  # same value as DIIHead.dynamic_feature_channels
+  roi_resolution: *mask_resolution  # 14 via anchor; same value as SparseRoIHead.mask_roi_extractor.resolution
+  num_convs: 4
+  conv_kernel_size: 3
+  conv_channels: 256
+  upsample_method: 'deconv'
+  upsample_scale_factor: 2
+
+QueryInstLoss:  # referenced by SparseRoIHead.loss_func
+  focal_loss_alpha: 0.25
+  focal_loss_gamma: 2.0
+  class_weight: 2.0  # the four *_weight keys presumably scale the individual loss terms — confirm
+  l1_weight: 5.0
+  giou_weight: 2.0
+  mask_weight: 8.0
+
+SparsePostProcess:  # post_process selected by QueryInst.post_process
+  num_proposals: *num_proposals  # 100 via anchor; same value as EmbeddingRPNHead.num_proposals
+  binary_thresh: 0.5  # presumably the cutoff for binarizing predicted masks — confirm
--- a/services/paddle_services/paddle_detection/configs/queryinst/_base_/queryinst_reader.yml
+++ b/services/paddle_services/paddle_detection/configs/queryinst/_base_/queryinst_reader.yml
@@ -0,0 +1,43 @@
+worker_num: 4  # listed in the README among the parameters to adjust per environment
+
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - Poly2Mask: {del_poly: True}  # only the training reader includes this step
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}  # identical in EvalReader and TestReader
+  - RandomFlip: {prob: 0.5}  # only the training reader includes this step
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}  # identical in all three readers
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {}
+  batch_size: 4  # NOTE(review): README says these configs target 4-card training; presumably per card — confirm
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true  # listed in the README among the parameters to adjust per environment
+
+EvalReader:  # same pipeline as TrainReader minus Poly2Mask and RandomFlip
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}  # same settings as TrainReader
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}  # same settings as TrainReader
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+TestReader:  # same transforms as EvalReader; this section sets no drop_last key
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}  # same settings as EvalReader
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}  # same settings as EvalReader
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {}
+  batch_size: 1
+  shuffle: false
--- a/services/paddle_services/paddle_detection/configs/queryinst/queryinst_r50_fpn_1x_pro100_coco.yml
+++ b/services/paddle_services/paddle_detection/configs/queryinst/queryinst_r50_fpn_1x_pro100_coco.yml
@@ -0,0 +1,12 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/queryinst_r50_fpn.yml',
+  '_base_/queryinst_reader.yml',
+]
+
+log_iter: 50  # presumably the logging interval in iterations — confirm
+find_unused_parameters: true  # listed in the README among the parameters to adjust per environment
+
+weights: output/queryinst_r50_fpn_1x_pro100_coco/model_final  # path ends in this config's own name
--- a/services/paddle_services/paddle_detection/configs/queryinst/queryinst_r50_fpn_ms_crop_3x_pro300_coco.yml
+++ b/services/paddle_services/paddle_detection/configs/queryinst/queryinst_r50_fpn_ms_crop_3x_pro300_coco.yml
@@ -0,0 +1,45 @@
+_BASE_: [
+  './queryinst_r50_fpn_1x_pro100_coco.yml',
+]
+
+weights: output/queryinst_r50_fpn_ms_crop_3x_pro300_coco/model_final  # replaces the 1x weights path (assuming keys here override _BASE_ values)
+
+EmbeddingRPNHead:
+  num_proposals: 300  # raises the 100 set in _base_/queryinst_r50_fpn.yml
+
+SparsePostProcess:  # was QueryInstPostProcess, a key no section of the base model config defines or references
+  num_proposals: 300  # kept equal to EmbeddingRPNHead.num_proposals, as the base config does via its shared anchor
+
+epoch: 36  # three times the 12 epochs of _base_/optimizer_1x.yml
+
+LearningRate:
+  base_lr: 0.0001  # same value as _base_/optimizer_1x.yml
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1  # same value as the 1x schedule
+    milestones: [27, 33]  # the 1x schedule uses [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001  # warmup settings repeat the 1x schedule
+    steps: 1000
+
+TrainReader:
+  sample_transforms:  # differs from _base_/queryinst_reader.yml: the fixed Resize is replaced by RandomSelect
+  - Decode: {}
+  - Poly2Mask: {del_poly: True}
+  - RandomFlip: {prob: 0.5}  # placed before resizing here; the base reader flips after Resize
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ], max_size: 1333 },
+                        RandomSizeCrop: { min_size: 384, max_size: 600, keep_empty: true },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: { is_scale: true, mean: [ 0.485,0.456,0.406 ], std: [ 0.229, 0.224,0.225 ] }  # same values as the base reader
+  - Permute: {}
+  batch_transforms:  # same two steps as the base reader
+  - PadBatch: {pad_to_stride: 32}
+  - Gt2SparseTarget: {}
+  batch_size: 4  # this and the keys below repeat the base TrainReader values
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: true