更换文档检测模型
This commit is contained in:
@@ -0,0 +1,254 @@
|
||||
use_gpu: true
|
||||
log_iter: 50
|
||||
save_dir: output
|
||||
snapshot_epoch: 1
|
||||
weights: output/petr_resnet50_16x2_coco/model_final
|
||||
epoch: 100
|
||||
num_joints: &num_joints 17
|
||||
pixel_std: &pixel_std 200
|
||||
metric: COCO
|
||||
num_classes: 1
|
||||
trainsize: &trainsize 512
|
||||
flip_perm: &flip_perm [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||
find_unused_parameters: False
|
||||
|
||||
#####model
|
||||
architecture: PETR
|
||||
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PETR_pretrained.pdparams
|
||||
|
||||
PETR:
|
||||
backbone:
|
||||
name: ResNet
|
||||
depth: 50
|
||||
variant: b
|
||||
norm_type: bn
|
||||
freeze_norm: True
|
||||
freeze_at: 0
|
||||
return_idx: [1,2,3]
|
||||
num_stages: 4
|
||||
lr_mult_list: [0.1, 0.1, 0.1, 0.1]
|
||||
neck:
|
||||
name: ChannelMapper
|
||||
in_channels: [512, 1024, 2048]
|
||||
kernel_size: 1
|
||||
out_channels: 256
|
||||
norm_type: "gn"
|
||||
norm_groups: 32
|
||||
act: None
|
||||
num_outs: 4
|
||||
bbox_head:
|
||||
name: PETRHead
|
||||
num_query: 300
|
||||
num_classes: 1 # only person
|
||||
in_channels: 2048
|
||||
sync_cls_avg_factor: true
|
||||
with_kpt_refine: true
|
||||
transformer:
|
||||
name: PETRTransformer
|
||||
as_two_stage: true
|
||||
encoder:
|
||||
name: TransformerEncoder
|
||||
encoder_layer:
|
||||
name: TransformerEncoderLayer
|
||||
d_model: 256
|
||||
attn:
|
||||
name: MSDeformableAttention
|
||||
embed_dim: 256
|
||||
num_heads: 8
|
||||
num_levels: 4
|
||||
num_points: 4
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
num_layers: 6
|
||||
decoder:
|
||||
name: PETR_TransformerDecoder
|
||||
num_layers: 3
|
||||
return_intermediate: true
|
||||
decoder_layer:
|
||||
name: PETR_TransformerDecoderLayer
|
||||
d_model: 256
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
self_attn:
|
||||
name: MultiHeadAttention
|
||||
embed_dim: 256
|
||||
num_heads: 8
|
||||
dropout: 0.1
|
||||
cross_attn:
|
||||
name: MultiScaleDeformablePoseAttention
|
||||
embed_dims: 256
|
||||
num_heads: 8
|
||||
num_levels: 4
|
||||
num_points: 17
|
||||
hm_encoder:
|
||||
name: TransformerEncoder
|
||||
encoder_layer:
|
||||
name: TransformerEncoderLayer
|
||||
d_model: 256
|
||||
attn:
|
||||
name: MSDeformableAttention
|
||||
embed_dim: 256
|
||||
num_heads: 8
|
||||
num_levels: 1
|
||||
num_points: 4
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
num_layers: 1
|
||||
refine_decoder:
|
||||
name: PETR_DeformableDetrTransformerDecoder
|
||||
num_layers: 2
|
||||
return_intermediate: true
|
||||
decoder_layer:
|
||||
name: PETR_TransformerDecoderLayer
|
||||
d_model: 256
|
||||
dim_feedforward: 1024
|
||||
dropout: 0.1
|
||||
self_attn:
|
||||
name: MultiHeadAttention
|
||||
embed_dim: 256
|
||||
num_heads: 8
|
||||
dropout: 0.1
|
||||
cross_attn:
|
||||
name: MSDeformableAttention
|
||||
embed_dim: 256
|
||||
num_levels: 4
|
||||
positional_encoding:
|
||||
name: PositionEmbedding
|
||||
num_pos_feats: 128
|
||||
normalize: true
|
||||
offset: -0.5
|
||||
loss_cls:
|
||||
name: Weighted_FocalLoss
|
||||
use_sigmoid: true
|
||||
gamma: 2.0
|
||||
alpha: 0.25
|
||||
loss_weight: 2.0
|
||||
reduction: "mean"
|
||||
loss_kpt:
|
||||
name: L1Loss
|
||||
loss_weight: 70.0
|
||||
loss_kpt_rpn:
|
||||
name: L1Loss
|
||||
loss_weight: 70.0
|
||||
loss_oks:
|
||||
name: OKSLoss
|
||||
loss_weight: 2.0
|
||||
loss_hm:
|
||||
name: CenterFocalLoss
|
||||
loss_weight: 4.0
|
||||
loss_kpt_refine:
|
||||
name: L1Loss
|
||||
loss_weight: 80.0
|
||||
loss_oks_refine:
|
||||
name: OKSLoss
|
||||
loss_weight: 3.0
|
||||
assigner:
|
||||
name: PoseHungarianAssigner
|
||||
cls_cost:
|
||||
name: FocalLossCost
|
||||
weight: 2.0
|
||||
kpt_cost:
|
||||
name: KptL1Cost
|
||||
weight: 70.0
|
||||
oks_cost:
|
||||
name: OksCost
|
||||
weight: 7.0
|
||||
|
||||
#####optimizer
|
||||
LearningRate:
|
||||
base_lr: 0.0002
|
||||
schedulers:
|
||||
- !PiecewiseDecay
|
||||
milestones: [80]
|
||||
gamma: 0.1
|
||||
use_warmup: false
|
||||
# - !LinearWarmup
|
||||
# start_factor: 0.001
|
||||
# steps: 1000
|
||||
|
||||
OptimizerBuilder:
|
||||
clip_grad_by_norm: 0.1
|
||||
optimizer:
|
||||
type: AdamW
|
||||
regularizer:
|
||||
factor: 0.0001
|
||||
type: L2
|
||||
|
||||
|
||||
#####data
|
||||
TrainDataset:
|
||||
!KeypointBottomUpCocoDataset
|
||||
image_dir: train2017
|
||||
anno_path: annotations/person_keypoints_train2017.json
|
||||
dataset_dir: dataset/coco
|
||||
num_joints: *num_joints
|
||||
return_mask: false
|
||||
|
||||
EvalDataset:
|
||||
!KeypointBottomUpCocoDataset
|
||||
image_dir: val2017
|
||||
anno_path: annotations/person_keypoints_val2017.json
|
||||
dataset_dir: dataset/coco
|
||||
num_joints: *num_joints
|
||||
test_mode: true
|
||||
return_mask: false
|
||||
|
||||
TestDataset:
|
||||
!ImageFolder
|
||||
anno_path: dataset/coco/keypoint_imagelist.txt
|
||||
|
||||
worker_num: 2
|
||||
global_mean: &global_mean [0.485, 0.456, 0.406]
|
||||
global_std: &global_std [0.229, 0.224, 0.225]
|
||||
TrainReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- PhotoMetricDistortion:
|
||||
brightness_delta: 32
|
||||
contrast_range: [0.5, 1.5]
|
||||
saturation_range: [0.5, 1.5]
|
||||
hue_delta: 18
|
||||
- KeyPointFlip:
|
||||
flip_prob: 0.5
|
||||
flip_permutation: *flip_perm
|
||||
- RandomAffine:
|
||||
max_degree: 30
|
||||
scale: [1.0, 1.0]
|
||||
max_shift: 0.
|
||||
trainsize: -1
|
||||
- RandomSelect: { transforms1: [ RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]]} ],
|
||||
transforms2: [
|
||||
RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
||||
RandomSizeCrop: { min_size: 384, max_size: 600},
|
||||
RandomShortSideRangeResize: { scales: [[400, 1400], [1400, 1400]]} ]}
|
||||
batch_transforms:
|
||||
- NormalizeImage: {mean: *global_mean, std: *global_std, is_scale: True}
|
||||
- PadGT: {pad_img: True, minimum_gtnum: 1}
|
||||
- Permute: {}
|
||||
batch_size: 2
|
||||
shuffle: true
|
||||
drop_last: true
|
||||
use_shared_memory: true
|
||||
collate_batch: true
|
||||
|
||||
EvalReader:
|
||||
sample_transforms:
|
||||
- PETR_Resize: {img_scale: [[800, 1333]], keep_ratio: True}
|
||||
# - MultiscaleTestResize: {origin_target_size: [[800, 1333]], use_flip: false}
|
||||
- NormalizeImage:
|
||||
mean: *global_mean
|
||||
std: *global_std
|
||||
is_scale: true
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
|
||||
TestReader:
|
||||
sample_transforms:
|
||||
- Decode: {}
|
||||
- EvalAffine: {size: 800}
|
||||
- NormalizeImage:
|
||||
mean: *global_mean
|
||||
std: *global_std
|
||||
is_scale: true
|
||||
- Permute: {}
|
||||
batch_size: 1
|
||||
Reference in New Issue
Block a user