Replace the document detection model
paddle_detection/docs/tutorials/KeyPointConfigGuide_en.md (new file, 299 lines)
# Config YAML Guide

This keypoint config guide takes [tinypose_256x192.yml](../../configs/keypoint/tiny_pose/tinypose_256x192.yml) as an example.

```yaml
use_gpu: true #whether to train with GPU
log_iter: 5 #print a log every 5 iterations
save_dir: output #the directory in which to save models
snapshot_epoch: 10 #save the model every 10 epochs
weights: output/tinypose_256x192/model_final #the weights to load (without the ".pdparams" suffix)
epoch: 420 #the total number of epochs to train
num_joints: &num_joints 17 #number of joints
pixel_std: &pixel_std 200 #the standard pixel length (can be ignored)
metric: KeyPointTopDownCOCOEval #metric function
num_classes: 1 #number of classes (only used for object detection, can be ignored here)
train_height: &train_height 256 #the height of the model input
train_width: &train_width 192 #the width of the model input
trainsize: &trainsize [*train_width, *train_height] #the shape of the model input
hmsize: &hmsize [48, 64] #the shape of the model output
flip_perm: &flip_perm [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] #the correspondence between left and right keypoint ids: for example, after an image flip the left wrist becomes the right wrist and vice versa

#####model
architecture: TopDownHRNet #the model architecture

TopDownHRNet: #TopDownHRNet config
  backbone: LiteHRNet #the backbone to use
  post_process: HRNetPostProcess #the post-processing to use
  flip_perm: *flip_perm #same as "flip_perm" above
  num_joints: *num_joints #the number of joints (the number of output channels)
  width: &width 40 #backbone output channels
  loss: KeyPointMSELoss #loss function
  use_dark: true #whether to use DarkPose in post-processing

LiteHRNet: #LiteHRNet config
  network_type: wider_naive #the network type of the backbone
  freeze_at: -1 #the branch whose id matches this value is frozen (no backward); -1 means all branches are trained
  freeze_norm: false #whether to freeze normalization weights
  return_idx: [0] #the branch ids from which to fetch features

KeyPointMSELoss: #loss config
  use_target_weight: true #whether to use target weights
  loss_scale: 1.0 #loss weight, final_loss = loss * loss_scale

#####optimizer
LearningRate: #learning rate config
  base_lr: 0.002 #the base learning rate
  schedulers:
    - !PiecewiseDecay #the scheduler used to adjust the learning rate
      milestones: [380, 410] #the milestones (epochs) at which the learning rate is adjusted
      gamma: 0.1 #the decay ratio, new_lr = lr * gamma
    - !LinearWarmup #warmup config
      start_factor: 0.001 #the initial ratio with respect to base_lr
      steps: 500 #the number of iterations used for warmup

OptimizerBuilder: #optimizer config
  optimizer:
    type: Adam #optimizer type: Adam
  regularizer:
    factor: 0.0 #the regularizer weight
    type: L2 #regularizer type: L2/L1

#####data
TrainDataset: #train dataset config
  !KeypointTopDownCocoDataset #the dataset class used to load data
    image_dir: "" #the image directory, relative to dataset_dir
    anno_path: aic_coco_train_cocoformat.json #the training annotation list, COCO format, relative to dataset_dir
    dataset_dir: dataset #the dataset directory; image_dir and anno_path are relative to it
    num_joints: *num_joints #number of joints
    trainsize: *trainsize #the input size of the model
    pixel_std: *pixel_std #same as "pixel_std" above
    use_gt_bbox: True #whether to use the ground-truth bbox, commonly used in eval

EvalDataset: #eval dataset config
  !KeypointTopDownCocoDataset #the dataset class used to load data
    image_dir: val2017 #the image directory, relative to dataset_dir
    anno_path: annotations/person_keypoints_val2017.json #the eval annotation list, COCO format, relative to dataset_dir
    dataset_dir: dataset/coco #the dataset directory; image_dir and anno_path are relative to it
    num_joints: *num_joints #number of joints
    trainsize: *trainsize #the input size of the model
    pixel_std: *pixel_std #same as "pixel_std" above
    use_gt_bbox: True #whether to use the ground-truth bbox, commonly used in eval
    image_thre: 0.5 #the score threshold for detected boxes, used when use_gt_bbox is False

TestDataset: #the test dataset without labels
  !ImageFolder #the class used to load data; finds images by folder
    anno_path: dataset/coco/keypoint_imagelist.txt #the image list file

worker_num: 2 #the number of workers used to load the dataset
global_mean: &global_mean [0.485, 0.456, 0.406] #the means used to normalize images
global_std: &global_std [0.229, 0.224, 0.225] #the stds used to normalize images

TrainReader: #TrainReader config
  sample_transforms: #transform config
    - RandomFlipHalfBodyTransform: #random flip & random half-body transform
        scale: 0.25 #the maximum scale for the size transform
        rot: 30 #the maximum rotation for the transform
        num_joints_half_body: 8 #the half-body transform is skipped when fewer joints than this are found
        prob_half_body: 0.3 #the probability of applying the half-body transform
        pixel_std: *pixel_std #same as "pixel_std" above
        trainsize: *trainsize #the input size of the model
        upper_body_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] #the joint ids that belong to the upper body
        flip_pairs: *flip_perm #same as "flip_perm" above
    - AugmentationbyInformantionDropping:
        prob_cutout: 0.5 #the probability of cutting out a keypoint
        offset_factor: 0.05 #the jitter offset of the cutout position, expressed as a percentage of the train width
        num_patch: 1 #the number of areas to cut out
        trainsize: *trainsize #same as "trainsize" above
    - TopDownAffine:
        trainsize: *trainsize #same as "trainsize" above
        use_udp: true #whether to use unbiased data processing (UDP), only relevant for flip eval
    - ToHeatmapsTopDown_DARK: #generate gt heatmaps
        hmsize: *hmsize #the size of the output heatmaps
        sigma: 2 #the sigma of the Gaussian kernel used to generate gt heatmaps
  batch_transforms:
    - NormalizeImage: #image normalization
        mean: *global_mean #mean for normalization
        std: *global_std #std for normalization
        is_scale: true #whether to scale every pixel by 1/255, mapping pixels from [0, 255] to [0, 1]
    - Permute: {} #channel transform from HWC to CHW
  batch_size: 128 #batch size used for training
  shuffle: true #whether to shuffle the images before training
  drop_last: false #whether to drop the last batch when it is smaller than batch_size

EvalReader:
  sample_transforms: #transform config
    - TopDownAffine: #affine config
        trainsize: *trainsize #same as "trainsize" above
        use_udp: true #whether to use unbiased data processing (UDP), only relevant for flip eval
  batch_transforms:
    - NormalizeImage: #image normalization; the values should be the same as in TrainReader
        mean: *global_mean
        std: *global_std
        is_scale: true
    - Permute: {} #channel transform from HWC to CHW
  batch_size: 16 #batch size used for evaluation

TestReader:
  inputs_def:
    image_shape: [3, *train_height, *train_width] #the input dimensions used by the model, CHW
  sample_transforms:
    - Decode: {} #load the image
    - TopDownEvalAffine: #affine class used in eval
        trainsize: *trainsize #the input size of the model
    - NormalizeImage: #image normalization; the values should be the same as in TrainReader
        mean: *global_mean #mean for normalization
        std: *global_std #std for normalization
        is_scale: true #whether to scale every pixel by 1/255, mapping pixels from [0, 255] to [0, 1]
    - Permute: {} #channel transform from HWC to CHW
  batch_size: 1 #batch size used for test
  fuse_normalize: false #whether to fuse normalization into the model when exporting; this speeds up inference
```
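
To make the `flip_perm` setting concrete: when an image is flipped horizontally, every x coordinate is mirrored and each left/right keypoint pair is swapped. The following is a minimal illustrative sketch, not the actual PaddleDetection operator; the `flip_keypoints` helper and the `(num_joints, 2)` array layout are assumptions made for the example.

```python
import numpy as np

# flip pairs from the config: each pair is (left_id, right_id) in COCO keypoint order
FLIP_PERM = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]

def flip_keypoints(joints, image_width, flip_pairs=FLIP_PERM):
    """Mirror keypoints horizontally and swap left/right joint ids.

    joints: float array of shape (num_joints, 2) holding (x, y) coordinates.
    """
    flipped = joints.copy()
    # mirror the x coordinate around the vertical image axis
    flipped[:, 0] = image_width - 1 - flipped[:, 0]
    # after mirroring, the pixel that showed the left wrist now shows the right
    # wrist, so the two rows of each pair must be exchanged
    for left, right in flip_pairs:
        flipped[[left, right]] = flipped[[right, left]]
    return flipped

# tiny example: 17 COCO joints on a 192-pixel-wide input
joints = np.random.rand(17, 2) * [192, 256]
print(flip_keypoints(joints, image_width=192)[:3])
```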
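
The `LearningRate` section combines `!LinearWarmup` with `!PiecewiseDecay`: the learning rate ramps up linearly from `base_lr * start_factor` over the first 500 iterations, then drops by a factor of `gamma` at epochs 380 and 410. A rough sketch of that schedule, assuming the usual linear-warmup formula (the exact Paddle scheduler internals may differ slightly):

```python
BASE_LR = 0.002          # base_lr
START_FACTOR = 0.001     # LinearWarmup.start_factor
WARMUP_STEPS = 500       # LinearWarmup.steps (iterations)
MILESTONES = [380, 410]  # PiecewiseDecay.milestones (epochs)
GAMMA = 0.1              # PiecewiseDecay.gamma

def learning_rate(global_step, epoch):
    """Approximate learning rate at a given iteration/epoch under this config."""
    # piecewise decay: multiply by gamma once for every milestone already passed
    lr = BASE_LR * GAMMA ** sum(epoch >= m for m in MILESTONES)
    if global_step < WARMUP_STEPS:
        # linear warmup from start_factor * lr up to lr over the first 500 iterations
        alpha = global_step / WARMUP_STEPS
        lr *= START_FACTOR + (1.0 - START_FACTOR) * alpha
    return lr

print(learning_rate(0, 0))        # 2e-06 at the very first iteration
print(learning_rate(500, 0))      # 0.002 once warmup finishes
print(learning_rate(10**6, 400))  # 0.0002 after the first milestone (epoch 380)
```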
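
`ToHeatmapsTopDown_DARK` converts keypoint coordinates into one Gaussian heatmap per joint at `hmsize` resolution (48x64, i.e. 1/4 of the 192x256 input). The snippet below is a simplified illustration of that idea with `sigma: 2`; the real operator also produces target weights and applies the DARK-style encoding, and the `keypoints_to_heatmaps` helper is an assumption for the example.

```python
import numpy as np

def keypoints_to_heatmaps(joints, trainsize=(192, 256), hmsize=(48, 64), sigma=2):
    """Render one Gaussian per joint onto a (num_joints, hm_h, hm_w) target."""
    in_w, _ = trainsize
    hm_w, hm_h = hmsize
    stride = in_w / hm_w  # 4 for 192 -> 48
    heatmaps = np.zeros((len(joints), hm_h, hm_w), dtype=np.float32)
    ys, xs = np.mgrid[0:hm_h, 0:hm_w]
    for j, (x, y) in enumerate(joints):
        # map input-image coordinates to heatmap coordinates, then place a Gaussian
        mu_x, mu_y = x / stride, y / stride
        heatmaps[j] = np.exp(-((xs - mu_x) ** 2 + (ys - mu_y) ** 2) / (2 * sigma ** 2))
    return heatmaps

hm = keypoints_to_heatmaps(np.random.rand(17, 2) * [192, 256])
print(hm.shape)  # (17, 64, 48)
```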
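
Finally, `NormalizeImage` with `is_scale: true` followed by `Permute` corresponds to standard ImageNet-style preprocessing. A minimal numpy equivalent, with illustrative names only:

```python
import numpy as np

GLOBAL_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
GLOBAL_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

def preprocess(image_hwc_uint8):
    """Scale to [0, 1], normalize per channel, then convert HWC -> CHW."""
    img = image_hwc_uint8.astype(np.float32) / 255.0  # is_scale: true
    img = (img - GLOBAL_MEAN) / GLOBAL_STD            # NormalizeImage
    return img.transpose(2, 0, 1)                     # Permute: HWC -> CHW

dummy = np.random.randint(0, 256, (256, 192, 3), dtype=np.uint8)
print(preprocess(dummy).shape)  # (3, 256, 192)
```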