Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,128 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import argparse
from convert import load_dota_infos, data_to_coco
from slicebase import SliceBase
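# Category names for DOTA v1.0 (15 classes); v1.5 adds 'container-crane' and
# v2.0 additionally adds 'airport' and 'helipad'.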
wordname_15 = [
'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
'harbor', 'swimming-pool', 'helicopter'
]
wordname_16 = wordname_15 + ['container-crane']
wordname_18 = wordname_16 + ['airport', 'helipad']
DATA_CLASSES = {
'dota10': wordname_15,
'dota15': wordname_16,
'dota20': wordname_18
}


def parse_args():
parser = argparse.ArgumentParser('prepare data for training')
parser.add_argument(
'--input_dirs',
nargs='+',
type=str,
default=None,
        help='input dirs, each containing an image dir and a labelTxt dir')
parser.add_argument(
'--output_dir',
type=str,
default=None,
        help='output dir which will contain the image and labelTxt dirs and the coco style json file'
)
parser.add_argument(
'--coco_json_file',
type=str,
default='',
        help='name of the coco style json annotation file written under output_dir')
parser.add_argument('--subsize', type=int, default=1024, help='patch size')
    parser.add_argument('--gap', type=int, default=200, help='overlap between adjacent patches')
parser.add_argument(
        '--data_type', type=str, default='dota10', help='dataset version: dota10, dota15 or dota20')
parser.add_argument(
'--rates',
nargs='+',
type=float,
default=[1.],
help='scales for multi-slice training')
parser.add_argument(
        '--nproc', type=int, default=8, help='number of worker processes')
parser.add_argument(
'--iof_thr',
type=float,
default=0.5,
        help='minimal IoF between an object and a sliced window')
parser.add_argument(
'--image_only',
action='store_true',
default=False,
        help='only process images')
args = parser.parse_args()
return args
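
# Load per-image annotation records; only the DOTA directory layout is supported.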
def load_dataset(input_dir, nproc, data_type):
if 'dota' in data_type.lower():
infos = load_dota_infos(input_dir, nproc)
else:
raise ValueError('only dota dataset is supported now')
return infos
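
# End-to-end preparation: load annotations from every input dir, slice large
# images into fixed-size patches (optionally at several scales), then
# optionally export the sliced annotations as a COCO-style json file.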
def main():
args = parse_args()
infos = []
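    # Gather annotation records from every input directory.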
for input_dir in args.input_dirs:
infos += load_dataset(input_dir, args.nproc, args.data_type)
slicer = SliceBase(
args.gap,
args.subsize,
args.iof_thr,
num_process=args.nproc,
image_only=args.image_only)
slicer.slice_data(infos, args.rates, args.output_dir)
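    # If a json file name was given, re-read the sliced annotations from
    # output_dir and convert them to a single COCO-style annotation file.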
if args.coco_json_file:
infos = load_dota_infos(args.output_dir, args.nproc)
coco_json_file = os.path.join(args.output_dir, args.coco_json_file)
class_names = DATA_CLASSES[args.data_type]
        data_to_coco(infos, coco_json_file, class_names, args.nproc)


if __name__ == '__main__':
main()
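
For reference, a hypothetical invocation (the script name prepare_data.py and all paths below are placeholders, not taken from this commit) that slices the DOTA-1.0 train and val sets into 1024x1024 patches and writes a COCO-style annotation file:

python prepare_data.py \
    --input_dirs dota/train/ dota/val/ \
    --output_dir dota_sliced/trainval1024/ \
    --coco_json_file DOTA_trainval1024.json \
    --subsize 1024 \
    --gap 200 \
    --rates 1.0 \
    --data_type dota10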