更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/configs/rotate/tools/convert.py
+++ b/paddle_detection/configs/rotate/tools/convert.py
@@ -0,0 +1,163 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import json
+import cv2
+from tqdm import tqdm
+from multiprocessing import Pool
+
+
+def load_dota_info(image_dir, anno_dir, file_name, ext=None):
+    """Load the DOTA-style annotation that belongs to one image file.
+
+    Args:
+        image_dir: directory that contains ``file_name``.
+        anno_dir: directory searched for ``<base name>.txt``.
+        file_name: image file name (not a full path).
+        ext: optional filter; either a single extension string such as
+            ``'.png'`` or a collection of such strings. Extensions are
+            compared exactly with ``os.path.splitext(file_name)[1]``.
+
+    Returns:
+        ``None`` when the file is rejected by ``ext``; otherwise a dict with
+        ``'image_file'`` (joined path) and ``'annotation'`` (a list of dicts
+        holding ``'poly'``, ``'name'`` and ``'difficult'``). The list is
+        empty when no annotation file exists.
+    """
+    base_name, extension = os.path.splitext(file_name)
+    if ext:
+        # Normalise to a tuple so a plain string is matched as a whole and
+        # not as a substring ('' or '.pn' must not pass a '.png' filter).
+        allowed = (ext, ) if isinstance(ext, str) else tuple(ext)
+        if extension not in allowed:
+            return None
+
+    info = {'image_file': os.path.join(image_dir, file_name), 'annotation': []}
+    anno_file = os.path.join(anno_dir, base_name + '.txt')
+    if not os.path.exists(anno_file):
+        return info
+
+    with open(anno_file, 'r') as f:
+        for line in f:
+            items = line.strip().split()
+            # lines with fewer than 9 fields carry no polygon + class name
+            if len(items) < 9:
+                continue
+
+            info['annotation'].append({
+                'poly': list(map(float, items[:8])),
+                'name': items[8],
+                # the difficulty flag is optional; default to '0'
+                'difficult': '0' if len(items) == 9 else items[9],
+            })
+
+    return info
+
+
+def load_dota_infos(root_dir, num_process=8, ext=None):
+    """Collect annotation infos for every file under ``root_dir/images``.
+
+    Args:
+        root_dir: dataset root; images are read from ``images`` and
+            annotations from ``labelTxt`` below it.
+        num_process: worker count; values above 1 dispatch the per-file work
+            to a ``multiprocessing.Pool``, otherwise files are handled inline.
+        ext: extension filter forwarded to ``load_dota_info``.
+
+    Returns:
+        List of the truthy results of ``load_dota_info`` in listing order.
+    """
+    image_dir = os.path.join(root_dir, 'images')
+    anno_dir = os.path.join(root_dir, 'labelTxt')
+
+    if num_process > 1:
+        pool = Pool(num_process)
+        pending = [
+            pool.apply_async(load_dota_info,
+                             (image_dir, anno_dir, file_name, ext))
+            for file_name in os.listdir(image_dir)
+        ]
+        # wait for all workers before collecting their results
+        pool.close()
+        pool.join()
+        loaded = [task.get() for task in pending]
+    else:
+        loaded = [
+            load_dota_info(image_dir, anno_dir, file_name, ext)
+            for file_name in os.listdir(image_dir)
+        ]
+
+    # rejected files come back as None and are dropped here
+    return [info for info in loaded if info]
+
+
+def process_single_sample(info, image_id, class_names):
+    """Convert one loaded sample into COCO image / annotation records.
+
+    Args:
+        info: dict with ``'image_file'`` and ``'annotation'`` as produced by
+            ``load_dota_info``.
+        image_id: id stored in the image record and in each annotation.
+        class_names: ordered class names; ``category_id`` is the 1-based
+            position of the object's name in this sequence.
+
+    Returns:
+        Tuple ``(image_record, annotation_records)``. Annotation records do
+        not carry an ``'id'`` yet.
+
+    Raises:
+        IOError: if the image file cannot be read by OpenCV.
+        ValueError: if an object's name is not in ``class_names``.
+    """
+    image_file = info['image_file']
+    image = cv2.imread(image_file)
+    if image is None:
+        # cv2.imread reports failure by returning None instead of raising
+        raise IOError('failed to read image: {}'.format(image_file))
+    img_height, img_width = image.shape[:2]
+
+    single_image = dict()
+    single_image['file_name'] = os.path.split(image_file)[-1]
+    single_image['id'] = image_id
+    single_image['width'] = img_width
+    single_image['height'] = img_height
+
+    # process annotation field
+    single_objs = []
+    for obj in info['annotation']:
+        poly, name, difficult = obj['poly'], obj['name'], obj['difficult']
+        # objects flagged with difficulty '2' are left out
+        if difficult == '2':
+            continue
+
+        xs, ys = poly[0::2], poly[1::2]
+        xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
+        # axis-aligned box enclosing the polygon; named apart from the image
+        # dimensions so the two are never confused
+        box_width, box_height = xmax - xmin, ymax - ymin
+
+        single_obj = dict()
+        single_obj['category_id'] = class_names.index(name) + 1
+        single_obj['segmentation'] = [poly]
+        single_obj['iscrowd'] = 0
+        single_obj['bbox'] = [xmin, ymin, box_width, box_height]
+        single_obj['area'] = box_height * box_width
+        single_obj['image_id'] = image_id
+        single_objs.append(single_obj)
+
+    return (single_image, single_objs)
+
+
+def data_to_coco(infos, output_path, class_names, num_process):
+    """Write ``infos`` to ``output_path`` as a COCO-style JSON file.
+
+    Args:
+        infos: list of sample dicts accepted by ``process_single_sample``.
+        output_path: destination JSON file.
+        class_names: ordered class names; category ids start at 1.
+        num_process: values above 1 process samples in a worker pool.
+    """
+    categories = [{
+        'id': category_id,
+        'name': name,
+        'supercategory': name
+    } for category_id, name in enumerate(class_names, start=1)]
+
+    progress = tqdm(total=len(infos), desc='data to coco')
+    if num_process > 1:
+        pool = Pool(num_process)
+        pending = [
+            pool.apply_async(
+                process_single_sample, (info, image_id, class_names),
+                callback=lambda _: progress.update())
+            for image_id, info in enumerate(infos, start=1)
+        ]
+        pool.close()
+        pool.join()
+        samples = [task.get() for task in pending]
+    else:
+        samples = []
+        for image_id, info in enumerate(infos, start=1):
+            samples.append(
+                process_single_sample(info, image_id, class_names))
+            progress.update()
+    progress.close()
+
+    images = [image_record for image_record, _ in samples]
+    annotations = []
+    for _, sample_annos in samples:
+        annotations.extend(sample_annos)
+
+    # annotation ids are assigned globally, in image order, starting at 1
+    for anno_id, anno in enumerate(annotations, start=1):
+        anno['id'] = anno_id
+
+    data_dict = {
+        'categories': categories,
+        'images': images,
+        'annotations': annotations,
+    }
+    with open(output_path, 'w') as f:
+        json.dump(data_dict, f)
--- a/paddle_detection/configs/rotate/tools/generate_result.py
+++ b/paddle_detection/configs/rotate/tools/generate_result.py
@@ -0,0 +1,266 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+import glob
+
+import numpy as np
+from multiprocessing import Pool
+from functools import partial
+from shapely.geometry import Polygon
+import argparse
+
+# Base list of 15 class names; the longer lists below extend it.
+wordname_15 = [
+    'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
+    'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
+    'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
+    'harbor', 'swimming-pool', 'helicopter'
+]
+
+# 16 classes: the base list plus 'container-crane'.
+wordname_16 = wordname_15 + ['container-crane']
+
+# 18 classes: the 16-class list plus 'airport' and 'helipad'.
+wordname_18 = wordname_16 + ['airport', 'helipad']
+
+# Maps the --data_type command-line value to its class-name list.
+# NOTE(review): the keys presumably denote DOTA v1.0 / v1.5 / v2.0 - confirm.
+DATA_CLASSES = {
+    'dota10': wordname_15,
+    'dota15': wordname_16,
+    'dota20': wordname_18
+}
+
+
+def rbox_iou(g, p):
+    """
+    Compute the IoU of two quadrilaterals.
+
+    Args:
+        g: sequence whose first 8 values are x1, y1, ..., x4, y4
+        p: sequence with the same layout as ``g``
+
+    Returns: intersection area divided by union area; 0 when either shape
+        is invalid or the union is empty
+    """
+    # buffer(0) repairs self-intersecting rings; the result is used as-is
+    # because it is not guaranteed to be a single Polygon any more
+    g = Polygon(np.array(g)[:8].reshape((4, 2))).buffer(0)
+    p = Polygon(np.array(p)[:8].reshape((4, 2))).buffer(0)
+    if not g.is_valid or not p.is_valid:
+        return 0
+    inter = g.intersection(p).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    return inter / union
+
+
+def py_cpu_nms_poly_fast(dets, thresh):
+    """
+    Polygon NMS with a cheap horizontal-box pre-filter.
+
+    Args:
+        dets: 2-D array, one row per detection; columns 0-7 hold the polygon
+            (x1, y1, ..., x4, y4) and column 8 the score
+        thresh: nms threshold; a detection is dropped when its IoU with an
+            already kept, higher-scoring detection exceeds this value
+
+    Returns: index of keep (row indices, highest score first)
+    """
+    obbs = dets[:, 0:-1]
+    x1 = np.min(obbs[:, 0::2], axis=1)
+    y1 = np.min(obbs[:, 1::2], axis=1)
+    x2 = np.max(obbs[:, 0::2], axis=1)
+    y2 = np.max(obbs[:, 1::2], axis=1)
+    scores = dets[:, 8]
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+
+    polys = np.array(dets[:, :8])
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        rest = order[1:]
+
+        # overlap of the enclosing horizontal boxes
+        xx1 = np.maximum(x1[i], x1[rest])
+        yy1 = np.maximum(y1[i], y1[rest])
+        xx2 = np.minimum(x2[i], x2[rest])
+        yy2 = np.minimum(y2[i], y2[rest])
+        w = np.maximum(0.0, xx2 - xx1)
+        h = np.maximum(0.0, yy2 - yy1)
+        hbb_inter = w * h
+        hbb_ovr = hbb_inter / (areas[i] + areas[rest] - hbb_inter)
+
+        # only candidates whose horizontal boxes overlap can have a non-zero
+        # polygon IoU, so the exact computation is limited to those
+        h_inds = np.where(hbb_ovr > 0)[0]
+        for h_ind in h_inds:
+            hbb_ovr[h_ind] = rbox_iou(polys[i], polys[rest[h_ind]])
+
+        inds = np.where(hbb_ovr <= thresh)[0]
+        # +1 maps positions in ``rest`` back to positions in ``order``
+        order = order[inds + 1]
+    return keep
+
+
+def poly2origpoly(poly, x, y, rate):
+    """Shift every (x, y) pair of ``poly`` by the given offset, then divide
+    by ``rate``.
+
+    Args:
+        poly: flat sequence x1, y1, x2, y2, ...; a trailing unpaired value
+            is ignored.
+        x: offset added to every x coordinate.
+        y: offset added to every y coordinate.
+        rate: divisor, converted with ``float()`` (may be a string).
+
+    Returns:
+        Flat list of converted float coordinates.
+    """
+    origpoly = []
+    for px, py in zip(poly[0::2], poly[1::2]):
+        origpoly.append(float(px + x) / float(rate))
+        origpoly.append(float(py + y) / float(rate))
+    return origpoly
+
+
+def nmsbynamedict(nameboxdict, nms, thresh):
+    """
+    Run NMS separately on the detections of every image.
+
+    Args:
+        nameboxdict: dict mapping image name to a list of detections
+        nms: callable ``nms(np.ndarray, thresh)`` returning kept indices
+        thresh: nms threshold
+
+    Returns: dict with the same keys, holding only the kept detections
+    """
+    nameboxnmsdict = {}
+    for imgname, boxes in nameboxdict.items():
+        kept = nms(np.array(boxes), thresh)
+        nameboxnmsdict[imgname] = [boxes[index] for index in kept]
+    return nameboxnmsdict
+
+
+def merge_single(output_dir, nms, nms_thresh, pred_class_lst):
+    """
+    Merge the patch-level predictions of one class and write them to
+    ``<output_dir>/<class_name>.txt``.
+
+    Args:
+        output_dir: directory of the result file
+        nms: nms callable forwarded to ``nmsbynamedict``
+        nms_thresh: nms threshold
+        pred_class_lst: tuple ``(class_name, pred_bbox_list)``; each entry of
+            the list is a space separated line
+            ``<patch name> <confidence> <x1> <y1> ...`` whose patch name
+            encodes the rate and the x / y offset of the patch
+
+    Returns: None
+    """
+    class_name, pred_bbox_list = pred_class_lst
+
+    # compiled once; both are applied to the patch name of every line
+    offset_pattern = re.compile(r'__\d+___\d+')
+    rate_pattern = re.compile(r'__([\d+\.]+)__\d+___')
+
+    nameboxdict = {}
+    for line in pred_bbox_list:
+        fields = line.split(' ')
+        subname = fields[0]
+        oriname = subname.split('__')[0]
+
+        offset_text = re.findall(offset_pattern, subname)[0]
+        offset_numbers = re.findall(r'\d+', offset_text)
+        x, y = int(offset_numbers[0]), int(offset_numbers[1])
+        rate = re.findall(rate_pattern, subname)[0]
+
+        confidence = fields[1]
+        poly = list(map(float, fields[2:]))
+        # map patch coordinates back to the original image, score goes last
+        det = poly2origpoly(poly, x, y, rate)
+        det.append(confidence)
+        det = list(map(float, det))
+        nameboxdict.setdefault(oriname, []).append(det)
+
+    nameboxnmsdict = nmsbynamedict(nameboxdict, nms, nms_thresh)
+
+    # write result
+    dstname = os.path.join(output_dir, class_name + '.txt')
+    with open(dstname, 'w') as f_out:
+        for imgname, dets in nameboxnmsdict.items():
+            for det in dets:
+                coords = ' '.join(map(str, det[0:-1]))
+                f_out.write(imgname + ' ' + str(det[-1]) + ' ' + coords +
+                            '\n')
+
+
+def generate_result(pred_txt_dir,
+                    output_dir='output',
+                    class_names=wordname_15,
+                    nms_thresh=0.1):
+    """
+    Group per-image prediction files by class, then merge every class in a
+    worker pool (one result file per class in ``output_dir``).
+
+    pred_txt_dir: dir of pred txt; every ``*.txt`` holds space separated
+        lines that start with the class name
+    output_dir: dir of output
+    class_names: class names of data
+    nms_thresh: nms threshold used while merging
+
+    Raises KeyError when a prediction names a class not in ``class_names``.
+    """
+    pred_txt_list = glob.glob("{}/*.txt".format(pred_txt_dir))
+
+    # step1: summary pred bbox
+    pred_classes = {class_name: [] for class_name in class_names}
+
+    for current_txt in pred_txt_list:
+        img_id = os.path.split(current_txt)[1]
+        img_id = img_id.split('.txt')[0]
+        with open(current_txt) as f:
+            for item in f:
+                # blank lines carry no prediction and would otherwise be
+                # looked up as a class name
+                if not item.strip():
+                    continue
+                item = item.split(' ')
+                pred_class = item[0]
+                # the class column is replaced by the image id
+                item[0] = img_id
+                pred_bbox = ' '.join(item)
+                pred_classes[pred_class].append(pred_bbox)
+
+    pred_classes_lst = []
+    for class_name, pred_bboxes in pred_classes.items():
+        print('class_name: {}, count: {}'.format(class_name,
+                                                 len(pred_bboxes)))
+        pred_classes_lst.append((class_name, pred_bboxes))
+
+    # step2: merge
+    nms = py_cpu_nms_poly_fast
+    mergesingle_fn = partial(merge_single, output_dir, nms, nms_thresh)
+    # the context manager releases the worker processes once map() returns
+    with Pool(len(class_names)) as pool:
+        pool.map(mergesingle_fn, pred_classes_lst)
+
+
+def parse_args():
+    """Parse the command-line options of the result-merging script."""
+    option_specs = (
+        ('--pred_txt_dir', dict(
+            type=str, help='path of pred txt dir')),
+        ('--output_dir', dict(
+            type=str, default='output', help='path of output dir')),
+        ('--data_type', dict(
+            type=str, default='dota10', help='data type')),
+        ('--nms_thresh', dict(
+            type=float,
+            default=0.1,
+            help='nms threshold while merging results')),
+    )
+
+    parser = argparse.ArgumentParser(description='generate test results')
+    for flag, spec in option_specs:
+        parser.add_argument(flag, **spec)
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    # Script entry point: merge the predictions found in --pred_txt_dir.
+    args = parse_args()
+
+    output_dir = args.output_dir
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    class_names = DATA_CLASSES[args.data_type]
+
+    # forward --nms_thresh so the command-line value is actually applied
+    # instead of always falling back to generate_result's default
+    generate_result(args.pred_txt_dir, output_dir, class_names,
+                    args.nms_thresh)
+    print('done!')
--- a/paddle_detection/configs/rotate/tools/inference_benchmark.py
+++ b/paddle_detection/configs/rotate/tools/inference_benchmark.py
@@ -0,0 +1,378 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import six
+import glob
+import time
+import yaml
+import argparse
+import cv2
+import numpy as np
+
+import paddle
+import paddle.version as paddle_version
+from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version
+
+# Architectures (the 'arch' value of infer_cfg.yml) for which init_predictor
+# collects, or loads, a tuned TensorRT dynamic-shape range file.
+TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
+
+
+def check_version(version='2.2'):
+    """Raise unless the installed PaddlePaddle satisfies ``version``.
+
+    Args:
+        version: minimum version as a dotted string (e.g. ``'2.4'``), or
+            ``'develop'``.
+
+    Returns:
+        None. An install that reports version 0.0.0.0 always passes.
+
+    Raises:
+        Exception: if ``version`` is ``'develop'`` and the install does not
+            report 0.0.0.0, or if the installed version is lower than
+            ``version``.
+    """
+    err = "PaddlePaddle version {} or higher is required, " \
+          "or a suitable develop version is satisfied as well. \n" \
+          "Please make sure the version is good with your code.".format(version)
+
+    version_installed = [
+        paddle_version.major, paddle_version.minor, paddle_version.patch,
+        paddle_version.rc
+    ]
+
+    if version_installed == ['0', '0', '0', '0']:
+        return
+
+    if version == 'develop':
+        raise Exception("PaddlePaddle develop version is required!")
+
+    version_split = version.split('.')
+
+    for installed, required in zip(version_installed, version_split):
+        try:
+            # compare numerically: as strings '10' sorts before '4', which
+            # would reject e.g. 2.10 against a 2.4 requirement
+            installed, required = int(installed), int(required)
+        except (TypeError, ValueError):
+            # non-numeric component: keep the plain string comparison
+            installed, required = str(installed), str(required)
+        if installed > required:
+            return
+        if installed < required:
+            raise Exception(err)
+
+
+def check_trt_version(version='8.2'):
+    """Raise ``Exception`` when the TensorRT runtime is older than ``version``.
+
+    Components are compared left to right as integers, over as many
+    components as both versions provide.
+    """
+    err = "TensorRT version {} or higher is required," \
+          "Please make sure the version is good with your code.".format(version)
+    required_parts = [int(part) for part in version.split('.')]
+    installed_parts = get_trt_runtime_version()
+    for installed, required in zip(installed_parts, required_parts):
+        if installed > required:
+            return
+        if installed < required:
+            raise Exception(err)
+
+
+# preprocess ops
+def decode_image(im_file, im_info):
+    """Load an image and record its shape in ``im_info``.
+
+    Args:
+        im_file: path of an image file (decoded and converted to RGB), or
+            an already decoded array, which is used unchanged.
+        im_info: dict updated in place with ``'im_shape'`` (first two
+            dimensions of the image) and ``'scale_factor'`` ([1, 1]).
+
+    Returns:
+        Tuple ``(image, im_info)``.
+    """
+    im = im_file
+    if isinstance(im_file, str):
+        with open(im_file, 'rb') as stream:
+            raw_bytes = stream.read()
+        encoded = np.frombuffer(raw_bytes, dtype='uint8')
+        # imdecode yields BGR; the pipeline works on RGB
+        im = cv2.cvtColor(cv2.imdecode(encoded, 1), cv2.COLOR_BGR2RGB)
+
+    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
+    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
+    return im, im_info
+
+
+class Resize(object):
+    """Resize an image to ``target_size``.
+
+    Args:
+        target_size: int (used for both sides) or a pair ``[h, w]``.
+        keep_ratio: when True one common scale is used for both axes so the
+            aspect ratio is preserved; otherwise each axis is scaled to hit
+            ``target_size`` exactly.
+        interp: OpenCV interpolation flag passed to ``cv2.resize``.
+    """
+
+    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
+        if isinstance(target_size, int):
+            target_size = [target_size, target_size]
+        self.target_size = target_size
+        self.keep_ratio = keep_ratio
+        self.interp = interp
+
+    def __call__(self, im, im_info):
+        """Resize ``im`` and store the new shape / scale in ``im_info``.
+
+        Returns:
+            Tuple ``(resized image, im_info)``; ``im_info['im_shape']`` holds
+            the resized height and width and ``im_info['scale_factor']`` the
+            applied ``[scale_y, scale_x]``.
+        """
+        assert len(self.target_size) == 2
+        assert self.target_size[0] > 0 and self.target_size[1] > 0
+        im_scale_y, im_scale_x = self.generate_scale(im)
+        im = cv2.resize(
+            im,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=self.interp)
+        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
+        im_info['scale_factor'] = np.array(
+            [im_scale_y, im_scale_x]).astype('float32')
+        return im, im_info
+
+    def generate_scale(self, im):
+        """Return ``(scale_y, scale_x)`` for ``im``.
+
+        Only the first two dimensions of ``im.shape`` are used.
+        """
+        origin_shape = im.shape[:2]
+        if self.keep_ratio:
+            im_size_min = np.min(origin_shape)
+            im_size_max = np.max(origin_shape)
+            target_size_min = np.min(self.target_size)
+            target_size_max = np.max(self.target_size)
+            # scale the short side to the short target ...
+            im_scale = float(target_size_min) / float(im_size_min)
+            # ... unless the long side would then exceed the long target
+            if np.round(im_scale * im_size_max) > target_size_max:
+                im_scale = float(target_size_max) / float(im_size_max)
+            im_scale_x = im_scale
+            im_scale_y = im_scale
+        else:
+            resize_h, resize_w = self.target_size
+            im_scale_y = resize_h / float(origin_shape[0])
+            im_scale_x = resize_w / float(origin_shape[1])
+        return im_scale_y, im_scale_x
+
+
+class Permute(object):
+    """Move the last axis of a 3-D image to the front (axes 2, 0, 1)."""
+
+    def __init__(self):
+        super(Permute, self).__init__()
+
+    def __call__(self, im, im_info):
+        """Return the transposed image together with the untouched info."""
+        permuted = im.transpose(2, 0, 1)
+        return permuted, im_info
+
+
+class NormalizeImage(object):
+    """Convert an image to float32, optionally scale and standardise it.
+
+    Args:
+        mean: per-channel mean, broadcast over the last axis.
+        std: per-channel standard deviation, broadcast over the last axis.
+        is_scale: multiply by 1/255 before normalising.
+        norm_type: mean / std are applied only when this is ``'mean_std'``.
+    """
+
+    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
+        self.mean, self.std = mean, std
+        self.is_scale = is_scale
+        self.norm_type = norm_type
+
+    def __call__(self, im, im_info):
+        """Return ``(normalised float32 image, im_info)``."""
+        # copy=False: an input that is already float32 is modified in place
+        im = im.astype(np.float32, copy=False)
+        if self.is_scale:
+            im *= 1.0 / 255.0
+
+        if self.norm_type != 'mean_std':
+            return im, im_info
+
+        channel_mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+        channel_std = np.array(self.std)[np.newaxis, np.newaxis, :]
+        im -= channel_mean
+        im /= channel_std
+        return im, im_info
+
+
+class PadStride(object):
+    """Zero-pad a CHW image so height and width are multiples of ``stride``.
+
+    A stride of 0 or less disables padding.
+    """
+
+    def __init__(self, stride=0):
+        self.coarsest_stride = stride
+
+    def __call__(self, im, im_info):
+        """Return ``(padded float32 image, im_info)``; the input is returned
+        unchanged when padding is disabled."""
+        stride = self.coarsest_stride
+        if stride <= 0:
+            return im, im_info
+
+        channels, height, width = im.shape
+        padded_h = int(np.ceil(float(height) / stride) * stride)
+        padded_w = int(np.ceil(float(width) / stride) * stride)
+        # original pixels stay in the top-left corner, the rest is zero
+        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
+        canvas[:, :height, :width] = im
+        return canvas, im_info
+
+
+def preprocess(im, preprocess_ops):
+    """Decode ``im`` and run it through every operator in order.
+
+    Args:
+        im: image path or decoded array, as accepted by ``decode_image``.
+        preprocess_ops: callables ``op(im, im_info) -> (im, im_info)``.
+
+    Returns:
+        Tuple ``(processed image, im_info)``.
+    """
+    initial_info = {
+        'scale_factor': np.array(
+            [1., 1.], dtype=np.float32),
+        'im_shape': None,
+    }
+    im, im_info = decode_image(im, initial_info)
+    for apply_op in preprocess_ops:
+        im, im_info = apply_op(im, im_info)
+    return im, im_info
+
+
+def parse_args():
+    """Parse the command-line options of the inference benchmark."""
+    option_specs = (
+        ('--model_dir', dict(
+            type=str, help='directory of inference model')),
+        ('--run_mode', dict(
+            type=str, default='paddle', help='running mode')),
+        ('--batch_size', dict(
+            type=int, default=1, help='batch size')),
+        ('--image_dir', dict(
+            type=str,
+            default='/paddle/data/DOTA_1024_ss/test1024/images',
+            help='directory of test images')),
+        ('--warmup_iter', dict(
+            type=int, default=5, help='num of warmup iters')),
+        ('--total_iter', dict(
+            type=int, default=2000, help='num of total iters')),
+        ('--log_iter', dict(
+            type=int, default=50, help='num of log interval')),
+        ('--tuned_trt_shape_file', dict(
+            type=str,
+            default='shape_range_info.pbtxt',
+            help='dynamic shape range info')),
+    )
+
+    parser = argparse.ArgumentParser()
+    for flag, spec in option_specs:
+        parser.add_argument(flag, **spec)
+    return parser.parse_args()
+
+
+def init_predictor(FLAGS):
+    """Create a Paddle Inference predictor for the model in FLAGS.model_dir.
+
+    Reads ``infer_cfg.yml``, ``model.pdmodel`` and ``model.pdiparams`` from
+    the model directory. TensorRT is enabled when ``FLAGS.run_mode`` is one
+    of ``trt_int8`` / ``trt_fp32`` / ``trt_fp16``.
+
+    Args:
+        FLAGS: parsed arguments; ``model_dir``, ``run_mode``, ``batch_size``
+            and ``tuned_trt_shape_file`` are read here.
+
+    Returns:
+        Tuple ``(predictor, yml_conf)`` where ``yml_conf`` is the loaded
+        ``infer_cfg.yml``.
+    """
+    model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size
+    yaml_file = os.path.join(model_dir, 'infer_cfg.yml')
+    with open(yaml_file) as f:
+        yml_conf = yaml.safe_load(f)
+
+    config = Config(
+        os.path.join(model_dir, 'model.pdmodel'),
+        os.path.join(model_dir, 'model.pdiparams'))
+
+    # initial GPU memory(M), device ID
+    config.enable_use_gpu(200, 0)
+    # optimize graph and fuse op
+    config.switch_ir_optim(True)
+
+    # run modes that enable TensorRT, with the precision each one selects
+    precision_map = {
+        'trt_int8': Config.Precision.Int8,
+        'trt_fp32': Config.Precision.Float32,
+        'trt_fp16': Config.Precision.Half
+    }
+
+    arch = yml_conf['arch']
+    # the shape-range file is looked up inside the model directory
+    tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)
+
+    if run_mode in precision_map.keys():
+        # first run for a tuned architecture: no shape file yet, so request
+        # shape collection; the message asks the user to rerun afterwards
+        if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(
+                tuned_trt_shape_file):
+            print(
+                'dynamic shape range info is saved in {}. After that, rerun the code'.
+                format(tuned_trt_shape_file))
+            config.collect_shape_range_info(tuned_trt_shape_file)
+        config.enable_tensorrt_engine(
+            workspace_size=(1 << 25) * batch_size,
+            max_batch_size=batch_size,
+            min_subgraph_size=yml_conf['min_subgraph_size'],
+            precision_mode=precision_map[run_mode],
+            use_static=True,
+            use_calib_mode=False)
+
+        if yml_conf['use_dynamic_shape']:
+            if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(
+                    tuned_trt_shape_file):
+                # reuse the shape ranges collected by an earlier run
+                config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file,
+                                                           True)
+            else:
+                # hard-coded shape range: 640 (min), 1280 (max), 1024 (opt)
+                min_input_shape = {
+                    'image': [batch_size, 3, 640, 640],
+                    'scale_factor': [batch_size, 2]
+                }
+                max_input_shape = {
+                    'image': [batch_size, 3, 1280, 1280],
+                    'scale_factor': [batch_size, 2]
+                }
+                opt_input_shape = {
+                    'image': [batch_size, 3, 1024, 1024],
+                    'scale_factor': [batch_size, 2]
+                }
+                config.set_trt_dynamic_shape_info(
+                    min_input_shape, max_input_shape, opt_input_shape)
+
+    # disable print log when predict
+    config.disable_glog_info()
+    # enable shared memory
+    config.enable_memory_optim()
+    # disable feed, fetch OP, needed by zero_copy_run
+    config.switch_use_feed_fetch_ops(False)
+    predictor = create_predictor(config)
+    return predictor, yml_conf
+
+
+def create_preprocess_ops(yml_conf):
+    """Instantiate the operators listed under ``yml_conf['Preprocess']``.
+
+    Each entry is a dict whose ``'type'`` names the operator; the remaining
+    keys are passed to it as keyword arguments.
+
+    Returns:
+        List of operator instances in configuration order.
+    """
+    preprocess_ops = []
+    for op_info in yml_conf['Preprocess']:
+        # work on a copy so popping 'type' leaves the loaded config intact
+        new_op_info = op_info.copy()
+        op_type = new_op_info.pop('type')
+        # NOTE(review): eval() evaluates the configured string as a Python
+        # expression, so infer_cfg.yml must come from a trusted source.
+        preprocess_ops.append(eval(op_type)(**new_op_info))
+    return preprocess_ops
+
+
+def get_test_images(image_dir):
+    """List the jpg / jpeg / png / bmp files directly inside ``image_dir``.
+
+    Lower-case and upper-case extensions are matched. Paths are absolute and
+    de-duplicated through a set, so the order of the list is not defined.
+    """
+    infer_dir = os.path.abspath(image_dir)
+    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
+    all_exts = lower_exts + [ext.upper() for ext in lower_exts]
+
+    found = set()
+    for ext in all_exts:
+        found.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+    return list(found)
+
+
+def create_inputs(image_files, preprocess_ops):
+    """Preprocess a batch of image files and stack them into model inputs.
+
+    Returns:
+        Dict with float32 arrays ``'im_shape'``, ``'scale_factor'`` and
+        ``'image'``, each stacked along a new leading batch axis.
+    """
+    processed = [preprocess(path, preprocess_ops) for path in image_files]
+    im_list = [im for im, _ in processed]
+    im_info_list = [im_info for _, im_info in processed]
+
+    def stack_info(key):
+        # gather one im_info field across the batch
+        return np.stack(
+            [im_info[key] for im_info in im_info_list],
+            axis=0).astype('float32')
+
+    inputs = dict()
+    inputs['im_shape'] = stack_info('im_shape')
+    inputs['scale_factor'] = stack_info('scale_factor')
+    inputs['image'] = np.stack(im_list, axis=0).astype('float32')
+    return inputs
+
+
+def measure_speed(FLAGS):
+    """Benchmark the predictor on the images in ``FLAGS.image_dir``.
+
+    Images are fed in batches of ``FLAGS.batch_size`` until
+    ``FLAGS.total_iter`` images have been started. Batches whose first image
+    index is below ``FLAGS.warmup_iter`` are run but not timed. The image
+    list is cycled, so fewer than ``total_iter`` images are sufficient.
+    Progress is printed whenever the number of processed images crosses a
+    multiple of ``FLAGS.log_iter``; the overall result is printed at the end.
+
+    Raises:
+        ValueError: if no test image is found.
+    """
+    predictor, yml_conf = init_predictor(FLAGS)
+    input_names = predictor.get_input_names()
+    preprocess_ops = create_preprocess_ops(yml_conf)
+
+    image_files = get_test_images(FLAGS.image_dir)
+    if not image_files:
+        raise ValueError('no test images found in {}'.format(
+            FLAGS.image_dir))
+    num_files = len(image_files)
+
+    batch_size = FLAGS.batch_size
+    warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter
+
+    total_time = 0
+    timed_images = 0
+    for i in range(0, total_iter, batch_size):
+        # make data ready; wrap around instead of slicing past the end of
+        # the list, which would produce an empty (unstackable) batch
+        batch_files = [
+            image_files[(i + k) % num_files] for k in range(batch_size)
+        ]
+        inputs = create_inputs(batch_files, preprocess_ops)
+        for name in input_names:
+            input_tensor = predictor.get_input_handle(name)
+            input_tensor.copy_from_cpu(inputs[name])
+
+        # make sure pending GPU work is excluded from the timed region
+        paddle.device.cuda.synchronize()
+        # start running
+        start_time = time.perf_counter()
+        predictor.run()
+        paddle.device.cuda.synchronize()
+
+        if i >= warmup_iter:
+            total_time += time.perf_counter() - start_time
+            timed_images += batch_size
+            done = i + batch_size
+            # true when this batch crossed a multiple of log_iter; equals
+            # the former `(i + 1) % log_iter == 0` when batch_size is 1
+            if done // log_iter > i // log_iter:
+                fps = timed_images / total_time
+                print(
+                    f'Done image [{done:<3}/ {total_iter}], '
+                    f'fps: {fps:.1f} img / s, '
+                    f'times per image: {1000 / fps:.1f} ms / img',
+                    flush=True)
+
+    # reported after the loop so it is printed for every batch size
+    if timed_images > 0 and total_time > 0:
+        fps = timed_images / total_time
+        print(
+            f'Overall fps: {fps:.1f} img / s, '
+            f'times per image: {1000 / fps:.1f} ms / img',
+            flush=True)
+
+
+if __name__ == '__main__':
+    # Script entry point: validate library versions, then run the benchmark.
+    FLAGS = parse_args()
+    if 'trt' in FLAGS.run_mode:
+        # check_version('develop') raises unless the installed PaddlePaddle
+        # reports version 0.0.0.0
+        check_version('develop')
+        check_trt_version('8.2')
+    else:
+        check_version('2.4')
+    measure_speed(FLAGS)
--- a/paddle_detection/configs/rotate/tools/onnx_infer.py
+++ b/paddle_detection/configs/rotate/tools/onnx_infer.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import six
+import glob
+import copy
+import yaml
+import argparse
+import cv2
+import numpy as np
+from shapely.geometry import Polygon
+from onnxruntime import InferenceSession
+
+
+# preprocess ops
+def decode_image(img_path):
+    """Read an image file and decode it into an RGB array.
+
+    Args:
+        img_path (str): path of the image file.
+
+    Returns:
+        tuple: ``(im, img_info)`` where ``im`` is the decoded image converted
+        from BGR to RGB and ``img_info`` is a dict holding ``im_shape`` (the
+        first two entries of ``im.shape`` as float32) and ``scale_factor``
+        (``[1., 1.]`` as float32).
+
+    Raises:
+        ValueError: if the file content cannot be decoded as an image.
+    """
+    with open(img_path, 'rb') as f:
+        im_read = f.read()
+    data = np.frombuffer(im_read, dtype='uint8')
+    im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
+    if im is None:
+        # cv2.imdecode signals an undecodable buffer by returning None;
+        # fail here with the offending path instead of inside cvtColor.
+        raise ValueError("failed to decode image: {}".format(img_path))
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    img_info = {
+        "im_shape": np.array(
+            im.shape[:2], dtype=np.float32),
+        "scale_factor": np.array(
+            [1., 1.], dtype=np.float32)
+    }
+    return im, img_info
+
+
+class Resize(object):
+    """Resize an image to a target size, optionally keeping its aspect ratio.
+
+    Args:
+        target_size (int | list): target ``[height, width]``; a single int is
+            expanded to ``[target_size, target_size]``.
+        keep_ratio (bool): when True one common scale is used for both axes,
+            otherwise each axis is scaled independently to hit the target.
+        interp (int): OpenCV interpolation flag passed to ``cv2.resize``.
+    """
+
+    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
+        if isinstance(target_size, int):
+            target_size = [target_size, target_size]
+        self.target_size = target_size
+        self.keep_ratio = keep_ratio
+        self.interp = interp
+
+    def __call__(self, im, im_info):
+        """Resize ``im`` and record the new shape and scale in ``im_info``.
+
+        Args:
+            im (numpy.ndarray): image whose first two axes are height, width.
+            im_info (dict): updated in place with ``im_shape`` (resized
+                height, width) and ``scale_factor`` (``[scale_y, scale_x]``),
+                both float32.
+
+        Returns:
+            tuple: ``(resized_image, im_info)``.
+        """
+        assert len(self.target_size) == 2
+        assert self.target_size[0] > 0 and self.target_size[1] > 0
+        im_scale_y, im_scale_x = self.generate_scale(im)
+        im = cv2.resize(
+            im,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=self.interp)
+        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
+        im_info['scale_factor'] = np.array(
+            [im_scale_y, im_scale_x]).astype('float32')
+        return im, im_info
+
+    def generate_scale(self, im):
+        """Compute the scale factors to apply to ``im``.
+
+        With ``keep_ratio`` the short side is scaled to the smaller target
+        value, unless that would push the long side past the larger target
+        value, in which case the long side is scaled to the larger target
+        value instead.
+
+        Returns:
+            tuple: ``(im_scale_y, im_scale_x)``.
+        """
+        origin_shape = im.shape[:2]
+        if self.keep_ratio:
+            im_size_min = np.min(origin_shape)
+            im_size_max = np.max(origin_shape)
+            target_size_min = np.min(self.target_size)
+            target_size_max = np.max(self.target_size)
+            im_scale = float(target_size_min) / float(im_size_min)
+            if np.round(im_scale * im_size_max) > target_size_max:
+                im_scale = float(target_size_max) / float(im_size_max)
+            im_scale_x = im_scale
+            im_scale_y = im_scale
+        else:
+            resize_h, resize_w = self.target_size
+            im_scale_y = resize_h / float(origin_shape[0])
+            im_scale_x = resize_w / float(origin_shape[1])
+        return im_scale_y, im_scale_x
+
+
+class Permute(object):
+    """Move axis 2 of the image to the front, i.e. (H, W, C) -> (C, H, W)."""
+
+    def __init__(self):
+        super(Permute, self).__init__()
+
+    def __call__(self, im, im_info):
+        # im_info is passed through untouched; only the axis order changes.
+        channel_first = im.transpose((2, 0, 1))
+        return channel_first, im_info
+
+
+class NormalizeImage(object):
+    """Convert an image to float32 and normalize it.
+
+    Args:
+        mean (list): per-channel mean, subtracted when norm_type is 'mean_std'.
+        std (list): per-channel std, divided by when norm_type is 'mean_std'.
+        is_scale (bool): multiply the pixel values by 1/255 first.
+        norm_type (str): only 'mean_std' triggers the mean/std step.
+    """
+
+    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
+        self.mean, self.std = mean, std
+        self.is_scale, self.norm_type = is_scale, norm_type
+
+    def __call__(self, im, im_info):
+        # copy=False: an input that is already float32 is modified in place.
+        im = im.astype(np.float32, copy=False)
+        if self.is_scale:
+            im *= 1.0 / 255.0
+
+        if self.norm_type != 'mean_std':
+            return im, im_info
+
+        # Index with None so mean/std broadcast over the two leading axes.
+        im -= np.array(self.mean)[None, None, :]
+        im /= np.array(self.std)[None, None, :]
+        return im, im_info
+
+
+class PadStride(object):
+    """Zero-pad a (C, H, W) image so H and W become multiples of a stride.
+
+    Args:
+        stride (int): the stride to align to; values <= 0 disable padding.
+    """
+
+    def __init__(self, stride=0):
+        self.coarsest_stride = stride
+
+    def __call__(self, im, im_info):
+        stride = self.coarsest_stride
+        if stride <= 0:
+            return im, im_info
+
+        channels, height, width = im.shape
+        # Round each spatial size up to the next multiple of the stride.
+        padded_h = int(np.ceil(float(height) / stride) * stride)
+        padded_w = int(np.ceil(float(width) / stride) * stride)
+        # The original pixels stay in the top-left corner of the canvas.
+        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
+        canvas[:, :height, :width] = im
+        return canvas, im_info
+
+
+class Compose:
+    """Build and apply the preprocessing pipeline described by a config list.
+
+    Args:
+        transforms (list[dict]): one dict per op. The ``type`` entry names an
+            op class defined in this module; the remaining entries are passed
+            to that class as keyword arguments.
+
+    Raises:
+        ValueError: if a ``type`` entry does not name a class of this module.
+    """
+
+    def __init__(self, transforms):
+        self.transforms = []
+        for op_info in transforms:
+            new_op_info = op_info.copy()
+            op_type = new_op_info.pop('type')
+            # NOTE(security): this used to be eval(op_type), which executes
+            # arbitrary expressions taken from the config file. The op is now
+            # resolved by name among this module's globals and must be a
+            # class, so a config can no longer run code through this field.
+            op_cls = globals().get(op_type)
+            if not isinstance(op_cls, type):
+                raise ValueError(
+                    "unsupported preprocess op type: {!r}".format(op_type))
+            self.transforms.append(op_cls(**new_op_info))
+
+    def __call__(self, img_path):
+        """Decode ``img_path`` and run every op on it in order.
+
+        Returns:
+            dict: a deep copy of the image info dict produced by the ops,
+            plus the processed image under the ``image`` key.
+        """
+        img, im_info = decode_image(img_path)
+        for t in self.transforms:
+            img, im_info = t(img, im_info)
+        inputs = copy.deepcopy(im_info)
+        inputs['image'] = img
+        return inputs
+
+
+# postprocess
+def rbox_iou(g, p):
+    """Compute the IoU of two quadrilaterals.
+
+    Args:
+        g, p (array-like): at least 8 values each; the first 8 are read as
+            four (x, y) corner points.
+
+    Returns:
+        float | int: intersection area divided by union area, or 0 when a
+        geometry is invalid or the union area is zero.
+    """
+    g = Polygon(np.array(g)[:8].reshape((4, 2)))
+    p = Polygon(np.array(p)[:8].reshape((4, 2)))
+    # buffer(0) repairs self-intersecting rings. The repaired geometry is not
+    # guaranteed to be a single Polygon, so it is intersected directly
+    # instead of being wrapped in Polygon(...) again.
+    g = g.buffer(0)
+    p = p.buffer(0)
+    if not g.is_valid or not p.is_valid:
+        return 0
+    inter = g.intersection(p).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    return inter / union
+
+
+def multiclass_nms_rotated(pred_bboxes,
+                           pred_scores,
+                           iou_threshlod=0.1,
+                           score_threshold=0.1):
+    """
+    Per-class greedy NMS for rotated boxes.
+
+    For every image and class, boxes scoring above ``score_threshold`` are
+    visited in descending score order; a box is dropped when its IoU with an
+    already kept box of the same class exceeds ``iou_threshlod``.
+
+    Args:
+        pred_bboxes (numpy.ndarray): [B, N, 8]
+        pred_scores (numpy.ndarray): [B, C, N]
+        iou_threshlod (float): IoU above which a box is suppressed (the
+            misspelled name is kept for backward compatibility)
+        score_threshold (float): boxes scoring at or below it are ignored
+
+    Return:
+        bboxes (numpy.ndarray): [N, 10], each row is
+            (class id, score, 8 box coordinates)
+        bbox_num (numpy.ndarray): [B], number of kept boxes per image
+    """
+    bbox_num = []
+    bboxes = []
+    for bbox_per_img, score_per_img in zip(pred_bboxes, pred_scores):
+        num_per_img = 0
+        for cls_id, score_per_cls in enumerate(score_per_img):
+            keep_mask = score_per_cls > score_threshold
+            bbox = bbox_per_img[keep_mask]
+            score = score_per_cls[keep_mask]
+
+            # highest score first
+            idx = score.argsort()[::-1]
+            bbox = bbox[idx]
+            score = score[idx]
+            keep_idx = []
+            for i, b in enumerate(bbox):
+                suppressed = any(
+                    rbox_iou(b, bbox[gi]) > iou_threshlod for gi in keep_idx)
+                if not suppressed:
+                    keep_idx.append(i)
+
+            keep_box = bbox[keep_idx]
+            keep_score = score[keep_idx]
+            keep_cls_ids = np.ones(len(keep_idx)) * cls_id
+            bboxes.append(
+                np.concatenate(
+                    [keep_cls_ids[:, None], keep_score[:, None], keep_box],
+                    axis=-1))
+            num_per_img += len(keep_idx)
+
+        bbox_num.append(num_per_img)
+
+    if not bboxes:
+        # Empty batch or zero classes: np.concatenate would raise on an
+        # empty list, so return an empty result of the documented width.
+        box_dim = np.shape(pred_bboxes)[-1] if np.ndim(pred_bboxes) == 3 else 8
+        return np.zeros((0, box_dim + 2)), np.array(bbox_num, dtype=int)
+
+    return np.concatenate(bboxes, axis=0), np.array(bbox_num)
+
+
+def get_test_images(infer_dir, infer_img):
+    """
+    Get image path list in TEST mode
+
+    Args:
+        infer_dir (str | None): directory searched (non-recursively) for
+            jpg / jpeg / png / bmp files, in lower or upper case extension.
+        infer_img (str | None): a single image file; takes priority over
+            ``infer_dir`` when given.
+
+    Returns:
+        list[str]: ``[infer_img]`` when it is given, otherwise the sorted
+        paths of the images found in ``infer_dir``.
+    """
+    assert infer_img is not None or infer_dir is not None, \
+        "--image_file or --image_dir should be set"
+    assert infer_img is None or os.path.isfile(infer_img), \
+            "{} is not a file".format(infer_img)
+    assert infer_dir is None or os.path.isdir(infer_dir), \
+            "{} is not a directory".format(infer_dir)
+
+    # infer_img has a higher priority
+    if infer_img and os.path.isfile(infer_img):
+        return [infer_img]
+
+    # A set removes duplicates when one file matches more than one pattern.
+    images = set()
+    infer_dir = os.path.abspath(infer_dir)
+    assert os.path.isdir(infer_dir), \
+        "infer_dir {} is not a directory".format(infer_dir)
+    exts = ['jpg', 'jpeg', 'png', 'bmp']
+    exts += [ext.upper() for ext in exts]
+    for ext in exts:
+        images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
+    # Sort so the processing order is reproducible; iterating a set of
+    # strings can give a different order on every run.
+    images = sorted(images)
+
+    assert len(images) > 0, "no image found in {}".format(infer_dir)
+    print("Found {} inference images in total.".format(len(images)))
+
+    return images
+
+
+def predict_image(infer_config, predictor, img_list):
+    """Run the ONNX model on every image and print the detections.
+
+    Args:
+        infer_config (dict): loaded inference config; ``Preprocess`` and
+            ``draw_threshold`` are read here.
+        predictor: ONNX Runtime session (``get_inputs`` and ``run`` are used).
+        img_list (list[str]): paths of the images to process.
+    """
+    # load preprocess transforms
+    transforms = Compose(infer_config['Preprocess'])
+    # The session's input names do not change between images.
+    inputs_name = [var.name for var in predictor.get_inputs()]
+    # predict image
+    for img_path in img_list:
+        inputs = transforms(img_path)
+        # [None, ] adds a leading batch axis of size 1 to every input.
+        inputs = {k: inputs[k][None, ] for k in inputs_name}
+
+        outputs = predictor.run(output_names=None, input_feed=inputs)
+
+        # outputs[0] / outputs[1] are passed as boxes / scores.
+        bboxes, bbox_num = multiclass_nms_rotated(
+            np.array(outputs[0]), np.array(outputs[1]))
+        print("ONNXRuntime predict: ")
+        for bbox in bboxes:
+            if bbox[0] > -1 and bbox[1] > infer_config['draw_threshold']:
+                # class id, score, then the 8 box coordinates, all separated
+                # by single spaces.
+                print(f"{int(bbox[0])} {bbox[1]} "
+                      f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]} "
+                      f"{bbox[6]} {bbox[7]} {bbox[8]} {bbox[9]}")
+
+
+def parse_args():
+    """Parse the command line options of the ONNX inference script."""
+    arg_parser = argparse.ArgumentParser(description=__doc__)
+    # (flag, extra keyword arguments); every option is a plain string.
+    option_specs = (
+        ("--infer_cfg", {"help": "infer_cfg.yml"}),
+        ("--onnx_file", {
+            "default": "model.onnx",
+            "help": "onnx model file path"
+        }),
+        ("--image_dir", {}),
+        ("--image_file", {}),
+    )
+    for flag, extra in option_specs:
+        arg_parser.add_argument(flag, type=str, **extra)
+    return arg_parser.parse_args()
+
+
+if __name__ == '__main__':
+    FLAGS = parse_args()
+    # Resolve the images first, then create the ONNX Runtime session, then
+    # read the YAML inference config.
+    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
+    predictor = InferenceSession(FLAGS.onnx_file)
+    with open(FLAGS.infer_cfg) as cfg_file:
+        infer_config = yaml.safe_load(cfg_file)
+
+    predict_image(infer_config, predictor, img_list)
--- a/paddle_detection/configs/rotate/tools/prepare_data.py
+++ b/paddle_detection/configs/rotate/tools/prepare_data.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import argparse
+from convert import load_dota_infos, data_to_coco
+from slicebase import SliceBase
+
+# Base list of 15 class names; the two longer lists below extend it.
+wordname_15 = [
+    'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
+    'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
+    'basketball-court', 'storage-tank', 'soccer-ball-field', 'roundabout',
+    'harbor', 'swimming-pool', 'helicopter'
+]
+
+# 16 class names: the 15 above plus 'container-crane'.
+wordname_16 = wordname_15 + ['container-crane']
+
+# 18 class names: the 16 above plus 'airport' and 'helipad'.
+wordname_18 = wordname_16 + ['airport', 'helipad']
+
+# Maps a --data_type value to its class-name list.
+# NOTE(review): the keys presumably stand for DOTA v1.0 / v1.5 / v2.0 --
+# confirm against the dataset documentation.
+DATA_CLASSES = {
+    'dota10': wordname_15,
+    'dota15': wordname_16,
+    'dota20': wordname_18
+}
+
+
+def parse_args():
+    """Parse the command line options of the data preparation script.
+
+    Returns:
+        argparse.Namespace: the parsed options.
+    """
+    parser = argparse.ArgumentParser('prepare data for training')
+
+    parser.add_argument(
+        '--input_dirs',
+        nargs='+',
+        type=str,
+        default=None,
+        help='input dirs which contain image and labelTxt dir')
+
+    parser.add_argument(
+        '--output_dir',
+        type=str,
+        default=None,
+        help='output dirs which contain image and labelTxt dir and coco style json file'
+    )
+
+    parser.add_argument(
+        '--coco_json_file',
+        type=str,
+        default='',
+        help='coco json annotation files')
+
+    parser.add_argument('--subsize', type=int, default=1024, help='patch size')
+
+    # The value is handed to SliceBase as `gap`, i.e. the overlap between
+    # neighbouring patches; the sliding step is subsize - gap.
+    parser.add_argument(
+        '--gap',
+        type=int,
+        default=200,
+        help='overlap between adjacent patches (step size is subsize - gap)')
+
+    parser.add_argument(
+        '--data_type', type=str, default='dota10', help='data type')
+
+    parser.add_argument(
+        '--rates',
+        nargs='+',
+        type=float,
+        default=[1.],
+        help='scales for multi-slice training')
+
+    parser.add_argument(
+        '--nproc', type=int, default=8, help='the processor number')
+
+    parser.add_argument(
+        '--iof_thr',
+        type=float,
+        default=0.5,
+        help='the minimal iof between a object and a window')
+
+    parser.add_argument(
+        '--image_only',
+        action='store_true',
+        default=False,
+        help='only processing image')
+
+    args = parser.parse_args()
+    return args
+
+
+def load_dataset(input_dir, nproc, data_type):
+    """Load the image/annotation infos found under ``input_dir``.
+
+    Only data types whose name contains 'dota' (case-insensitive) are
+    supported; anything else raises ValueError.
+    """
+    if 'dota' not in data_type.lower():
+        raise ValueError('only dota dataset is supported now')
+    return load_dota_infos(input_dir, nproc)
+
+
+def main():
+    """Slice the input datasets and optionally export a COCO style json.
+
+    Raises:
+        ValueError: if ``--coco_json_file`` is given together with a
+            ``--data_type`` that has no entry in ``DATA_CLASSES``.
+    """
+    args = parse_args()
+
+    # Resolve the class names before the slicing starts, so that an unknown
+    # data type fails immediately instead of after the whole dataset has
+    # been processed. The lookup is case-insensitive, like load_dataset.
+    class_names = None
+    if args.coco_json_file:
+        data_type = args.data_type.lower()
+        if data_type not in DATA_CLASSES:
+            raise ValueError(
+                'unknown data type {!r}, expected one of {}'.format(
+                    args.data_type, sorted(DATA_CLASSES)))
+        class_names = DATA_CLASSES[data_type]
+
+    infos = []
+    for input_dir in args.input_dirs:
+        infos += load_dataset(input_dir, args.nproc, args.data_type)
+
+    slicer = SliceBase(
+        args.gap,
+        args.subsize,
+        args.iof_thr,
+        num_process=args.nproc,
+        image_only=args.image_only)
+    slicer.slice_data(infos, args.rates, args.output_dir)
+    if args.coco_json_file:
+        # Re-read the sliced output so the json describes the patches.
+        infos = load_dota_infos(args.output_dir, args.nproc)
+        coco_json_file = os.path.join(args.output_dir, args.coco_json_file)
+        data_to_coco(infos, coco_json_file, class_names, args.nproc)
+
+
+if __name__ == '__main__':
+    main()
--- a/paddle_detection/configs/rotate/tools/slicebase.py
+++ b/paddle_detection/configs/rotate/tools/slicebase.py
@@ -0,0 +1,267 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import math
+import copy
+from numbers import Number
+from multiprocessing import Pool
+
+import cv2
+import numpy as np
+from tqdm import tqdm
+import shapely.geometry as shgeo
+
+
+def choose_best_pointorder_fit_another(poly1, poly2):
+    """
+        To make the two polygons best fit with each point
+
+        Tries the four cyclic orderings of ``poly1``'s points and returns the
+        one with the smallest sum of squared coordinate differences to
+        ``poly2``. On a tie the earliest ordering wins.
+
+        Args:
+            poly1: 8 values (x1, y1, ..., x4, y4) of the polygon to reorder.
+            poly2: 8 values of the reference polygon.
+
+        Returns:
+            numpy.ndarray: ``poly1`` with its points cyclically shifted.
+    """
+    x1, y1, x2, y2, x3, y3, x4, y4 = poly1
+    points = np.array([x1, y1, x2, y2, x3, y3, x4, y4])
+    # Shifting by two values moves the start to the next (x, y) point.
+    combinate = [np.roll(points, -2 * shift) for shift in range(4)]
+    dst_coordinate = np.array(poly2)
+    distances = np.array(
+        [np.sum((coord - dst_coordinate)**2) for coord in combinate])
+    return combinate[int(np.argmin(distances))]
+
+
+def cal_line_length(point1, point2):
+    """Return the Euclidean distance between two (x, y) points."""
+    # math.hypot avoids the intermediate overflow of squaring large values.
+    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
+
+
+class SliceBase(object):
+    """Cut images and their polygon annotations into fixed-size patches.
+
+    Args:
+        gap (int): overlap between neighbouring patches; the sliding step is
+            ``subsize - gap``.
+        subsize (int): side length of a patch.
+        thresh (float): objects whose IoF with a patch is below this value
+            are written with difficulty '2'.
+        choosebestpoint (bool): reorder the points of a clipped polygon so
+            they best match the point order of the original polygon.
+        ext (str): file extension of the written patches.
+        padding (bool): copy every patch onto a zero-filled
+            ``subsize x subsize`` canvas before writing it.
+        num_process (int): number of worker processes; values <= 1 run
+            everything in the current process.
+        image_only (bool): slice the images only and skip the annotations.
+    """
+
+    def __init__(self,
+                 gap=512,
+                 subsize=1024,
+                 thresh=0.7,
+                 choosebestpoint=True,
+                 ext='.png',
+                 padding=True,
+                 num_process=8,
+                 image_only=False):
+        self.gap = gap
+        self.subsize = subsize
+        self.slide = subsize - gap
+        self.thresh = thresh
+        self.choosebestpoint = choosebestpoint
+        self.ext = ext
+        self.padding = padding
+        self.num_process = num_process
+        self.image_only = image_only
+
+    def get_windows(self, height, width):
+        """List the ``(left, up, right, down)`` windows covering an image.
+
+        Windows advance by ``self.slide`` along both axes. The last window of
+        a row / column is shifted back so that it ends at the image border,
+        and ``right`` / ``down`` are clamped to ``width - 1`` / ``height - 1``.
+        """
+        windows = []
+        left, up = 0, 0
+        while (left < width):
+            if (left + self.subsize >= width):
+                left = max(width - self.subsize, 0)
+            up = 0
+            while (up < height):
+                if (up + self.subsize >= height):
+                    up = max(height - self.subsize, 0)
+                right = min(left + self.subsize, width - 1)
+                down = min(up + self.subsize, height - 1)
+                windows.append((left, up, right, down))
+                if (up + self.subsize >= height):
+                    break
+                else:
+                    up = up + self.slide
+            if (left + self.subsize >= width):
+                break
+            else:
+                left = left + self.slide
+
+        return windows
+
+    def slice_image_single(self, image, windows, output_dir, output_name):
+        """Write one patch per window into ``<output_dir>/images``.
+
+        Patches are named ``<output_name><left>___<up><ext>``.
+        """
+        image_dir = os.path.join(output_dir, 'images')
+        for (left, up, right, down) in windows:
+            image_name = output_name + str(left) + '___' + str(up) + self.ext
+            subimg = copy.deepcopy(image[up:up + self.subsize, left:left +
+                                         self.subsize])
+            h, w, c = subimg.shape
+            if (self.padding):
+                # Patches cut at the image border can be smaller than
+                # subsize; they are placed in the top-left corner of a
+                # zero-filled canvas.
+                outimg = np.zeros((self.subsize, self.subsize, 3))
+                outimg[0:h, 0:w, :] = subimg
+                cv2.imwrite(os.path.join(image_dir, image_name), outimg)
+            else:
+                cv2.imwrite(os.path.join(image_dir, image_name), subimg)
+
+    def iof(self, poly1, poly2):
+        """Return ``(intersection, intersection_area / poly1_area)``."""
+        inter_poly = poly1.intersection(poly2)
+        inter_area = inter_poly.area
+        poly1_area = poly1.area
+        half_iou = inter_area / poly1_area
+        return inter_poly, half_iou
+
+    def translate(self, poly, left, up):
+        """Shift a flat ``[x, y, x, y, ...]`` polygon into patch coordinates.
+
+        ``left`` is subtracted from every x and ``up`` from every y; the
+        results are truncated with ``int()``.
+        """
+        n = len(poly)
+        out_poly = np.zeros(n)
+        for i in range(n // 2):
+            out_poly[i * 2] = int(poly[i * 2] - left)
+            out_poly[i * 2 + 1] = int(poly[i * 2 + 1] - up)
+        return out_poly
+
+    def get_poly4_from_poly5(self, poly):
+        """Reduce a 5-point polygon (10 values) to 4 points (8 values).
+
+        The two end points of the shortest edge are replaced by the middle
+        of that edge.
+        """
+        distances = [
+            cal_line_length((poly[i * 2], poly[i * 2 + 1]),
+                            (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1]))
+            for i in range(int(len(poly) / 2 - 1))
+        ]
+        # closing edge between the first and the last point
+        distances.append(
+            cal_line_length((poly[0], poly[1]), (poly[8], poly[9])))
+        pos = np.array(distances).argsort()[0]
+        count = 0
+        out_poly = []
+        while count < 5:
+            if (count == pos):
+                # start of the shortest edge: emit the edge's midpoint
+                out_poly.append(
+                    (poly[count * 2] + poly[(count * 2 + 2) % 10]) / 2)
+                out_poly.append(
+                    (poly[(count * 2 + 1) % 10] + poly[(count * 2 + 3) % 10]) /
+                    2)
+                count = count + 1
+            elif (count == (pos + 1) % 5):
+                # end of the shortest edge: already merged into the midpoint
+                count = count + 1
+                continue
+
+            else:
+                out_poly.append(poly[count * 2])
+                out_poly.append(poly[count * 2 + 1])
+                count = count + 1
+        return out_poly
+
+    def slice_anno_single(self, annos, windows, output_dir, output_name):
+        """Write one annotation file per window into ``<output_dir>/labelTxt``.
+
+        Every line holds the 8 polygon values in patch coordinates, the
+        class name and a difficulty flag. Objects lying completely inside a
+        window are only translated; partly covered objects are clipped to
+        the window and kept only when the clipped shape has 4 or 5 points.
+        """
+        anno_dir = os.path.join(output_dir, 'labelTxt')
+        for (left, up, right, down) in windows:
+            image_poly = shgeo.Polygon(
+                [(left, up), (right, up), (right, down), (left, down)])
+            anno_file = output_name + str(left) + '___' + str(up) + '.txt'
+            with open(os.path.join(anno_dir, anno_file), 'w') as f:
+                for anno in annos:
+                    gt_poly = shgeo.Polygon(
+                        [(anno['poly'][0], anno['poly'][1]),
+                         (anno['poly'][2], anno['poly'][3]),
+                         (anno['poly'][4], anno['poly'][5]),
+                         (anno['poly'][6], anno['poly'][7])])
+                    if gt_poly.area <= 0:
+                        continue
+                    inter_poly, iof = self.iof(gt_poly, image_poly)
+                    if iof == 1:
+                        final_poly = self.translate(anno['poly'], left, up)
+                    elif iof > 0:
+                        inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
+                        # the last coordinate repeats the first one
+                        out_poly = list(inter_poly.exterior.coords)[0:-1]
+                        if len(out_poly) < 4 or len(out_poly) > 5:
+                            continue
+
+                        final_poly = []
+                        for p in out_poly:
+                            final_poly.append(p[0])
+                            final_poly.append(p[1])
+
+                        if len(out_poly) == 5:
+                            final_poly = self.get_poly4_from_poly5(final_poly)
+
+                        if self.choosebestpoint:
+                            final_poly = choose_best_pointorder_fit_another(
+                                final_poly, anno['poly'])
+
+                        final_poly = self.translate(final_poly, left, up)
+                        final_poly = np.clip(final_poly, 1, self.subsize)
+                    else:
+                        continue
+                    outline = ' '.join(list(map(str, final_poly)))
+                    if iof >= self.thresh:
+                        outline = outline + ' ' + anno['name'] + ' ' + str(anno[
+                            'difficult'])
+                    else:
+                        # too little of the object is visible in this patch
+                        outline = outline + ' ' + anno['name'] + ' ' + '2'
+
+                    f.write(outline + '\n')
+
+    def slice_data_single(self, info, rate, output_dir):
+        """Slice one image (and its annotations) at one scale rate.
+
+        Args:
+            info (dict): holds ``image_file`` and ``annotation``; it is not
+                modified.
+            rate (float): scale applied to the image and the polygons before
+                slicing.
+            output_dir (str): output directory.
+        """
+        file_name = info['image_file']
+        base_name = os.path.splitext(os.path.split(file_name)[-1])[0]
+        base_name = base_name + '__' + str(rate) + '__'
+        img = cv2.imread(file_name)
+        if img is None:
+            # cv2.imread returns None for a missing or unreadable file.
+            return
+
+        if (rate != 1):
+            resize_img = cv2.resize(
+                img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
+        else:
+            resize_img = img
+
+        height, width, _ = resize_img.shape
+        windows = self.get_windows(height, width)
+        self.slice_image_single(resize_img, windows, output_dir, base_name)
+        if not self.image_only:
+            # Scale into fresh dicts: in single-process mode the same `info`
+            # is passed again for every rate, so rescaling it in place would
+            # compound the rates.
+            annos = [
+                dict(anno, poly=[rate * x for x in anno['poly']])
+                for anno in info['annotation']
+            ]
+            self.slice_anno_single(annos, windows, output_dir, base_name)
+
+    def check_or_mkdirs(self, path):
+        """Create ``path`` (including parents) when it does not exist."""
+        if not os.path.exists(path):
+            os.makedirs(path, exist_ok=True)
+
+    def slice_data(self, infos, rates, output_dir):
+        """
+        Slice every image of ``infos`` at every rate.
+
+        Args:
+            infos (list[dict]): data_infos
+            rates (float, list): scale rates
+            output_dir (str): output directory
+        """
+        if isinstance(rates, Number):
+            rates = [rates, ]
+
+        self.check_or_mkdirs(output_dir)
+        self.check_or_mkdirs(os.path.join(output_dir, 'images'))
+        if not self.image_only:
+            self.check_or_mkdirs(os.path.join(output_dir, 'labelTxt'))
+
+        pbar = tqdm(total=len(rates) * len(infos), desc='slicing data')
+
+        if self.num_process <= 1:
+            for rate in rates:
+                for info in infos:
+                    self.slice_data_single(info, rate, output_dir)
+                    pbar.update()
+        else:
+
+            def make_error_callback(image_file, rate):
+                # Without an error callback a failure inside a worker is
+                # dropped silently; report it and keep the bar moving.
+                def on_error(exc):
+                    print(
+                        'failed to slice {} at rate {}: {!r}'.format(
+                            image_file, rate, exc),
+                        flush=True)
+                    pbar.update()
+
+                return on_error
+
+            pool = Pool(self.num_process)
+            for rate in rates:
+                for info in infos:
+                    pool.apply_async(
+                        self.slice_data_single, (info, rate, output_dir),
+                        callback=lambda x: pbar.update(),
+                        error_callback=make_error_callback(
+                            info['image_file'], rate))
+
+            pool.close()
+            pool.join()
+
+        pbar.close()