更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,163 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import cv2
from tqdm import tqdm
from multiprocessing import Pool
def load_dota_info(image_dir, anno_dir, file_name, ext=None):
    """Load the image path and annotations of a single DOTA sample.

    Args:
        image_dir (str): directory that contains the image.
        anno_dir (str): directory that contains ``<base_name>.txt``.
        file_name (str): image file name, including its extension.
        ext (str|list|tuple|None): accepted extension(s), e.g. ``'.png'`` or
            ``['.png', '.jpg']``. Every file is accepted when it is falsy.

    Returns:
        dict|None: ``None`` when the file extension is rejected by ``ext``.
        Otherwise ``{'image_file': path, 'annotation': [...]}``; every
        annotation holds ``poly`` (8 floats), ``name`` and ``difficult``.
        The annotation list is empty when the label file does not exist.
    """
    base_name, extension = os.path.splitext(file_name)
    if ext:
        # A bare string is ONE extension and has to match exactly. The old
        # ``extension not in ext`` test was a substring check for strings, so
        # files without an extension ('' is in every string) slipped through.
        accepted = (ext, ) if isinstance(ext, str) else ext
        if extension not in accepted:
            return None

    info = {'image_file': os.path.join(image_dir, file_name), 'annotation': []}
    anno_file = os.path.join(anno_dir, base_name + '.txt')
    if not os.path.exists(anno_file):
        return info

    with open(anno_file, 'r') as f:
        for line in f:
            items = line.strip().split()
            # fewer than 9 fields cannot hold 8 coordinates plus a class name
            if len(items) < 9:
                continue
            info['annotation'].append({
                'poly': list(map(float, items[:8])),
                'name': items[8],
                # the 10th field is optional; '0' is used when it is absent
                'difficult': '0' if len(items) == 9 else items[9],
            })
    return info
def load_dota_infos(root_dir, num_process=8, ext=None):
    """Load every sample found under ``<root_dir>/images``.

    Args:
        root_dir (str): directory holding the ``images`` and ``labelTxt``
            sub-directories.
        num_process (int): a process pool of this size is used when it is
            larger than 1; otherwise files are loaded in the calling process.
        ext (str|list|None): forwarded to ``load_dota_info``.

    Returns:
        list[dict]: the truthy results of ``load_dota_info``, in
        ``os.listdir`` order.
    """
    image_dir = os.path.join(root_dir, 'images')
    anno_dir = os.path.join(root_dir, 'labelTxt')

    if num_process > 1:
        pool = Pool(num_process)
        pending = [
            pool.apply_async(load_dota_info,
                             (image_dir, anno_dir, name, ext))
            for name in os.listdir(image_dir)
        ]
        pool.close()
        pool.join()
        loaded = [task.get() for task in pending]
    else:
        loaded = []
        for name in os.listdir(image_dir):
            loaded.append(load_dota_info(image_dir, anno_dir, name, ext))

    # files rejected by the extension filter come back as None
    return [info for info in loaded if info]
def process_single_sample(info, image_id, class_names):
    """Convert one loaded DOTA sample into COCO-style records.

    Args:
        info (dict): sample with ``image_file`` and ``annotation`` keys, as
            produced by ``load_dota_info``.
        image_id (int): id stored in the image record and in every
            annotation of this image.
        class_names (list[str]): class list; ``category_id`` is the 1-based
            position of the object name in it.

    Returns:
        tuple: ``(single_image, single_objs)`` - the image record and the
        list of annotation records. Objects whose ``difficult`` flag is
        ``'2'`` are left out.

    Raises:
        IOError: if the image cannot be read.
        ValueError: if an object name is not in ``class_names``.
    """
    image_file = info['image_file']
    image = cv2.imread(image_file)
    # cv2.imread reports failure by returning None rather than raising
    if image is None:
        raise IOError('failed to read image: {}'.format(image_file))
    img_height, img_width = image.shape[:2]

    single_image = dict()
    single_image['file_name'] = os.path.split(image_file)[-1]
    single_image['id'] = image_id
    single_image['width'] = img_width
    single_image['height'] = img_height

    # process annotation field
    single_objs = []
    for obj in info['annotation']:
        poly, name, difficult = obj['poly'], obj['name'], obj['difficult']
        if difficult == '2':
            continue

        # axis-aligned box that encloses the polygon
        xs, ys = poly[0::2], poly[1::2]
        xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
        box_w, box_h = xmax - xmin, ymax - ymin

        single_obj = dict()
        single_obj['category_id'] = class_names.index(name) + 1
        single_obj['segmentation'] = [poly]
        single_obj['iscrowd'] = 0
        single_obj['bbox'] = [xmin, ymin, box_w, box_h]
        single_obj['area'] = box_h * box_w
        single_obj['image_id'] = image_id
        single_objs.append(single_obj)

    return (single_image, single_objs)
def data_to_coco(infos, output_path, class_names, num_process):
    """Write ``infos`` as a COCO-style json file.

    Args:
        infos (list[dict]): samples to convert; image ids are assigned from
            1 in list order.
        output_path (str): path of the json file to write.
        class_names (list[str]): class list; category ids start at 1.
        num_process (int): a process pool of this size is used when it is
            larger than 1.
    """
    data_dict = dict()
    data_dict['categories'] = [{
        'id': category_id,
        'name': category,
        'supercategory': category
    } for category_id, category in enumerate(class_names, start=1)]

    progress = tqdm(total=len(infos), desc='data to coco')
    images, annotations = [], []
    if num_process > 1:
        pool = Pool(num_process)
        pending = [
            pool.apply_async(
                process_single_sample, (info, image_id, class_names),
                callback=lambda _result: progress.update())
            for image_id, info in enumerate(infos, start=1)
        ]
        pool.close()
        pool.join()
        for task in pending:
            image_record, anno_records = task.get()
            images.append(image_record)
            annotations.extend(anno_records)
    else:
        for image_id, info in enumerate(infos, start=1):
            image_record, anno_records = process_single_sample(
                info, image_id, class_names)
            images.append(image_record)
            annotations.extend(anno_records)
            progress.update()
    progress.close()

    # annotation ids are global across all images and start at 1
    for anno_id, anno in enumerate(annotations, start=1):
        anno['id'] = anno_id

    data_dict['images'] = images
    data_dict['annotations'] = annotations
    with open(output_path, 'w') as f:
        json.dump(data_dict, f)

View File

@@ -0,0 +1,266 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import glob
import numpy as np
from multiprocessing import Pool
from functools import partial
from shapely.geometry import Polygon
import argparse
# Class list selected by the 'dota10' data type.
wordname_15 = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]
# 'dota15' appends one class to the list above, 'dota20' two more.
wordname_16 = list(wordname_15)
wordname_16.append('container-crane')
wordname_18 = list(wordname_16)
wordname_18.extend(['airport', 'helipad'])
# Maps the value of --data_type to its class list.
DATA_CLASSES = dict(
    dota10=wordname_15, dota15=wordname_16, dota20=wordname_18)
def rbox_iou(g, p):
    """
    iou of rbox

    Args:
        g: sequence whose first 8 values are the x, y coordinates of the
            4 corner points of a box
        p: same layout as g
    Returns: intersection over union of the two boxes; 0 when either
        geometry is invalid or the union area is 0
    """
    g = Polygon(np.array(g)[:8].reshape((4, 2))).buffer(0)
    p = Polygon(np.array(p)[:8].reshape((4, 2))).buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0
    # Intersect the buffered geometries directly. buffer(0) may hand back a
    # geometry that is not a single Polygon (e.g. a MultiPolygon for a
    # self-crossing box), which the Polygon() constructor cannot wrap.
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    return inter / union
def py_cpu_nms_poly_fast(dets, thresh):
    """
    Args:
        dets: pred results; 2-D array whose columns 0-7 are the polygon
            coordinates and whose column 8 is the score
        thresh: nms threshold
    Returns: index of keep, ordered by descending score
    """
    dets = np.asarray(dets)
    # nothing to suppress; also avoids 2-D indexing into an empty 1-D array
    if dets.size == 0:
        return []

    obbs = dets[:, 0:-1]
    x1 = np.min(obbs[:, 0::2], axis=1)
    y1 = np.min(obbs[:, 1::2], axis=1)
    x2 = np.max(obbs[:, 0::2], axis=1)
    y2 = np.max(obbs[:, 1::2], axis=1)
    scores = dets[:, 8]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    polys = dets[:, 0:8]

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # cheap overlap of the axis-aligned bounding boxes first
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        hbb_inter = w * h
        hbb_ovr = hbb_inter / (areas[i] + areas[rest] - hbb_inter)

        # only candidates whose bounding boxes overlap need the exact
        # polygon iou, which replaces the bounding-box estimate
        h_inds = np.where(hbb_ovr > 0)[0]
        tmp_order = order[h_inds + 1]
        for j in range(tmp_order.size):
            hbb_ovr[h_inds[j]] = rbox_iou(polys[i], polys[tmp_order[j]])

        # +1 because hbb_ovr is indexed relative to order[1:]
        inds = np.where(hbb_ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
def poly2origpoly(poly, x, y, rate):
    """
    Args:
        poly: flat list of x, y coordinates
        x: offset added to every x coordinate
        y: offset added to every y coordinate
        rate: value (number or numeric string) the shifted coordinates are
            divided by
    Returns: flat list of shifted and rescaled coordinates
    """
    origpoly = []
    # walk the coordinates pairwise; a trailing unpaired value is dropped
    for px, py in zip(poly[0::2], poly[1::2]):
        orig_x = float(px + x) / float(rate)
        orig_y = float(py + y) / float(rate)
        origpoly.extend((orig_x, orig_y))
    return origpoly
def nmsbynamedict(nameboxdict, nms, thresh):
    """
    Args:
        nameboxdict: dict mapping an image name to its list of detections
        nms: function called as nms(dets_array, thresh) that returns the
            indices to keep
        thresh: nms threshold
    Returns: dict with the same keys that holds only the kept detections
    """
    nameboxnmsdict = {}
    for imgname, dets in nameboxdict.items():
        keep = nms(np.array(dets), thresh)
        nameboxnmsdict[imgname] = [dets[index] for index in keep]
    return nameboxnmsdict
def merge_single(output_dir, nms, nms_thresh, pred_class_lst):
    """
    Map the patch predictions of one class back to their original images,
    run nms per image and write the result to <output_dir>/<class_name>.txt

    Args:
        output_dir: directory the result file is written to
        nms: nms function, called as nms(dets, thresh)
        nms_thresh: nms threshold
        pred_class_lst: tuple (class_name, pred_bbox_list); every entry of
            pred_bbox_list is a space separated line made of the patch
            name, the confidence and the polygon coordinates
    Returns: None
    """
    class_name, pred_bbox_list = pred_class_lst
    # the patch name carries the scale and the x / y offset of the patch
    offset_pattern = re.compile(r'__\d+___\d+')
    rate_pattern = re.compile(r'__([\d+\.]+)__\d+___')

    nameboxdict = {}
    for line in pred_bbox_list:
        fields = line.split(' ')
        subname = fields[0]
        oriname = subname.split('__')[0]
        offsets = re.findall(r'\d+', offset_pattern.findall(subname)[0])
        x, y = int(offsets[0]), int(offsets[1])
        rate = rate_pattern.findall(subname)[0]
        confidence = fields[1]
        poly = [float(value) for value in fields[2:]]
        # detection layout: coordinates first, confidence last
        det = poly2origpoly(poly, x, y, rate)
        det.append(float(confidence))
        nameboxdict.setdefault(oriname, []).append(det)

    nameboxnmsdict = nmsbynamedict(nameboxdict, nms, nms_thresh)

    # write result
    dstname = os.path.join(output_dir, class_name + '.txt')
    with open(dstname, 'w') as f_out:
        for imgname, dets in nameboxnmsdict.items():
            for det in dets:
                coords = ' '.join(map(str, det[0:-1]))
                f_out.write('{} {} {}'.format(imgname, det[-1], coords) +
                            '\n')
def generate_result(pred_txt_dir,
                    output_dir='output',
                    class_names=wordname_15,
                    nms_thresh=0.1):
    """
    Group the predictions found in pred_txt_dir by class and merge every
    class in its own worker process.

    pred_txt_dir: dir of pred txt; every *.txt file holds one prediction
        per line, starting with the class name
    output_dir: dir of output
    class_names: class names of data
    nms_thresh: nms threshold used while merging

    Raises KeyError when a prediction names a class that is not in
    class_names.
    """
    pred_txt_list = glob.glob("{}/*.txt".format(pred_txt_dir))

    # step1: summary pred bbox
    pred_classes = {class_name: [] for class_name in class_names}
    for current_txt in pred_txt_list:
        img_id = os.path.split(current_txt)[1]
        img_id = img_id.split('.txt')[0]
        with open(current_txt) as f:
            for line in f:
                # A blank line (e.g. a trailing newline) has no class name
                # and used to end in a KeyError; skip it.
                if not line.strip():
                    continue
                item = line.split(' ')
                pred_class = item[0]
                # the class name is replaced by the image id of the file
                item[0] = img_id
                pred_classes[pred_class].append(' '.join(item))

    pred_classes_lst = []
    for class_name, pred_bboxes in pred_classes.items():
        print('class_name: {}, count: {}'.format(class_name,
                                                 len(pred_bboxes)))
        pred_classes_lst.append((class_name, pred_bboxes))

    # step2: merge
    nms = py_cpu_nms_poly_fast
    mergesingle_fn = partial(merge_single, output_dir, nms, nms_thresh)
    pool = Pool(len(class_names))
    try:
        pool.map(mergesingle_fn, pred_classes_lst)
    finally:
        # release the worker processes even when a merge fails
        pool.close()
        pool.join()
def parse_args():
    """Parse the command line options of the result merging script."""
    parser = argparse.ArgumentParser(description='generate test results')
    option_specs = (
        ('--pred_txt_dir', dict(
            type=str, help='path of pred txt dir')),
        ('--output_dir', dict(
            type=str, default='output', help='path of output dir')),
        ('--data_type', dict(
            type=str, default='dota10', help='data type')),
        ('--nms_thresh', dict(
            type=float,
            default=0.1,
            help='nms threshold while merging results')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
if __name__ == '__main__':
    args = parse_args()

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    class_names = DATA_CLASSES[args.data_type]
    # Forward --nms_thresh: it was parsed but never handed on, so the
    # default threshold of generate_result was always used.
    generate_result(args.pred_txt_dir, output_dir, class_names,
                    args.nms_thresh)
    print('done!')

View File

@@ -0,0 +1,378 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import six
import glob
import time
import yaml
import argparse
import cv2
import numpy as np
import paddle
import paddle.version as paddle_version
from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version
# Values of the 'arch' entry of infer_cfg.yml for which init_predictor
# collects, or reuses, a TensorRT shape range file.
TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
def check_version(version='2.2'):
    """Check that the installed PaddlePaddle satisfies ``version``.

    Args:
        version (str): minimum version such as ``'2.4'``, or ``'develop'``.

    Raises:
        Exception: if the installed version is lower than ``version``, or if
            ``'develop'`` is requested and the installed version is not
            reported as ``0.0.0.0``.
    """
    err = "PaddlePaddle version {} or higher is required, " \
          "or a suitable develop version is satisfied as well. \n" \
          "Please make sure the version is good with your code.".format(version)

    version_installed = [
        paddle_version.major, paddle_version.minor, paddle_version.patch,
        paddle_version.rc
    ]
    # an all-zero version is accepted unconditionally
    # (presumably how a develop build reports itself - confirm)
    if version_installed == ['0', '0', '0', '0']:
        return
    if version == 'develop':
        raise Exception("PaddlePaddle develop version is required!")

    for installed, required in zip(version_installed, version.split('.')):
        # Compare numerically whenever possible: as strings '10' sorts
        # below '4', which would reject a newer release.
        if installed.isdigit() and required.isdigit():
            installed, required = int(installed), int(required)
        if installed > required:
            return
        if installed < required:
            raise Exception(err)
def check_trt_version(version='8.2'):
    """Check that the TensorRT runtime is at least ``version``.

    Args:
        version (str): minimum version, dot separated integers.

    Raises:
        Exception: if the runtime version reported by
            ``get_trt_runtime_version`` is lower than ``version``.
    """
    # the two sentences used to be joined without any separator
    err = "TensorRT version {} or higher is required. " \
          "Please make sure the version is good with your code.".format(version)
    version_split = [int(part) for part in version.split('.')]
    version_installed = get_trt_runtime_version()
    # compare component by component; the first difference decides
    for installed, required in zip(version_installed, version_split):
        if installed > required:
            return
        if installed < required:
            raise Exception(err)
# preprocess ops
def decode_image(im_file, im_info):
    """Load an image and initialise ``im_info`` for it.

    Args:
        im_file (str|np.ndarray): path of an image file, or an already
            decoded image that is used as is.
        im_info (dict): updated in place with ``im_shape`` (the first two
            dimensions of the image) and ``scale_factor`` (``[1., 1.]``).

    Returns:
        tuple: ``(im, im_info)``.
    """
    if not isinstance(im_file, str):
        im = im_file
    else:
        with open(im_file, 'rb') as f:
            raw = f.read()
        buf = np.frombuffer(raw, dtype='uint8')
        # decoded in BGR order, then converted to RGB
        im = cv2.cvtColor(cv2.imdecode(buf, 1), cv2.COLOR_BGR2RGB)
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info
class Resize(object):
    """Resize an image and record the new shape and scale in ``im_info``.

    Args:
        target_size (int|list): target ``[h, w]``; an int is used for both.
        keep_ratio (bool): use one scale for both axes when True.
        interp (int): interpolation flag passed to ``cv2.resize``.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        self.target_size = ([target_size] * 2
                            if isinstance(target_size, int) else target_size)
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize ``im``; update ``im_shape`` and ``scale_factor``."""
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        _ = im.shape[2]  # IndexError unless the array has a third axis
        scale_y, scale_x = self.generate_scale(im)
        resized = cv2.resize(
            im, None, None, fx=scale_x, fy=scale_y, interpolation=self.interp)
        im_info['im_shape'] = np.array(resized.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [scale_y, scale_x]).astype('float32')
        return resized, im_info

    def generate_scale(self, im):
        """Return ``(im_scale_y, im_scale_x)`` for ``im``."""
        origin_shape = im.shape[:2]
        _ = im.shape[2]  # IndexError unless the array has a third axis
        if not self.keep_ratio:
            resize_h, resize_w = self.target_size
            return (resize_h / float(origin_shape[0]),
                    resize_w / float(origin_shape[1]))

        short_side, long_side = np.min(origin_shape), np.max(origin_shape)
        target_short = np.min(self.target_size)
        target_long = np.max(self.target_size)
        im_scale = float(target_short) / float(short_side)
        # fit the long side instead when the short-side scale overshoots it
        if np.round(im_scale * long_side) > target_long:
            im_scale = float(target_long) / float(long_side)
        return im_scale, im_scale
class Permute(object):
    """Move the last axis of a 3-D image to the front (HWC -> CHW)."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return the transposed image together with the untouched info."""
        return im.transpose((2, 0, 1)), im_info
class NormalizeImage(object):
    """Scale an image to [0, 1] and / or normalise it by mean and std.

    Args:
        mean (list): per-channel mean, broadcast over the last axis.
        std (list): per-channel std, broadcast over the last axis.
        is_scale (bool): multiply by 1/255 first when True.
        norm_type (str): mean / std are applied only for ``'mean_std'``.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean, self.std = mean, std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Return the float32 image and the untouched ``im_info``."""
        # copy=False: a float32 input is modified in place
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im *= 1.0 / 255.0
        if self.norm_type != 'mean_std':
            return im, im_info
        # in-place arithmetic keeps the float32 dtype
        im -= np.array(self.mean)[None, None, :]
        im /= np.array(self.std)[None, None, :]
        return im, im_info
class PadStride(object):
    """Zero-pad a CHW image so that height and width divide by ``stride``.

    Args:
        stride (int): padding is skipped when it is <= 0.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return the padded float32 image and the untouched ``im_info``."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info
        channels, height, width = im.shape
        # round both sides up to the next multiple of the stride
        padded_h = int(np.ceil(float(height) / stride) * stride)
        padded_w = int(np.ceil(float(width) / stride) * stride)
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        # the image stays in the top-left corner
        canvas[:, :height, :width] = im
        return canvas, im_info
def preprocess(im, preprocess_ops):
    """Decode ``im`` and pass it through every op of ``preprocess_ops``.

    Args:
        im (str|np.ndarray): image path or decoded image.
        preprocess_ops (list): callables taking and returning
            ``(im, im_info)``.

    Returns:
        tuple: ``(im, im_info)`` after the last op.
    """
    # process image by preprocess_ops
    im_info = dict(
        scale_factor=np.array(
            [1., 1.], dtype=np.float32), im_shape=None)
    im, im_info = decode_image(im, im_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
def parse_args():
    """Parse the command line options of the benchmark script."""
    parser = argparse.ArgumentParser()
    option_specs = (
        ('--model_dir', dict(
            type=str, help='directory of inference model')),
        ('--run_mode', dict(
            type=str, default='paddle', help='running mode')),
        ('--batch_size', dict(
            type=int, default=1, help='batch size')),
        ('--image_dir', dict(
            type=str,
            default='/paddle/data/DOTA_1024_ss/test1024/images',
            help='directory of test images')),
        ('--warmup_iter', dict(
            type=int, default=5, help='num of warmup iters')),
        ('--total_iter', dict(
            type=int, default=2000, help='num of total iters')),
        ('--log_iter', dict(
            type=int, default=50, help='num of log interval')),
        ('--tuned_trt_shape_file', dict(
            type=str,
            default='shape_range_info.pbtxt',
            help='dynamic shape range info')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
# Build a Paddle Inference predictor for the model stored in FLAGS.model_dir.
# Reads infer_cfg.yml, model.pdmodel and model.pdiparams from that directory
# and returns (predictor, yml_conf), where yml_conf is the parsed yaml file.
# FLAGS attributes read here: model_dir, run_mode, batch_size,
# tuned_trt_shape_file.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the TensorRT branches below should be confirmed against the real file.
def init_predictor(FLAGS):
model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size
yaml_file = os.path.join(model_dir, 'infer_cfg.yml')
with open(yaml_file) as f:
yml_conf = yaml.safe_load(f)
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
# run modes that enable TensorRT, mapped to the precision they request
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
arch = yml_conf['arch']
# the shape range file is looked up inside model_dir
tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)
if run_mode in precision_map.keys():
# for the tuned architectures a missing shape range file is collected on
# this run; the printed message asks the user to rerun afterwards
if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(
tuned_trt_shape_file):
print(
'dynamic shape range info is saved in {}. After that, rerun the code'.
format(tuned_trt_shape_file))
config.collect_shape_range_info(tuned_trt_shape_file)
# the workspace size grows linearly with the batch size
config.enable_tensorrt_engine(
workspace_size=(1 << 25) * batch_size,
max_batch_size=batch_size,
min_subgraph_size=yml_conf['min_subgraph_size'],
precision_mode=precision_map[run_mode],
use_static=True,
use_calib_mode=False)
if yml_conf['use_dynamic_shape']:
# tuned architectures reuse the collected shape range file; every other
# architecture gets the fixed min / max / opt shapes below
if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(
tuned_trt_shape_file):
config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file,
True)
else:
min_input_shape = {
'image': [batch_size, 3, 640, 640],
'scale_factor': [batch_size, 2]
}
max_input_shape = {
'image': [batch_size, 3, 1280, 1280],
'scale_factor': [batch_size, 2]
}
opt_input_shape = {
'image': [batch_size, 3, 1024, 1024],
'scale_factor': [batch_size, 2]
}
config.set_trt_dynamic_shape_info(
min_input_shape, max_input_shape, opt_input_shape)
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
return predictor, yml_conf
def create_preprocess_ops(yml_conf):
    """Instantiate the ops listed under ``Preprocess`` in ``yml_conf``.

    Every entry is a dict whose ``type`` names the op; the remaining items
    are passed to it as keyword arguments. The entries are not modified.

    Returns:
        list: the op instances, in config order.
    """
    ops = []
    for op_info in yml_conf['Preprocess']:
        kwargs = dict(op_info)
        op_type = kwargs.pop('type')
        # NOTE(review): eval() executes whatever expression the config holds;
        # only load configs from a trusted source.
        op_factory = eval(op_type)
        ops.append(op_factory(**kwargs))
    return ops
def get_test_images(image_dir):
    """Return the image files found directly inside ``image_dir``.

    Files ending in jpg, jpeg, png or bmp (all lower or all upper case) are
    collected as absolute paths. The order of the list is not defined.
    """
    infer_dir = os.path.abspath(image_dir)
    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
    exts = lower_exts + [ext.upper() for ext in lower_exts]
    found = set()
    for ext in exts:
        found |= set(glob.glob('{}/*.{}'.format(infer_dir, ext)))
    return list(found)
def create_inputs(image_files, preprocess_ops):
    """Preprocess ``image_files`` and stack them into one input batch.

    Args:
        image_files (list): images handed to ``preprocess`` one by one.
        preprocess_ops (list): ops applied to every image.

    Returns:
        dict: float32 arrays under ``im_shape``, ``scale_factor`` and
        ``image``, each stacked along a new first axis.
    """
    processed = [preprocess(path, preprocess_ops) for path in image_files]
    im_list = [im for im, _ in processed]
    im_info_list = [im_info for _, im_info in processed]

    inputs = dict()
    for key in ('im_shape', 'scale_factor'):
        inputs[key] = np.stack(
            [im_info[key] for im_info in im_info_list],
            axis=0).astype('float32')
    inputs['image'] = np.stack(im_list, axis=0).astype('float32')
    return inputs
# Time predictor.run() over the images of FLAGS.image_dir and print the
# throughput. FLAGS attributes read here: image_dir, batch_size, warmup_iter,
# log_iter, total_iter (plus whatever init_predictor reads).
# NOTE(review): indentation is not visible in this view, so the nesting of
# the logging branches below should be confirmed against the real file.
def measure_speed(FLAGS):
predictor, yml_conf = init_predictor(FLAGS)
input_names = predictor.get_input_names()
preprocess_ops = create_preprocess_ops(yml_conf)
image_files = get_test_images(FLAGS.image_dir)
batch_size = FLAGS.batch_size
warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter
total_time = 0
fps = 0
# i is the index of the first image of the batch and advances by batch_size
for i in range(0, total_iter, batch_size):
# make data ready
# NOTE(review): the slice is empty once i reaches len(image_files), and
# create_inputs stacks the list; verify image_dir holds at least total_iter
# images.
inputs = create_inputs(image_files[i:i + batch_size], preprocess_ops)
for name in input_names:
input_tensor = predictor.get_input_handle(name)
input_tensor.copy_from_cpu(inputs[name])
# synchronize before and after run() so that only the run itself is timed
paddle.device.cuda.synchronize()
# start running
start_time = time.perf_counter()
predictor.run()
paddle.device.cuda.synchronize()
# runs that start before index warmup_iter are not counted
if i >= warmup_iter:
total_time += time.perf_counter() - start_time
# NOTE(review): (i + 1) is tested against log_iter and total_iter although i
# advances by batch_size; with batch_size > 1 these conditions may rarely or
# never hold - confirm the intended behaviour.
if (i + 1) % log_iter == 0:
fps = (i + 1 - warmup_iter) / total_time
print(
f'Done image [{i + 1:<3}/ {total_iter}], '
f'fps: {fps:.1f} img / s, '
f'times per image: {1000 / fps:.1f} ms / img',
flush=True)
if (i + 1) == total_iter:
fps = (i + 1 - warmup_iter) / total_time
print(
f'Overall fps: {fps:.1f} img / s, '
f'times per image: {1000 / fps:.1f} ms / img',
flush=True)
break
if __name__ == '__main__':
    FLAGS = parse_args()
    # TensorRT run modes are checked against a develop build of Paddle and
    # TensorRT 8.2; every other mode is checked against Paddle 2.4.
    if 'trt' not in FLAGS.run_mode:
        check_version('2.4')
    else:
        check_version('develop')
        check_trt_version('8.2')
    measure_speed(FLAGS)

View File

@@ -0,0 +1,302 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import six
import glob
import copy
import yaml
import argparse
import cv2
import numpy as np
from shapely.geometry import Polygon
from onnxruntime import InferenceSession
# preprocess ops
def decode_image(img_path):
    """Read ``img_path`` as an RGB image.

    Returns:
        tuple: ``(im, img_info)`` where ``img_info`` holds ``im_shape`` (the
        first two dimensions of the image) and ``scale_factor``
        (``[1., 1.]``), both float32.
    """
    with open(img_path, 'rb') as f:
        raw = f.read()
    buf = np.frombuffer(raw, dtype='uint8')
    # decoded in BGR order, then converted to RGB
    bgr = cv2.imdecode(buf, 1)
    im = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img_info = dict(
        im_shape=np.array(
            im.shape[:2], dtype=np.float32),
        scale_factor=np.array(
            [1., 1.], dtype=np.float32))
    return im, img_info
class Resize(object):
    """Resize an image and record the new shape and scale in ``im_info``.

    Args:
        target_size (int|list): target ``[h, w]``; an int is used for both.
        keep_ratio (bool): use one scale for both axes when True.
        interp (int): interpolation flag passed to ``cv2.resize``.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        self.target_size = ([target_size] * 2
                            if isinstance(target_size, int) else target_size)
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize ``im``; update ``im_shape`` and ``scale_factor``."""
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        _ = im.shape[2]  # IndexError unless the array has a third axis
        scale_y, scale_x = self.generate_scale(im)
        resized = cv2.resize(
            im, None, None, fx=scale_x, fy=scale_y, interpolation=self.interp)
        im_info['im_shape'] = np.array(resized.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [scale_y, scale_x]).astype('float32')
        return resized, im_info

    def generate_scale(self, im):
        """Return ``(im_scale_y, im_scale_x)`` for ``im``."""
        origin_shape = im.shape[:2]
        _ = im.shape[2]  # IndexError unless the array has a third axis
        if not self.keep_ratio:
            resize_h, resize_w = self.target_size
            return (resize_h / float(origin_shape[0]),
                    resize_w / float(origin_shape[1]))

        short_side, long_side = np.min(origin_shape), np.max(origin_shape)
        target_short = np.min(self.target_size)
        target_long = np.max(self.target_size)
        im_scale = float(target_short) / float(short_side)
        # fit the long side instead when the short-side scale overshoots it
        if np.round(im_scale * long_side) > target_long:
            im_scale = float(target_long) / float(long_side)
        return im_scale, im_scale
class Permute(object):
    """Move the last axis of a 3-D image to the front (HWC -> CHW)."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return the transposed image together with the untouched info."""
        return im.transpose((2, 0, 1)), im_info
class NormalizeImage(object):
    """Scale an image to [0, 1] and / or normalise it by mean and std.

    Args:
        mean (list): per-channel mean, broadcast over the last axis.
        std (list): per-channel std, broadcast over the last axis.
        is_scale (bool): multiply by 1/255 first when True.
        norm_type (str): mean / std are applied only for ``'mean_std'``.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean, self.std = mean, std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Return the float32 image and the untouched ``im_info``."""
        # copy=False: a float32 input is modified in place
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im *= 1.0 / 255.0
        if self.norm_type != 'mean_std':
            return im, im_info
        # in-place arithmetic keeps the float32 dtype
        im -= np.array(self.mean)[None, None, :]
        im /= np.array(self.std)[None, None, :]
        return im, im_info
class PadStride(object):
    """Zero-pad a CHW image so that height and width divide by ``stride``.

    Args:
        stride (int): padding is skipped when it is <= 0.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return the padded float32 image and the untouched ``im_info``."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info
        channels, height, width = im.shape
        # round both sides up to the next multiple of the stride
        padded_h = int(np.ceil(float(height) / stride) * stride)
        padded_w = int(np.ceil(float(width) / stride) * stride)
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        # the image stays in the top-left corner
        canvas[:, :height, :width] = im
        return canvas, im_info
class Compose:
    """Chain of preprocess ops built from a list of config dicts.

    Every entry of ``transforms`` is a dict whose ``type`` names the op; the
    remaining items are passed to it as keyword arguments.
    """

    def __init__(self, transforms):
        self.transforms = []
        for op_info in transforms:
            kwargs = dict(op_info)
            op_type = kwargs.pop('type')
            # NOTE(review): eval() executes whatever expression the config
            # holds; only load configs from a trusted source.
            op_factory = eval(op_type)
            self.transforms.append(op_factory(**kwargs))

    def __call__(self, img_path):
        """Decode ``img_path``, apply every op and return the model inputs.

        Returns:
            dict: a deep copy of the final ``im_info`` plus the processed
            image under ``image``.
        """
        img, im_info = decode_image(img_path)
        for transform in self.transforms:
            img, im_info = transform(img, im_info)
        inputs = copy.deepcopy(im_info)
        inputs['image'] = img
        return inputs
# postprocess
def rbox_iou(g, p):
    """Return the IoU of two rotated boxes.

    Args:
        g: sequence whose first 8 values are the x, y coordinates of the
            4 corner points of a box.
        p: same layout as ``g``.

    Returns:
        Intersection over union; 0 when either geometry is invalid or the
        union area is 0.
    """
    g = Polygon(np.array(g)[:8].reshape((4, 2))).buffer(0)
    p = Polygon(np.array(p)[:8].reshape((4, 2))).buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0
    # Intersect the buffered geometries directly. buffer(0) may hand back a
    # geometry that is not a single Polygon (e.g. a MultiPolygon for a
    # self-crossing box), which the Polygon() constructor cannot wrap.
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    return inter / union
def multiclass_nms_rotated(pred_bboxes,
                           pred_scores,
                           iou_threshlod=0.1,
                           score_threshold=0.1):
    """
    Per-class nms on rotated boxes.

    Args:
        pred_bboxes (numpy.ndarray): [B, N, 8]
        pred_scores (numpy.ndarray): [B, C, N]
        iou_threshlod (float): a box is dropped when its iou with an
            already kept box of the same class exceeds this value
        score_threshold (float): boxes scoring at or below it are ignored
    Return:
        bboxes (numpy.ndarray): [N, 10], each row is class id, score and
            the 8 box coordinates
        bbox_num (numpy.ndarray): [B], number of kept boxes per image
    """
    bbox_num = []
    bboxes = []
    for bbox_per_img, score_per_img in zip(pred_bboxes, pred_scores):
        num_per_img = 0
        for cls_id, score_per_cls in enumerate(score_per_img):
            keep_mask = score_per_cls > score_threshold
            bbox = bbox_per_img[keep_mask]
            score = score_per_cls[keep_mask]

            # visit the candidates from the highest to the lowest score
            idx = score.argsort()[::-1]
            bbox = bbox[idx]
            score = score[idx]

            keep_idx = []
            for i, b in enumerate(bbox):
                if any(
                        rbox_iou(b, bbox[gi]) > iou_threshlod
                        for gi in keep_idx):
                    continue
                keep_idx.append(i)

            keep_cls_ids = np.ones(len(keep_idx)) * cls_id
            bboxes.append(
                np.concatenate(
                    [
                        keep_cls_ids[:, None], score[keep_idx][:, None],
                        bbox[keep_idx]
                    ],
                    axis=-1))
            num_per_img += len(keep_idx)

        bbox_num.append(num_per_img)

    # np.concatenate rejects an empty list (empty batch or no classes)
    if not bboxes:
        return np.zeros((0, 10)), np.array(bbox_num, dtype=int)
    return np.concatenate(bboxes, axis=0), np.array(bbox_num)
def get_test_images(infer_dir, infer_img):
    """
    Get image path list in TEST mode

    infer_img wins when it is given; otherwise every jpg, jpeg, png and bmp
    file (all lower or all upper case extension) directly inside infer_dir
    is returned as an absolute path, in no particular order.
    """
    assert not (infer_img is None and infer_dir is None), \
        "--image_file or --image_dir should be set"
    assert infer_img is None or os.path.isfile(infer_img), \
        "{} is not a file".format(infer_img)
    assert infer_dir is None or os.path.isdir(infer_dir), \
        "{} is not a directory".format(infer_dir)

    # infer_img has a higher priority
    if infer_img and os.path.isfile(infer_img):
        return [infer_img]

    infer_dir = os.path.abspath(infer_dir)
    assert os.path.isdir(infer_dir), \
        "infer_dir {} is not a directory".format(infer_dir)

    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
    exts = lower_exts + [ext.upper() for ext in lower_exts]
    found = set()
    for ext in exts:
        found |= set(glob.glob('{}/*.{}'.format(infer_dir, ext)))
    images = list(found)

    assert len(images) > 0, "no image found in {}".format(infer_dir)
    print("Found {} inference images in total.".format(len(images)))
    return images
def predict_image(infer_config, predictor, img_list):
    """Run the model on every image and print the detections.

    Args:
        infer_config (dict): parsed infer config; ``Preprocess`` and
            ``draw_threshold`` are read.
        predictor: session object providing ``get_inputs()`` and ``run()``.
        img_list (list[str]): paths of the images to process.
    """
    # load preprocess transforms
    transforms = Compose(infer_config['Preprocess'])
    # the input names are a property of the model, so look them up once
    inputs_name = [var.name for var in predictor.get_inputs()]

    # predict image
    for img_path in img_list:
        inputs = transforms(img_path)
        # prepend a batch axis of size 1 to every model input
        inputs = {k: inputs[k][None, ] for k in inputs_name}

        outputs = predictor.run(output_names=None, input_feed=inputs)

        bboxes, _ = multiclass_nms_rotated(
            np.array(outputs[0]), np.array(outputs[1]))
        print("ONNXRuntime predict: ")
        for bbox in bboxes:
            if bbox[0] > -1 and bbox[1] > infer_config['draw_threshold']:
                # Class id, score and the 8 coordinates, space separated.
                # The middle literal lacked its trailing space, which fused
                # bbox[5] and bbox[6] into one token.
                print(f"{int(bbox[0])} {bbox[1]} "
                      f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]} "
                      f"{bbox[6]} {bbox[7]} {bbox[8]} {bbox[9]}")
def parse_args():
    """Parse the command line options of the ONNX inference script."""
    parser = argparse.ArgumentParser(description=__doc__)
    option_specs = (
        ("--infer_cfg", dict(
            type=str, help="infer_cfg.yml")),
        ('--onnx_file', dict(
            type=str, default="model.onnx", help="onnx model file path")),
        ("--image_dir", dict(type=str)),
        ("--image_file", dict(type=str)),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
if __name__ == '__main__':
    FLAGS = parse_args()

    # collect the images first: --image_file wins over --image_dir
    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)

    # create the session for the exported model
    predictor = InferenceSession(FLAGS.onnx_file)

    # read the infer config that describes preprocessing and thresholds
    with open(FLAGS.infer_cfg) as f:
        infer_config = yaml.safe_load(f)

    predict_image(infer_config, predictor, img_list)

View File

@@ -0,0 +1,128 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import argparse
from convert import load_dota_infos, data_to_coco
from slicebase import SliceBase
# Class list selected by the 'dota10' data type.
wordname_15 = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]
# 'dota15' appends one class to the list above, 'dota20' two more.
wordname_16 = list(wordname_15)
wordname_16.append('container-crane')
wordname_18 = list(wordname_16)
wordname_18.extend(['airport', 'helipad'])
# Maps the value of --data_type to its class list.
DATA_CLASSES = dict(
    dota10=wordname_15, dota15=wordname_16, dota20=wordname_18)
def parse_args():
    """Parse the command line options of the data preparation script."""
    parser = argparse.ArgumentParser('prepare data for training')
    option_specs = (
        ('--input_dirs', dict(
            nargs='+',
            type=str,
            default=None,
            help='input dirs which contain image and labelTxt dir')),
        ('--output_dir', dict(
            type=str,
            default=None,
            help='output dirs which contain image and labelTxt dir and coco style json file'
        )),
        ('--coco_json_file', dict(
            type=str, default='', help='coco json annotation files')),
        ('--subsize', dict(
            type=int, default=1024, help='patch size')),
        ('--gap', dict(
            type=int, default=200, help='step size')),
        ('--data_type', dict(
            type=str, default='dota10', help='data type')),
        ('--rates', dict(
            nargs='+',
            type=float,
            default=[1.],
            help='scales for multi-slice training')),
        ('--nproc', dict(
            type=int, default=8, help='the processor number')),
        ('--iof_thr', dict(
            type=float,
            default=0.5,
            help='the minimal iof between a object and a window')),
        ('--image_only', dict(
            action='store_true',
            default=False,
            help='only processing image')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
def load_dataset(input_dir, nproc, data_type):
    """Load the samples of ``input_dir``.

    Args:
        input_dir (str): dataset root passed to ``load_dota_infos``.
        nproc (int): number of processes passed to ``load_dota_infos``.
        data_type (str): must contain ``'dota'`` (case-insensitive).

    Raises:
        ValueError: for any other ``data_type``.
    """
    if 'dota' not in data_type.lower():
        raise ValueError('only dota dataset is supported now')
    return load_dota_infos(input_dir, nproc)
def main():
    """Slice every input dataset and optionally export a COCO json file.

    The infos of all ``--input_dirs`` are gathered, sliced into
    ``--output_dir`` and, when ``--coco_json_file`` is non-empty, the sliced
    output is loaded again and converted with ``data_to_coco``.
    """
    args = parse_args()

    # Collect the infos of all input directories into one list.
    source_infos = []
    for src_dir in args.input_dirs:
        source_infos.extend(load_dataset(src_dir, args.nproc, args.data_type))

    SliceBase(
        args.gap,
        args.subsize,
        args.iof_thr,
        num_process=args.nproc,
        image_only=args.image_only).slice_data(source_infos, args.rates,
                                               args.output_dir)

    # An empty file name means no COCO export was requested.
    if not args.coco_json_file:
        return

    sliced_infos = load_dota_infos(args.output_dir, args.nproc)
    json_path = os.path.join(args.output_dir, args.coco_json_file)
    data_to_coco(sliced_infos, json_path, DATA_CLASSES[args.data_type],
                 args.nproc)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,267 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import copy
from numbers import Number
from multiprocessing import Pool
import cv2
import numpy as np
from tqdm import tqdm
import shapely.geometry as shgeo
def choose_best_pointorder_fit_another(poly1, poly2):
    """Rotate the vertex order of ``poly1`` so that it best matches ``poly2``.

    Each of the four cyclic shifts of the vertices of ``poly1`` is compared
    with ``poly2`` through the sum of squared coordinate differences; the
    shift with the smallest sum wins.

    Args:
        poly1 (sequence): eight values ``x1, y1, ..., x4, y4``.
        poly2 (sequence): reference polygon in the same flat layout.

    Returns:
        np.ndarray: the eight coordinates of ``poly1`` in the chosen order.
    """
    # Unpacking enforces that poly1 holds exactly eight values.
    x1, y1, x2, y2, x3, y3, x4, y4 = poly1
    corners = [(x1, y1), (x2, y2), (x3, y3), (x4, y4)]
    target = np.array(poly2)

    candidates = []
    costs = []
    for start in range(4):
        rotated = corners[start:] + corners[:start]
        candidate = np.array([value for corner in rotated for value in corner])
        candidates.append(candidate)
        costs.append(np.sum((candidate - target)**2))

    best = np.argsort(np.array(costs))[0]
    return candidates[best]
def cal_line_length(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    ``math.hypot`` is used instead of ``sqrt(pow(dx, 2) + pow(dy, 2))``
    because ``math.pow`` raises ``OverflowError`` once a squared difference
    no longer fits into a float.

    Args:
        point1 (sequence): ``(x, y)`` of the first point.
        point2 (sequence): ``(x, y)`` of the second point.

    Returns:
        float: length of the segment between the two points.
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
class SliceBase(object):
    """Cut images and their quadrilateral annotations into square patches.

    Args:
        gap (int): overlap between neighbouring windows; windows are stepped
            by ``subsize - gap``.
        subsize (int): side length of a window.
        thresh (float): minimal ``intersection area / object area`` for a
            clipped object to keep its own ``difficult`` flag; clipped objects
            below it are written with the flag ``'2'``.
        choosebestpoint (bool): reorder the vertices of a clipped polygon so
            that they best match the vertex order of the source annotation.
        ext (str): file extension of the written patches.
        padding (bool): write every patch on a zero-filled
            ``subsize x subsize`` canvas.
        num_process (int): number of worker processes used by ``slice_data``;
            values ``<= 1`` keep the work in the current process.
        image_only (bool): skip the annotation files.
    """

    def __init__(self,
                 gap=512,
                 subsize=1024,
                 thresh=0.7,
                 choosebestpoint=True,
                 ext='.png',
                 padding=True,
                 num_process=8,
                 image_only=False):
        self.gap = gap
        self.subsize = subsize
        # Distance between the origins of two consecutive windows.
        self.slide = subsize - gap
        self.thresh = thresh
        self.choosebestpoint = choosebestpoint
        self.ext = ext
        self.padding = padding
        self.num_process = num_process
        self.image_only = image_only

    def get_windows(self, height, width):
        """Enumerate the sliding windows of an image.

        Windows advance by ``self.slide`` along both axes. The last window of
        a row or column is moved back so that it ends at the image border
        (or starts at 0 when the image is smaller than ``subsize``).

        Args:
            height (int): image height.
            width (int): image width.

        Returns:
            list[tuple]: ``(left, up, right, down)`` per window, ordered
                column by column.
        """
        windows = []
        left, up = 0, 0
        while (left < width):
            if (left + self.subsize >= width):
                left = max(width - self.subsize, 0)
            up = 0
            while (up < height):
                if (up + self.subsize >= height):
                    up = max(height - self.subsize, 0)
                right = min(left + self.subsize, width - 1)
                down = min(up + self.subsize, height - 1)
                windows.append((left, up, right, down))
                if (up + self.subsize >= height):
                    break
                else:
                    up = up + self.slide
            if (left + self.subsize >= width):
                break
            else:
                left = left + self.slide
        return windows

    def slice_image_single(self, image, windows, output_dir, output_name):
        """Write one patch per window into ``<output_dir>/images``.

        Args:
            image (np.ndarray): source image indexed as ``[row, column]``
                with a third channel axis.
            windows (list[tuple]): windows from ``get_windows``.
            output_dir (str): output root directory.
            output_name (str): file name prefix; ``<left>___<up>`` and
                ``self.ext`` are appended.
        """
        image_dir = os.path.join(output_dir, 'images')
        for (left, up, right, down) in windows:
            image_name = output_name + str(left) + '___' + str(up) + self.ext
            subimg = copy.deepcopy(image[up:up + self.subsize, left:left +
                                         self.subsize])
            h, w, _ = subimg.shape
            if (self.padding):
                # Border patches may be smaller than subsize; the remainder
                # of the canvas stays zero.
                outimg = np.zeros((self.subsize, self.subsize, 3))
                outimg[0:h, 0:w, :] = subimg
                cv2.imwrite(os.path.join(image_dir, image_name), outimg)
            else:
                cv2.imwrite(os.path.join(image_dir, image_name), subimg)

    def iof(self, poly1, poly2):
        """Intersect two polygons and relate the overlap to ``poly1``.

        Args:
            poly1: polygon whose area is the denominator; its area must be
                non-zero.
            poly2: polygon to intersect with.

        Returns:
            tuple: ``(intersection geometry, intersection area / poly1 area)``.
        """
        inter_poly = poly1.intersection(poly2)
        inter_area = inter_poly.area
        poly1_area = poly1.area
        half_iou = inter_area / poly1_area
        return inter_poly, half_iou

    def translate(self, poly, left, up):
        """Shift a flat ``x, y, x, y, ...`` polygon into window coordinates.

        Every shifted coordinate is truncated with ``int``.

        Args:
            poly (sequence): flat polygon coordinates.
            left (number): x offset of the window.
            up (number): y offset of the window.

        Returns:
            np.ndarray: float array of ``len(poly)`` shifted coordinates.
        """
        n = len(poly)
        out_poly = np.zeros(n)
        for i in range(n // 2):
            out_poly[i * 2] = int(poly[i * 2] - left)
            out_poly[i * 2 + 1] = int(poly[i * 2 + 1] - up)
        return out_poly

    def get_poly4_from_poly5(self, poly):
        """Reduce a five-vertex polygon to four vertices.

        The two end points of the shortest edge are replaced by the midpoint
        of that edge.

        Args:
            poly (sequence): ten values ``x1, y1, ..., x5, y5``.

        Returns:
            list: eight values of the resulting quadrilateral.
        """
        distances = [
            cal_line_length((poly[i * 2], poly[i * 2 + 1]),
                            (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1]))
            for i in range(int(len(poly) / 2 - 1))
        ]
        # Closing edge between the first and the last vertex.
        distances.append(
            cal_line_length((poly[0], poly[1]), (poly[8], poly[9])))
        pos = np.array(distances).argsort()[0]
        count = 0
        out_poly = []
        while count < 5:
            if (count == pos):
                # Start of the shortest edge: emit the edge midpoint.
                out_poly.append(
                    (poly[count * 2] + poly[(count * 2 + 2) % 10]) / 2)
                out_poly.append(
                    (poly[(count * 2 + 1) % 10] + poly[(count * 2 + 3) % 10]) /
                    2)
                count = count + 1
            elif (count == (pos + 1) % 5):
                # End of the shortest edge: already merged into the midpoint.
                count = count + 1
                continue
            else:
                out_poly.append(poly[count * 2])
                out_poly.append(poly[count * 2 + 1])
                count = count + 1
        return out_poly

    def slice_anno_single(self, annos, windows, output_dir, output_name):
        """Write one annotation file per window into ``<output_dir>/labelTxt``.

        Objects fully inside a window are only shifted. Partially covered
        objects are clipped to the window, kept only when the clipped shape
        has four or five vertices, reduced to four vertices, shifted and
        clamped to ``[1, subsize]``. Objects with zero area or without
        overlap are skipped.

        Args:
            annos (list[dict]): annotations with the keys ``'poly'`` (eight
                values), ``'name'`` and ``'difficult'``.
            windows (list[tuple]): windows from ``get_windows``.
            output_dir (str): output root directory.
            output_name (str): file name prefix; ``<left>___<up>.txt`` is
                appended.
        """
        anno_dir = os.path.join(output_dir, 'labelTxt')
        for (left, up, right, down) in windows:
            image_poly = shgeo.Polygon(
                [(left, up), (right, up), (right, down), (left, down)])
            anno_file = output_name + str(left) + '___' + str(up) + '.txt'
            with open(os.path.join(anno_dir, anno_file), 'w') as f:
                for anno in annos:
                    gt_poly = shgeo.Polygon(
                        [(anno['poly'][0], anno['poly'][1]),
                         (anno['poly'][2], anno['poly'][3]),
                         (anno['poly'][4], anno['poly'][5]),
                         (anno['poly'][6], anno['poly'][7])])
                    if gt_poly.area <= 0:
                        continue
                    inter_poly, iof = self.iof(gt_poly, image_poly)
                    if iof == 1:
                        final_poly = self.translate(anno['poly'], left, up)
                    elif iof > 0:
                        inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
                        # The exterior ring repeats its first point at the
                        # end; drop the duplicate.
                        out_poly = list(inter_poly.exterior.coords)[0:-1]
                        if len(out_poly) < 4 or len(out_poly) > 5:
                            continue

                        final_poly = []
                        for p in out_poly:
                            final_poly.append(p[0])
                            final_poly.append(p[1])

                        if len(out_poly) == 5:
                            final_poly = self.get_poly4_from_poly5(final_poly)

                        if self.choosebestpoint:
                            final_poly = choose_best_pointorder_fit_another(
                                final_poly, anno['poly'])

                        final_poly = self.translate(final_poly, left, up)
                        final_poly = np.clip(final_poly, 1, self.subsize)
                    else:
                        continue

                    outline = ' '.join(map(str, final_poly))
                    if iof >= self.thresh:
                        outline = outline + ' ' + anno['name'] + ' ' + str(
                            anno['difficult'])
                    else:
                        # Too little of the object is visible in this window.
                        outline = outline + ' ' + anno['name'] + ' ' + '2'

                    f.write(outline + '\n')

    @staticmethod
    def _scale_annos(annos, rate):
        """Return copies of ``annos`` whose polygons are scaled by ``rate``.

        The input dictionaries are left untouched, so the same annotations
        can be sliced again with another rate.
        """
        scaled_annos = []
        for anno in annos:
            scaled = dict(anno)
            scaled['poly'] = [rate * value for value in anno['poly']]
            scaled_annos.append(scaled)
        return scaled_annos

    def slice_data_single(self, info, rate, output_dir):
        """Slice one image (and its annotations) at one scale.

        Args:
            info (dict): entry with the keys ``'image_file'`` and
                ``'annotation'``; it is not modified.
            rate (number): resize factor applied to the image and to the
                polygon coordinates before slicing.
            output_dir (str): output root directory.
        """
        file_name = info['image_file']
        base_name = os.path.splitext(os.path.split(file_name)[-1])[0]
        base_name = base_name + '__' + str(rate) + '__'
        img = cv2.imread(file_name)
        # cv2.imread signals an unreadable file by returning None.
        if img is None or img.shape == ():
            return

        if (rate != 1):
            resize_img = cv2.resize(
                img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
        else:
            resize_img = img

        height, width, _ = resize_img.shape
        windows = self.get_windows(height, width)
        self.slice_image_single(resize_img, windows, output_dir, base_name)
        if not self.image_only:
            # Scale copies: rescaling info['annotation'] in place would
            # compound the factors when the same info is processed with
            # several rates in one process.
            annos = self._scale_annos(info['annotation'], rate)
            self.slice_anno_single(annos, windows, output_dir, base_name)

    def check_or_mkdirs(self, path):
        """Create ``path`` (including parents) when it does not exist yet."""
        if not os.path.exists(path):
            os.makedirs(path, exist_ok=True)

    def slice_data(self, infos, rates, output_dir):
        """Slice every image of ``infos`` at every rate.

        Args:
            infos (list[dict]): data_infos
            rates (float, list): scale rates
            output_dir (str): output directory
        """
        if isinstance(rates, Number):
            rates = [rates, ]

        self.check_or_mkdirs(output_dir)
        self.check_or_mkdirs(os.path.join(output_dir, 'images'))
        if not self.image_only:
            self.check_or_mkdirs(os.path.join(output_dir, 'labelTxt'))

        pbar = tqdm(total=len(rates) * len(infos), desc='slicing data')

        if self.num_process <= 1:
            for rate in rates:
                for info in infos:
                    self.slice_data_single(info, rate, output_dir)
                    pbar.update()
        else:
            pool = Pool(self.num_process)
            for rate in rates:
                for info in infos:
                    # The callback runs in the parent process after a task
                    # finished successfully.
                    pool.apply_async(
                        self.slice_data_single, (info, rate, output_dir),
                        callback=lambda x: pbar.update())

            pool.close()
            pool.join()

        pbar.close()