更换文档检测模型
This commit is contained in:
163
paddle_detection/configs/rotate/tools/convert.py
Normal file
163
paddle_detection/configs/rotate/tools/convert.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import json
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
from multiprocessing import Pool
|
||||
|
||||
|
||||
def load_dota_info(image_dir, anno_dir, file_name, ext=None):
    """Load the DOTA-style annotation that belongs to a single image file.

    Args:
        image_dir (str): directory that contains the image.
        anno_dir (str): directory that contains ``<base_name>.txt`` label
            files.
        file_name (str): name of the image file inside ``image_dir``.
        ext (str | list | tuple | set | None): accepted file extension(s),
            including the leading dot (e.g. ``'.png'``). A falsy value
            accepts every file.

    Returns:
        dict | None: ``{'image_file': <path>, 'annotation': [...]}`` where
        each annotation has ``poly`` (8 floats), ``name`` and ``difficult``
        (a string, ``'0'`` when the label line has no 10th field). ``None``
        is returned when the extension is filtered out. A missing label file
        yields an empty ``annotation`` list.
    """
    base_name, extension = os.path.splitext(file_name)
    if ext:
        # A bare string is ONE extension. Using ``in`` on the string itself
        # would be a substring test, which lets files without any extension
        # ('' is a substring of everything) or with a partial one slip
        # through the filter.
        allowed = (ext, ) if isinstance(ext, str) else ext
        if extension not in allowed:
            return None

    info = {'image_file': os.path.join(image_dir, file_name), 'annotation': []}
    anno_file = os.path.join(anno_dir, base_name + '.txt')
    if not os.path.exists(anno_file):
        return info

    with open(anno_file, 'r') as f:
        for line in f:
            items = line.strip().split()
            # Lines with fewer than 9 fields cannot hold 8 coordinates plus a
            # class name, so they are skipped.
            if len(items) < 9:
                continue

            info['annotation'].append({
                'poly': list(map(float, items[:8])),
                'name': items[8],
                'difficult': '0' if len(items) == 9 else items[9],
            })

    return info
|
||||
|
||||
|
||||
def load_dota_infos(root_dir, num_process=8, ext=None):
    """Load the annotation info of every file found in ``root_dir/images``.

    Labels are read from ``root_dir/labelTxt`` through ``load_dota_info``.

    Args:
        root_dir (str): dataset root holding ``images`` and ``labelTxt``.
        num_process (int): a worker pool of this size is used when it is
            greater than 1; otherwise files are processed sequentially.
        ext: extension filter forwarded to ``load_dota_info``.

    Returns:
        list[dict]: the truthy results of ``load_dota_info``, in the order
        ``os.listdir`` returned the files.
    """
    image_dir = os.path.join(root_dir, 'images')
    anno_dir = os.path.join(root_dir, 'labelTxt')

    if num_process > 1:
        pool = Pool(num_process)
        pending = [
            pool.apply_async(load_dota_info,
                             (image_dir, anno_dir, file_name, ext))
            for file_name in os.listdir(image_dir)
        ]
        # No more tasks will be submitted; wait for the workers to finish.
        pool.close()
        pool.join()
        loaded = [task.get() for task in pending]
    else:
        loaded = [
            load_dota_info(image_dir, anno_dir, file_name, ext)
            for file_name in os.listdir(image_dir)
        ]

    # Files rejected by the extension filter come back as None.
    return [info for info in loaded if info]
|
||||
|
||||
|
||||
def process_single_sample(info, image_id, class_names):
    """Convert one loaded sample into COCO image / annotation records.

    Args:
        info (dict): item with ``image_file`` and ``annotation`` keys, as
            produced by ``load_dota_info``.
        image_id (int): id stored in the image record and in each annotation.
        class_names (list[str]): class list; ``category_id`` is the 1-based
            position of the object's name in this list.

    Returns:
        tuple: ``(single_image, single_objs)`` - the image record (file name,
        id, width, height) and the list of annotation records. Objects whose
        ``difficult`` flag is ``'2'`` are dropped.

    Raises:
        IOError: if the image cannot be read.
        ValueError: if an object's name is not in ``class_names``.
    """
    image_file = info['image_file']
    image = cv2.imread(image_file)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error on ``image.shape``.
    if image is None:
        raise IOError('failed to read image: {}'.format(image_file))
    img_height, img_width = image.shape[:2]

    single_image = dict()
    single_image['file_name'] = os.path.split(image_file)[-1]
    single_image['id'] = image_id
    single_image['width'] = img_width
    single_image['height'] = img_height

    # process annotation field
    single_objs = []
    for obj in info['annotation']:
        poly, name, difficult = obj['poly'], obj['name'], obj['difficult']
        if difficult == '2':
            continue

        xs, ys = poly[0::2], poly[1::2]
        xmin, ymin, xmax, ymax = min(xs), min(ys), max(xs), max(ys)
        box_w, box_h = xmax - xmin, ymax - ymin

        single_obj = dict()
        single_obj['category_id'] = class_names.index(name) + 1
        single_obj['segmentation'] = [poly]
        single_obj['iscrowd'] = 0
        # Axis-aligned hull of the polygon, stored as [x, y, w, h].
        single_obj['bbox'] = [xmin, ymin, box_w, box_h]
        single_obj['area'] = box_h * box_w
        single_obj['image_id'] = image_id
        single_objs.append(single_obj)

    return (single_image, single_objs)
|
||||
|
||||
|
||||
def data_to_coco(infos, output_path, class_names, num_process):
    """Write the loaded samples to ``output_path`` as a COCO-style JSON file.

    Args:
        infos (list[dict]): samples accepted by ``process_single_sample``.
        output_path (str): destination JSON file.
        class_names (list[str]): class names; category ids are 1-based
            positions in this list.
        num_process (int): a worker pool of this size is used when it is
            greater than 1; otherwise samples are processed sequentially.
    """
    data_dict = {
        'categories': [{
            'id': category_id,
            'name': name,
            'supercategory': name
        } for category_id, name in enumerate(class_names, start=1)]
    }

    pbar = tqdm(total=len(infos), desc='data to coco')
    if num_process > 1:
        pool = Pool(num_process)
        pending = [
            pool.apply_async(
                process_single_sample, (info, image_id, class_names),
                callback=lambda x: pbar.update())
            for image_id, info in enumerate(infos, start=1)
        ]
        pool.close()
        pool.join()
        samples = [task.get() for task in pending]
    else:
        samples = []
        for image_id, info in enumerate(infos, start=1):
            samples.append(
                process_single_sample(info, image_id, class_names))
            pbar.update()
    pbar.close()

    images, annotations = [], []
    for single_image, single_anno in samples:
        images.append(single_image)
        annotations += single_anno

    # Annotation ids are assigned only after all samples are merged, so they
    # are unique across the whole file.
    for anno_id, anno in enumerate(annotations, start=1):
        anno['id'] = anno_id

    data_dict['images'] = images
    data_dict['annotations'] = annotations

    with open(output_path, 'w') as f:
        json.dump(data_dict, f)
|
||||
266
paddle_detection/configs/rotate/tools/generate_result.py
Normal file
266
paddle_detection/configs/rotate/tools/generate_result.py
Normal file
@@ -0,0 +1,266 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
|
||||
import numpy as np
|
||||
from multiprocessing import Pool
|
||||
from functools import partial
|
||||
from shapely.geometry import Polygon
|
||||
import argparse
|
||||
|
||||
# Base list of 15 class names (selected by the 'dota10' data type).
wordname_15 = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]

# 16 classes: the base list plus 'container-crane'.
wordname_16 = wordname_15 + ['container-crane']

# 18 classes: the 16-class list plus 'airport' and 'helipad'.
wordname_18 = wordname_16 + ['airport', 'helipad']

# Maps the --data_type command-line value to its class-name list.
DATA_CLASSES = dict(
    dota10=wordname_15,
    dota15=wordname_16,
    dota20=wordname_18, )
|
||||
|
||||
|
||||
def rbox_iou(g, p):
    """Compute the IoU of two quadrilaterals.

    Args:
        g: sequence whose first 8 values are the x/y coordinates of the four
            corners of the first box (any further values are ignored).
        p: same layout, second box.

    Returns:
        The intersection-over-union ratio, or ``0`` when either geometry is
        invalid or the union area is zero.
    """
    # buffer(0) is applied to repair the quadrilaterals before use.
    g = Polygon(np.array(g)[:8].reshape((4, 2))).buffer(0)
    p = Polygon(np.array(p)[:8].reshape((4, 2))).buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0

    # Intersect the repaired geometries directly. Wrapping them in Polygon()
    # again is redundant, and buffer(0) is not guaranteed to return a simple
    # Polygon, which the Polygon constructor may not accept.
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    return inter / union
|
||||
|
||||
|
||||
def py_cpu_nms_poly_fast(dets, thresh):
    """Run non-maximum suppression on polygon detections.

    Axis-aligned bounding boxes are used as a cheap pre-filter; the polygon
    IoU (``rbox_iou``) is only evaluated for pairs whose bounding boxes
    overlap.

    Args:
        dets (numpy.ndarray): one detection per row; columns 0-7 are the
            polygon coordinates and column 8 is the score.
        thresh (float): detections whose IoU with a kept detection is above
            this value are suppressed.

    Returns:
        list: indices of the kept rows, in descending score order. Empty when
        ``dets`` is empty.
    """
    if len(dets) == 0:
        return []

    obbs = dets[:, 0:-1]
    x1 = np.min(obbs[:, 0::2], axis=1)
    y1 = np.min(obbs[:, 1::2], axis=1)
    x2 = np.max(obbs[:, 0::2], axis=1)
    y2 = np.max(obbs[:, 1::2], axis=1)
    scores = dets[:, 8]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    polys = dets[:, :8]
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Overlap of the axis-aligned bounding boxes with the current best.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        hbb_inter = w * h
        hbb_ovr = hbb_inter / (areas[i] + areas[rest] - hbb_inter)

        # Replace the coarse overlap with the exact polygon IoU wherever the
        # bounding boxes touch; non-overlapping pairs stay at 0.
        for h_ind in np.where(hbb_ovr > 0)[0]:
            hbb_ovr[h_ind] = rbox_iou(polys[i], polys[rest[h_ind]])

        inds = np.where(hbb_ovr <= thresh)[0]
        order = order[inds + 1]
    return keep
|
||||
|
||||
|
||||
def poly2origpoly(poly, x, y, rate):
    """Map polygon coordinates from a patch back to the original image.

    Each x coordinate is shifted by ``x`` and each y coordinate by ``y``,
    then both are divided by ``rate``.

    Args:
        poly: flat sequence ``[x0, y0, x1, y1, ...]``; a trailing unpaired
            value is ignored.
        x: horizontal offset added to every x coordinate.
        y: vertical offset added to every y coordinate.
        rate: scale divisor; anything ``float()`` accepts (callers pass the
            string parsed from the patch name).

    Returns:
        list[float]: the transformed flat coordinate list.
    """
    origpoly = []
    for px, py in zip(poly[0::2], poly[1::2]):
        orig_x = float(px + x) / float(rate)
        orig_y = float(py + y) / float(rate)
        origpoly.extend((orig_x, orig_y))
    return origpoly
|
||||
|
||||
|
||||
def nmsbynamedict(nameboxdict, nms, thresh):
    """Apply NMS separately to the detections of every image.

    Args:
        nameboxdict (dict): image name -> list of detections.
        nms (callable): ``nms(dets_array, thresh)`` returning the indices of
            the detections to keep.
        thresh (float): threshold forwarded to ``nms``.

    Returns:
        dict: image name -> list of kept detections, in the order returned by
        ``nms``.
    """
    nameboxnmsdict = {}
    for imgname, boxes in nameboxdict.items():
        keep = nms(np.array(boxes), thresh)
        nameboxnmsdict[imgname] = [boxes[index] for index in keep]
    return nameboxnmsdict
|
||||
|
||||
|
||||
def merge_single(output_dir, nms, nms_thresh, pred_class_lst):
    """Merge the patch-level predictions of one class and write them out.

    Every prediction line is mapped back to original-image coordinates using
    the offset and scale encoded in the patch name, grouped by original image
    name, filtered with NMS and written to ``<output_dir>/<class_name>.txt``.

    Args:
        output_dir (str): directory the result file is written to.
        nms (callable): NMS function, called as ``nms(dets, nms_thresh)``.
        nms_thresh (float): NMS threshold.
        pred_class_lst (tuple): ``(class_name, pred_bbox_list)`` where each
            entry of ``pred_bbox_list`` is a space-separated line
            ``<patch_name> <confidence> <8 polygon coordinates>``.
    """
    class_name, pred_bbox_list = pred_class_lst

    # Patch names carry ``__<rate>__<x>___<y>``; both patterns are loop
    # invariant, so they are compiled once.
    offset_pattern = re.compile(r'__\d+___\d+')
    rate_pattern = re.compile(r'__([\d+\.]+)__\d+___')

    nameboxdict = {}
    for line in pred_bbox_list:
        splitline = line.split(' ')
        subname = splitline[0]
        oriname = subname.split('__')[0]

        offsets = re.findall(r'\d+', re.findall(offset_pattern, subname)[0])
        x, y = int(offsets[0]), int(offsets[1])
        rate = re.findall(rate_pattern, subname)[0]

        confidence = splitline[1]
        poly = list(map(float, splitline[2:]))
        det = poly2origpoly(poly, x, y, rate)
        det.append(confidence)
        det = list(map(float, det))

        if oriname not in nameboxdict:
            nameboxdict[oriname] = []
        nameboxdict[oriname].append(det)

    nameboxnmsdict = nmsbynamedict(nameboxdict, nms, nms_thresh)

    # write result
    dstname = os.path.join(output_dir, class_name + '.txt')
    with open(dstname, 'w') as f_out:
        for imgname, dets in nameboxnmsdict.items():
            for det in dets:
                # The confidence is stored last in each detection.
                fields = [imgname, str(det[-1])] + [str(v) for v in det[0:-1]]
                f_out.write(' '.join(fields) + '\n')
|
||||
|
||||
|
||||
def generate_result(pred_txt_dir,
                    output_dir='output',
                    class_names=wordname_15,
                    nms_thresh=0.1):
    """Merge per-patch prediction files into one result file per class.

    Args:
        pred_txt_dir (str): directory with the ``*.txt`` prediction files;
            each line is ``<class_name> <confidence> <polygon coordinates>``.
        output_dir (str): directory the per-class result files are written to.
        class_names (list[str]): class names of the dataset.
        nms_thresh (float): NMS threshold used while merging.

    Raises:
        KeyError: if a prediction line names a class not in ``class_names``.
    """
    pred_txt_list = glob.glob("{}/*.txt".format(pred_txt_dir))

    # step1: summary pred bbox
    pred_classes = {class_name: [] for class_name in class_names}

    for current_txt in pred_txt_list:
        img_id = os.path.split(current_txt)[1]
        img_id = img_id.split('.txt')[0]
        with open(current_txt) as f:
            for line in f:
                # Blank lines carry no prediction; without this guard they
                # would be looked up as a class named '' or '\n'.
                if not line.strip():
                    continue
                item = line.split(' ')
                pred_class = item[0]
                # Replace the class name by the patch id so the merged line
                # reads ``<patch_name> <confidence> <poly>``.
                item[0] = img_id
                pred_classes[pred_class].append(' '.join(item))

    pred_classes_lst = []
    for class_name, pred_bboxes in pred_classes.items():
        print('class_name: {}, count: {}'.format(class_name,
                                                 len(pred_bboxes)))
        pred_classes_lst.append((class_name, pred_bboxes))

    # step2: merge
    nms = py_cpu_nms_poly_fast
    mergesingle_fn = partial(merge_single, output_dir, nms, nms_thresh)
    pool = Pool(len(class_names))
    try:
        pool.map(mergesingle_fn, pred_classes_lst)
    finally:
        # Always release the worker processes, even if merging fails.
        pool.close()
        pool.join()
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the result-generation script.

    Returns:
        argparse.Namespace: with ``pred_txt_dir``, ``output_dir``,
        ``data_type`` and ``nms_thresh``.
    """
    parser = argparse.ArgumentParser(description='generate test results')
    options = (
        ('--pred_txt_dir', dict(
            type=str, help='path of pred txt dir')),
        ('--output_dir', dict(
            type=str, default='output', help='path of output dir')),
        ('--data_type', dict(
            type=str, default='dota10', help='data type')),
        ('--nms_thresh', dict(
            type=float,
            default=0.1,
            help='nms threshold while merging results')), )
    for flag, spec in options:
        parser.add_argument(flag, **spec)

    return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    args = parse_args()

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    class_names = DATA_CLASSES[args.data_type]

    # Forward --nms_thresh; it used to be parsed but never passed on, so the
    # command-line value had no effect.
    generate_result(args.pred_txt_dir, output_dir, class_names,
                    args.nms_thresh)
    print('done!')
|
||||
378
paddle_detection/configs/rotate/tools/inference_benchmark.py
Normal file
378
paddle_detection/configs/rotate/tools/inference_benchmark.py
Normal file
@@ -0,0 +1,378 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import six
|
||||
import glob
|
||||
import time
|
||||
import yaml
|
||||
import argparse
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
import paddle
|
||||
import paddle.version as paddle_version
|
||||
from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version
|
||||
|
||||
# Architectures (the `arch` entry of infer_cfg.yml) for which init_predictor
# collects, or reuses, a tuned TensorRT dynamic-shape range file.
TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
|
||||
|
||||
|
||||
def check_version(version='2.2'):
    """Check that the installed PaddlePaddle is at least ``version``.

    Args:
        version (str): dotted minimum version (e.g. ``'2.4'``), or
            ``'develop'`` to require a develop build.

    Raises:
        Exception: if the installed version is lower than ``version``, or if
            ``'develop'`` is requested and the installed build does not
            report version ``0.0.0.0``.
    """
    err = "PaddlePaddle version {} or higher is required, " \
          "or a suitable develop version is satisfied as well. \n" \
          "Please make sure the version is good with your code.".format(version)

    version_installed = [
        paddle_version.major, paddle_version.minor, paddle_version.patch,
        paddle_version.rc
    ]

    # A build reporting 0.0.0.0 is accepted for any requirement.
    if version_installed == ['0', '0', '0', '0']:
        return

    if version == 'develop':
        raise Exception("PaddlePaddle develop version is required!")

    for installed, required in zip(version_installed, version.split('.')):
        installed, required = str(installed), str(required)
        # Compare numerically whenever possible: as strings, '10' sorts
        # before '4', which would wrongly reject e.g. 2.10 against 2.4.
        if installed.isdigit() and required.isdigit():
            installed, required = int(installed), int(required)
        if installed > required:
            return
        if installed < required:
            raise Exception(err)
|
||||
|
||||
|
||||
def check_trt_version(version='8.2'):
    """Check that the TensorRT runtime is at least ``version``.

    Args:
        version (str): dotted minimum version, e.g. ``'8.2'``.

    Raises:
        Exception: if the runtime version reported by
            ``get_trt_runtime_version()`` is lower than ``version``.
    """
    # The message previously ran "required," and "Please" together.
    err = "TensorRT version {} or higher is required, " \
          "Please make sure the version is good with your code.".format(version)
    version_split = list(map(int, version.split('.')))
    # NOTE(review): presumably a sequence of ints (major, minor, ...) -
    # confirm against the paddle.inference documentation.
    version_installed = get_trt_runtime_version()
    for installed, required in zip(version_installed, version_split):
        if installed > required:
            return
        if installed < required:
            raise Exception(err)
|
||||
|
||||
|
||||
# preprocess ops
|
||||
def decode_image(im_file, im_info):
    """Load an image and initialise its shape / scale metadata.

    Args:
        im_file: path of the image file (``str``), or an already decoded
            image array, which is used as is.
        im_info (dict): metadata dict, updated in place.

    Returns:
        tuple: ``(im, im_info)`` where ``im_info['im_shape']`` holds the
        first two dimensions of ``im`` and ``im_info['scale_factor']`` is
        ``[1., 1.]``, both as float32 arrays.
    """
    if not isinstance(im_file, str):
        im = im_file
    else:
        with open(im_file, 'rb') as f:
            raw_bytes = f.read()
        encoded = np.frombuffer(raw_bytes, dtype='uint8')
        # imdecode yields BGR, but RGB is needed downstream.
        im = cv2.cvtColor(cv2.imdecode(encoded, 1), cv2.COLOR_BGR2RGB)

    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info
|
||||
|
||||
|
||||
class Resize(object):
    """Resize an HWC image to a target size.

    Args:
        target_size (int | list): target ``[height, width]``; a single int is
            used for both sides.
        keep_ratio (bool): when True the aspect ratio is preserved.
        interp (int): OpenCV interpolation flag.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        self.target_size = ([target_size, target_size]
                            if isinstance(target_size, int) else target_size)
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize ``im`` and record the new shape and scale in ``im_info``.

        Returns:
            tuple: ``(resized_im, im_info)`` with ``im_shape`` set to the
            resized height/width and ``scale_factor`` to ``[scale_y,
            scale_x]`` (float32 arrays).
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        # The value is unused; reading it requires a channel axis to exist.
        _ = im.shape[2]
        scale_y, scale_x = self.generate_scale(im)
        resized = cv2.resize(
            im,
            None,
            None,
            fx=scale_x,
            fy=scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(resized.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [scale_y, scale_x]).astype('float32')
        return resized, im_info

    def generate_scale(self, im):
        """Compute the ``(scale_y, scale_x)`` resize factors for ``im``.

        With ``keep_ratio`` the shorter side is scaled to the smaller target
        side, unless that would push the longer side past the larger target
        side, in which case the longer side is fitted instead. Otherwise
        each axis is scaled independently to the target size.
        """
        origin_shape = im.shape[:2]
        # The value is unused; reading it requires a channel axis to exist.
        _ = im.shape[2]
        if not self.keep_ratio:
            resize_h, resize_w = self.target_size
            scale_y = resize_h / float(origin_shape[0])
            scale_x = resize_w / float(origin_shape[1])
            return scale_y, scale_x

        shorter_side = np.min(origin_shape)
        longer_side = np.max(origin_shape)
        target_min = np.min(self.target_size)
        target_max = np.max(self.target_size)
        im_scale = float(target_min) / float(shorter_side)
        if np.round(im_scale * longer_side) > target_max:
            im_scale = float(target_max) / float(longer_side)
        return im_scale, im_scale
|
||||
|
||||
|
||||
class Permute(object):
    """Reorder the image axes from (H, W, C) to (C, H, W)."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return the transposed image together with the untouched info."""
        chw = im.transpose((2, 0, 1))
        return chw, im_info
|
||||
|
||||
|
||||
class NormalizeImage(object):
    """Scale and normalise an HWC image.

    Args:
        mean (list): per-channel mean.
        std (list): per-channel standard deviation.
        is_scale (bool): when True pixel values are multiplied by 1/255
            first.
        norm_type (str): mean/std normalisation is applied only for
            ``'mean_std'``.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Return the float32 normalised image and the untouched info."""
        # copy=False: an input that is already float32 is modified in place.
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im *= 1.0 / 255.0

        if self.norm_type == 'mean_std':
            # Broadcast the per-channel statistics over height and width.
            channel_mean = np.array(self.mean)[None, None, :]
            channel_std = np.array(self.std)[None, None, :]
            im -= channel_mean
            im /= channel_std
        return im, im_info
|
||||
|
||||
|
||||
class PadStride(object):
    """Zero-pad a CHW image so height and width are multiples of a stride.

    Args:
        stride (int): the stride; values ``<= 0`` disable padding.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return the padded float32 image (or ``im`` itself) and the info."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info

        channels, height, width = im.shape
        padded_h, padded_w = (int(np.ceil(float(side) / stride) * stride)
                              for side in (height, width))
        # The original pixels stay in the top-left corner of the canvas.
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        canvas[:, :height, :width] = im
        return canvas, im_info
|
||||
|
||||
|
||||
def preprocess(im, preprocess_ops):
    """Decode an image and run it through the preprocessing operators.

    Args:
        im: image path or decoded image, as accepted by ``decode_image``.
        preprocess_ops (list): callables taking and returning
            ``(im, im_info)``; applied in order.

    Returns:
        tuple: the processed image and its ``im_info`` dict.
    """
    initial_info = {
        'scale_factor': np.array(
            [1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, initial_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the inference benchmark.

    Returns:
        argparse.Namespace: with ``model_dir``, ``run_mode``, ``batch_size``,
        ``image_dir``, ``warmup_iter``, ``total_iter``, ``log_iter`` and
        ``tuned_trt_shape_file``.
    """
    parser = argparse.ArgumentParser()
    options = (
        ('--model_dir', dict(
            type=str, help='directory of inference model')),
        ('--run_mode', dict(
            type=str, default='paddle', help='running mode')),
        ('--batch_size', dict(
            type=int, default=1, help='batch size')),
        ('--image_dir', dict(
            type=str,
            default='/paddle/data/DOTA_1024_ss/test1024/images',
            help='directory of test images')),
        ('--warmup_iter', dict(
            type=int, default=5, help='num of warmup iters')),
        ('--total_iter', dict(
            type=int, default=2000, help='num of total iters')),
        ('--log_iter', dict(
            type=int, default=50, help='num of log interval')),
        ('--tuned_trt_shape_file', dict(
            type=str,
            default='shape_range_info.pbtxt',
            help='dynamic shape range info')), )
    for flag, spec in options:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
|
||||
|
||||
|
||||
def init_predictor(FLAGS):
    """Build a Paddle Inference predictor from an exported model directory.

    Reads ``infer_cfg.yml``, ``model.pdmodel`` and ``model.pdiparams`` from
    ``FLAGS.model_dir``. When ``FLAGS.run_mode`` is one of ``trt_int8``,
    ``trt_fp32`` or ``trt_fp16`` the TensorRT engine is enabled with the
    matching precision.

    Args:
        FLAGS: parsed arguments providing ``model_dir``, ``run_mode``,
            ``batch_size`` and ``tuned_trt_shape_file``.

    Returns:
        tuple: ``(predictor, yml_conf)`` - the predictor and the loaded
        ``infer_cfg.yml`` content.
    """
    model_dir = FLAGS.model_dir
    run_mode = FLAGS.run_mode
    batch_size = FLAGS.batch_size

    with open(os.path.join(model_dir, 'infer_cfg.yml')) as f:
        yml_conf = yaml.safe_load(f)

    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))

    # initial GPU memory(M), device ID
    config.enable_use_gpu(200, 0)
    # optimize graph and fuse op
    config.switch_ir_optim(True)

    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }

    arch = yml_conf['arch']
    tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)

    if run_mode in precision_map:
        is_tuned_arch = arch in TUNED_TRT_DYNAMIC_MODELS

        # First run for a tuned architecture: record the shape ranges so a
        # later run can load them.
        if is_tuned_arch and not os.path.exists(tuned_trt_shape_file):
            print(
                'dynamic shape range info is saved in {}. After that, rerun the code'.
                format(tuned_trt_shape_file))
            config.collect_shape_range_info(tuned_trt_shape_file)

        config.enable_tensorrt_engine(
            workspace_size=(1 << 25) * batch_size,
            max_batch_size=batch_size,
            min_subgraph_size=yml_conf['min_subgraph_size'],
            precision_mode=precision_map[run_mode],
            use_static=True,
            use_calib_mode=False)

        if yml_conf['use_dynamic_shape']:
            if is_tuned_arch and os.path.exists(tuned_trt_shape_file):
                config.enable_tuned_tensorrt_dynamic_shape(
                    tuned_trt_shape_file, True)
            else:

                def square_input(side):
                    # Input shapes for a square image of the given side.
                    return {
                        'image': [batch_size, 3, side, side],
                        'scale_factor': [batch_size, 2]
                    }

                min_input_shape = square_input(640)
                max_input_shape = square_input(1280)
                opt_input_shape = square_input(1024)
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, yml_conf
|
||||
|
||||
|
||||
def create_preprocess_ops(yml_conf):
    """Instantiate the preprocessing operators listed in the config.

    Args:
        yml_conf (dict): config whose ``'Preprocess'`` entry is a list of
            dicts; ``'type'`` names the operator and the remaining items are
            passed to it as keyword arguments.

    Returns:
        list: the operator instances, in config order. The config dicts are
        not modified.
    """
    preprocess_ops = []
    for op_info in yml_conf['Preprocess']:
        op_kwargs = dict(op_info)
        op_type = op_kwargs.pop('type')
        # NOTE(review): eval() executes whatever expression the config holds
        # in 'type'; only load configs from trusted sources.
        op_cls = eval(op_type)
        preprocess_ops.append(op_cls(**op_kwargs))
    return preprocess_ops
|
||||
|
||||
|
||||
def get_test_images(image_dir):
    """List the image files directly inside ``image_dir``.

    Files ending in ``jpg``, ``jpeg``, ``png`` or ``bmp`` (all lower-case or
    all upper-case) are matched.

    Args:
        image_dir (str): directory to scan (not recursive).

    Returns:
        list[str]: absolute paths without duplicates, in no particular
        order.
    """
    infer_dir = os.path.abspath(image_dir)
    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
    all_exts = lower_exts + [ext.upper() for ext in lower_exts]

    found = set()
    for ext in all_exts:
        pattern = '{}/*.{}'.format(infer_dir, ext)
        found.update(glob.glob(pattern))
    return list(found)
|
||||
|
||||
|
||||
def create_inputs(image_files, preprocess_ops):
    """Preprocess a batch of images and stack them into model inputs.

    Args:
        image_files (list): images accepted by ``preprocess``.
        preprocess_ops (list): preprocessing operators.

    Returns:
        dict: float32 arrays ``im_shape``, ``scale_factor`` and ``image``,
        each stacked along a new leading batch axis.
    """
    processed = [preprocess(im_path, preprocess_ops) for im_path in image_files]
    im_list = [im for im, _ in processed]
    im_info_list = [im_info for _, im_info in processed]

    inputs = dict()
    for key in ('im_shape', 'scale_factor'):
        inputs[key] = np.stack(
            [im_info[key] for im_info in im_info_list],
            axis=0).astype('float32')
    inputs['image'] = np.stack(im_list, axis=0).astype('float32')
    return inputs
|
||||
|
||||
|
||||
def measure_speed(FLAGS):
    """Benchmark predictor throughput on the images in ``FLAGS.image_dir``.

    The loop index ``i`` advances by ``batch_size``. Iterations with
    ``i < warmup_iter`` are run but not timed. Progress is printed whenever
    ``i + 1`` is a multiple of ``log_iter`` and the overall result when
    ``i + 1 == total_iter``; with ``batch_size > 1`` these conditions are
    only met when the step happens to line up with them.

    Args:
        FLAGS: parsed arguments providing ``image_dir``, ``batch_size``,
            ``warmup_iter``, ``log_iter``, ``total_iter`` and whatever
            ``init_predictor`` needs.

    Raises:
        ValueError: if no test image is found in ``FLAGS.image_dir``.
    """
    predictor, yml_conf = init_predictor(FLAGS)
    input_names = predictor.get_input_names()
    preprocess_ops = create_preprocess_ops(yml_conf)

    image_files = get_test_images(FLAGS.image_dir)
    if not image_files:
        raise ValueError('no test images found in {}'.format(FLAGS.image_dir))
    num_images = len(image_files)

    batch_size = FLAGS.batch_size
    warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter

    total_time = 0
    fps = 0
    for i in range(0, total_iter, batch_size):
        # make data ready
        # Wrap around the image list so a directory holding fewer images
        # than total_iter still yields full batches instead of an empty
        # slice that np.stack cannot handle.
        batch_files = [
            image_files[(i + offset) % num_images]
            for offset in range(batch_size)
        ]
        inputs = create_inputs(batch_files, preprocess_ops)
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(inputs[name])

        # Synchronise before and after the run so only the prediction
        # itself is timed.
        paddle.device.cuda.synchronize()
        # start running
        start_time = time.perf_counter()
        predictor.run()
        paddle.device.cuda.synchronize()

        if i >= warmup_iter:
            total_time += time.perf_counter() - start_time
            if (i + 1) % log_iter == 0:
                fps = (i + 1 - warmup_iter) / total_time
                print(
                    f'Done image [{i + 1:<3}/ {total_iter}], '
                    f'fps: {fps:.1f} img / s, '
                    f'times per image: {1000 / fps:.1f} ms / img',
                    flush=True)

        if (i + 1) == total_iter:
            fps = (i + 1 - warmup_iter) / total_time
            print(
                f'Overall fps: {fps:.1f} img / s, '
                f'times per image: {1000 / fps:.1f} ms / img',
                flush=True)
            break
|
||||
|
||||
|
||||
if __name__ == '__main__':
    FLAGS = parse_args()
    use_trt = 'trt' in FLAGS.run_mode
    # TensorRT run modes require a develop build of Paddle; other modes
    # require version 2.4 or higher.
    check_version('develop' if use_trt else '2.4')
    if use_trt:
        check_trt_version('8.2')
    measure_speed(FLAGS)
|
||||
302
paddle_detection/configs/rotate/tools/onnx_infer.py
Normal file
302
paddle_detection/configs/rotate/tools/onnx_infer.py
Normal file
@@ -0,0 +1,302 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import six
|
||||
import glob
|
||||
import copy
|
||||
import yaml
|
||||
import argparse
|
||||
import cv2
|
||||
import numpy as np
|
||||
from shapely.geometry import Polygon
|
||||
from onnxruntime import InferenceSession
|
||||
|
||||
|
||||
# preprocess ops
|
||||
def decode_image(img_path):
    """Read an image file as RGB and build its initial metadata.

    Args:
        img_path (str): path of the image file.

    Returns:
        tuple: ``(im, img_info)`` where ``img_info`` holds ``im_shape`` (the
        first two dimensions of ``im``) and ``scale_factor`` (``[1., 1.]``),
        both as float32 arrays.
    """
    with open(img_path, 'rb') as f:
        raw_bytes = f.read()
    encoded = np.frombuffer(raw_bytes, dtype='uint8')
    # imdecode yields BGR, but RGB is needed downstream.
    im = cv2.cvtColor(cv2.imdecode(encoded, 1), cv2.COLOR_BGR2RGB)

    im_shape = np.array(im.shape[:2], dtype=np.float32)
    scale_factor = np.array([1., 1.], dtype=np.float32)
    return im, {"im_shape": im_shape, "scale_factor": scale_factor}
|
||||
|
||||
|
||||
class Resize(object):
    """Resize an HWC image to a target size.

    Args:
        target_size (int | list): target ``[height, width]``; a single int is
            used for both sides.
        keep_ratio (bool): when True the aspect ratio is preserved.
        interp (int): OpenCV interpolation flag.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        self.target_size = ([target_size, target_size]
                            if isinstance(target_size, int) else target_size)
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize ``im`` and record the new shape and scale in ``im_info``.

        Returns:
            tuple: ``(resized_im, im_info)`` with ``im_shape`` set to the
            resized height/width and ``scale_factor`` to ``[scale_y,
            scale_x]`` (float32 arrays).
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        # The value is unused; reading it requires a channel axis to exist.
        _ = im.shape[2]
        scale_y, scale_x = self.generate_scale(im)
        resized = cv2.resize(
            im,
            None,
            None,
            fx=scale_x,
            fy=scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(resized.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [scale_y, scale_x]).astype('float32')
        return resized, im_info

    def generate_scale(self, im):
        """Compute the ``(scale_y, scale_x)`` resize factors for ``im``.

        With ``keep_ratio`` the shorter side is scaled to the smaller target
        side, unless that would push the longer side past the larger target
        side, in which case the longer side is fitted instead. Otherwise
        each axis is scaled independently to the target size.
        """
        origin_shape = im.shape[:2]
        # The value is unused; reading it requires a channel axis to exist.
        _ = im.shape[2]
        if not self.keep_ratio:
            resize_h, resize_w = self.target_size
            scale_y = resize_h / float(origin_shape[0])
            scale_x = resize_w / float(origin_shape[1])
            return scale_y, scale_x

        shorter_side = np.min(origin_shape)
        longer_side = np.max(origin_shape)
        target_min = np.min(self.target_size)
        target_max = np.max(self.target_size)
        im_scale = float(target_min) / float(shorter_side)
        if np.round(im_scale * longer_side) > target_max:
            im_scale = float(target_max) / float(longer_side)
        return im_scale, im_scale
|
||||
|
||||
|
||||
class Permute(object):
    """Reorder the image axes from (H, W, C) to (C, H, W)."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return the transposed image together with the untouched info."""
        chw = im.transpose((2, 0, 1))
        return chw, im_info
|
||||
|
||||
|
||||
class NormalizeImage(object):
    """Scale and normalise an HWC image.

    Args:
        mean (list): per-channel mean.
        std (list): per-channel standard deviation.
        is_scale (bool): when True pixel values are multiplied by 1/255
            first.
        norm_type (str): mean/std normalisation is applied only for
            ``'mean_std'``.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Return the float32 normalised image and the untouched info."""
        # copy=False: an input that is already float32 is modified in place.
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im *= 1.0 / 255.0

        if self.norm_type == 'mean_std':
            # Broadcast the per-channel statistics over height and width.
            channel_mean = np.array(self.mean)[None, None, :]
            channel_std = np.array(self.std)[None, None, :]
            im -= channel_mean
            im /= channel_std
        return im, im_info
|
||||
|
||||
|
||||
class PadStride(object):
    """Zero-pad a CHW image so height and width are multiples of a stride.

    Args:
        stride (int): the stride; values ``<= 0`` disable padding.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return the padded float32 image (or ``im`` itself) and the info."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info

        channels, height, width = im.shape
        padded_h, padded_w = (int(np.ceil(float(side) / stride) * stride)
                              for side in (height, width))
        # The original pixels stay in the top-left corner of the canvas.
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        canvas[:, :height, :width] = im
        return canvas, im_info
|
||||
|
||||
|
||||
class Compose:
    """Build a preprocessing pipeline from a list of op descriptions.

    Each entry of ``transforms`` is a dict whose ``'type'`` key names the op
    and whose remaining keys are passed to it as keyword arguments.
    """

    def __init__(self, transforms):
        self.transforms = []
        for op_info in transforms:
            # Work on a copy so the caller's config dict keeps its 'type' key.
            op_kwargs = op_info.copy()
            op_name = op_kwargs.pop('type')
            # NOTE(review): eval() executes whatever expression the config
            # supplies as 'type'; only load configs from trusted sources.
            op_factory = eval(op_name)
            self.transforms.append(op_factory(**op_kwargs))

    def __call__(self, img_path):
        """Decode ``img_path``, run every op in order, and return the inputs.

        Returns:
            dict: deep copy of the final ``im_info`` with the processed image
            stored under ``'image'``.
        """
        img, im_info = decode_image(img_path)
        for transform in self.transforms:
            img, im_info = transform(img, im_info)

        inputs = copy.deepcopy(im_info)
        inputs['image'] = img
        return inputs
|
||||
|
||||
|
||||
# postprocess
|
||||
def rbox_iou(g, p):
    """Return the intersection-over-union of two quadrilaterals.

    Args:
        g: sequence whose first 8 values are the corner coordinates
            ``x1, y1, ..., x4, y4`` of the first box.
        p: same layout, second box.

    Returns:
        IoU of the two shapes, or 0 when either shape is still invalid after
        repair or when the union has no area.
    """
    # buffer(0) repairs self-touching / self-crossing rings. The repaired
    # geometry is not guaranteed to be a single Polygon, so it is used as
    # returned instead of being wrapped in Polygon(...) again.
    g = Polygon(np.array(g)[:8].reshape((4, 2))).buffer(0)
    p = Polygon(np.array(p)[:8].reshape((4, 2))).buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0

    inter = g.intersection(p).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    return inter / union
|
||||
|
||||
|
||||
def multiclass_nms_rotated(pred_bboxes,
                           pred_scores,
                           iou_threshlod=0.1,
                           score_threshold=0.1):
    """Greedy per-class non-maximum suppression for rotated boxes.

    Args:
        pred_bboxes (numpy.ndarray): [B, N, 8]
        pred_scores (numpy.ndarray): [B, C, N]
        iou_threshlod (float): a box is dropped when its IoU with an already
            kept box of the same class exceeds this value.
        score_threshold (float): boxes scoring at or below this are ignored.

    Return:
        bboxes (numpy.ndarray): [N, 10], each row is class id, score and the
            8 box coordinates.
        bbox_num (numpy.ndarray): [B], number of kept boxes per image.
    """
    per_image_counts = []
    detections = []
    for boxes_img, scores_img in zip(pred_bboxes, pred_scores):
        kept_in_image = 0
        for cls_id, cls_scores in enumerate(scores_img):
            selected = cls_scores > score_threshold
            candidates = boxes_img[selected]
            confidences = cls_scores[selected]

            # Visit candidates from the highest score downwards.
            order = confidences.argsort()[::-1]
            candidates = candidates[order]
            confidences = confidences[order]

            survivors = []
            for idx, box in enumerate(candidates):
                overlaps_kept = any(
                    rbox_iou(box, candidates[kept]) > iou_threshlod
                    for kept in survivors)
                if not overlaps_kept:
                    survivors.append(idx)

            cls_column = np.ones(len(survivors)) * cls_id
            rows = np.concatenate(
                [
                    cls_column[:, None],
                    confidences[survivors][:, None],
                    candidates[survivors],
                ],
                axis=-1)
            detections.append(rows)
            kept_in_image += len(survivors)

        per_image_counts.append(kept_in_image)

    return np.concatenate(detections, axis=0), np.array(per_image_counts)
|
||||
|
||||
|
||||
def get_test_images(infer_dir, infer_img):
    """
    Get image path list in TEST mode

    Args:
        infer_dir (str|None): directory searched (non-recursively) for
            jpg/jpeg/png/bmp files, lower- or upper-case extension.
        infer_img (str|None): single image file; takes priority over
            ``infer_dir``.

    Returns:
        list[str]: ``[infer_img]`` when it is given, otherwise the sorted
        absolute paths of the images found in ``infer_dir``.

    Raises:
        AssertionError: if neither argument is set, an argument points to
            the wrong kind of path, or the directory holds no image.
    """
    assert infer_img is not None or infer_dir is not None, \
        "--image_file or --image_dir should be set"
    assert infer_img is None or os.path.isfile(infer_img), \
        "{} is not a file".format(infer_img)
    assert infer_dir is None or os.path.isdir(infer_dir), \
        "{} is not a directory".format(infer_dir)

    # infer_img has a higher priority
    if infer_img and os.path.isfile(infer_img):
        return [infer_img]

    infer_dir = os.path.abspath(infer_dir)
    assert os.path.isdir(infer_dir), \
        "infer_dir {} is not a directory".format(infer_dir)

    exts = ['jpg', 'jpeg', 'png', 'bmp']
    exts += [ext.upper() for ext in exts]

    # Escape the directory so characters such as '[' or '*' in its name are
    # matched literally instead of being read as glob wildcards.
    pattern_root = glob.escape(infer_dir)
    images = set()
    for ext in exts:
        images.update(glob.glob('{}/*.{}'.format(pattern_root, ext)))
    # A set has no defined order; sort so every run processes the images in
    # the same sequence.
    images = sorted(images)

    assert len(images) > 0, "no image found in {}".format(infer_dir)
    print("Found {} inference images in total.".format(len(images)))

    return images
|
||||
|
||||
|
||||
def predict_image(infer_config, predictor, img_list):
    """Run the predictor on every image and print the surviving detections.

    Args:
        infer_config (dict): must provide ``'Preprocess'`` (op descriptions
            for ``Compose``) and ``'draw_threshold'`` (minimum score for a
            detection to be printed).
        predictor: session object exposing ``get_inputs()`` and ``run()``.
        img_list (list[str]): image paths to process.

    Each printed line is ``class_id score x1 y1 x2 y2 x3 y3 x4 y4`` with
    single spaces between all fields.
    """
    # load preprocess transforms
    transforms = Compose(infer_config['Preprocess'])
    # The model's input names do not change between images; query them once.
    inputs_name = [var.name for var in predictor.get_inputs()]

    # predict image
    for img_path in img_list:
        sample = transforms(img_path)
        # Index with [None, ] to add the leading batch axis.
        inputs = {k: sample[k][None, ] for k in inputs_name}

        outputs = predictor.run(output_names=None, input_feed=inputs)

        bboxes, _ = multiclass_nms_rotated(
            np.array(outputs[0]), np.array(outputs[1]))
        print("ONNXRuntime predict: ")
        for bbox in bboxes:
            if bbox[0] > -1 and bbox[1] > infer_config['draw_threshold']:
                # Join the coordinates explicitly so that every pair of
                # neighbouring values is separated by a space.
                coords = ' '.join(str(v) for v in bbox[2:10])
                print(f"{int(bbox[0])} {bbox[1]} {coords}")
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the inference script.

    Returns:
        argparse.Namespace: with ``infer_cfg``, ``onnx_file`` (defaults to
        ``model.onnx``), ``image_dir`` and ``image_file``.
    """
    option_table = (
        ("--infer_cfg", dict(type=str, help="infer_cfg.yml")),
        ('--onnx_file', dict(
            type=str, default="model.onnx", help="onnx model file path")),
        ("--image_dir", dict(type=str)),
        ("--image_file", dict(type=str)),
    )

    parser = argparse.ArgumentParser(description=__doc__)
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the command line, collect the images, create
    # the inference session, read the YAML config and print the predictions.
    FLAGS = parse_args()
    # load image list
    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
    # load predictor
    predictor = InferenceSession(FLAGS.onnx_file)
    # load infer config
    with open(FLAGS.infer_cfg) as f:
        infer_config = yaml.safe_load(f)

    predict_image(infer_config, predictor, img_list)
|
||||
128
paddle_detection/configs/rotate/tools/prepare_data.py
Normal file
128
paddle_detection/configs/rotate/tools/prepare_data.py
Normal file
@@ -0,0 +1,128 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import argparse
|
||||
from convert import load_dota_infos, data_to_coco
|
||||
from slicebase import SliceBase
|
||||
|
||||
# Base list of 15 category names.
wordname_15 = [
    'plane',
    'baseball-diamond',
    'bridge',
    'ground-track-field',
    'small-vehicle',
    'large-vehicle',
    'ship',
    'tennis-court',
    'basketball-court',
    'storage-tank',
    'soccer-ball-field',
    'roundabout',
    'harbor',
    'swimming-pool',
    'helicopter',
]

# 16-category list: the base list plus one extra class.
wordname_16 = [*wordname_15, 'container-crane']

# 18-category list: the 16-category list plus two extra classes.
wordname_18 = [*wordname_16, 'airport', 'helipad']

# Maps the value of --data_type to the category list used for that dataset.
DATA_CLASSES = dict(
    dota10=wordname_15,
    dota15=wordname_16,
    dota20=wordname_18,
)
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the data preparation script.

    ``--input_dirs`` and ``--output_dir`` are mandatory: every code path of
    the script reads from the former and writes to the latter, so a missing
    value is reported as a usage error instead of failing later on ``None``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser('prepare data for training')

    parser.add_argument(
        '--input_dirs',
        nargs='+',
        type=str,
        default=None,
        required=True,
        help='input dirs which contain image and labelTxt dir')

    parser.add_argument(
        '--output_dir',
        type=str,
        default=None,
        required=True,
        help='output dirs which contain image and labelTxt dir and coco style json file'
    )

    parser.add_argument(
        '--coco_json_file',
        type=str,
        default='',
        help='coco json annotation files')

    parser.add_argument('--subsize', type=int, default=1024, help='patch size')

    # The slicer advances by (subsize - gap), so gap is the overlap between
    # neighbouring patches rather than the step itself.
    parser.add_argument(
        '--gap',
        type=int,
        default=200,
        help='overlap between adjacent patches')

    parser.add_argument(
        '--data_type', type=str, default='dota10', help='data type')

    parser.add_argument(
        '--rates',
        nargs='+',
        type=float,
        default=[1.],
        help='scales for multi-slice training')

    parser.add_argument(
        '--nproc', type=int, default=8, help='the processor number')

    parser.add_argument(
        '--iof_thr',
        type=float,
        default=0.5,
        help='the minimal iof between a object and a window')

    parser.add_argument(
        '--image_only',
        action='store_true',
        default=False,
        help='only processing image')

    args = parser.parse_args()
    return args
|
||||
|
||||
|
||||
def load_dataset(input_dir, nproc, data_type):
    """Load the image/annotation infos found under ``input_dir``.

    Args:
        input_dir (str): dataset directory handed to ``load_dota_infos``.
        nproc (int): worker count handed to ``load_dota_infos``.
        data_type (str): dataset name; must contain ``dota``
            (case-insensitive).

    Returns:
        whatever ``load_dota_infos`` returns for the directory.

    Raises:
        ValueError: if ``data_type`` does not name a DOTA dataset.
    """
    is_dota = 'dota' in data_type.lower()
    if not is_dota:
        raise ValueError('only dota dataset is supported now')
    return load_dota_infos(input_dir, nproc)
|
||||
|
||||
|
||||
def main():
    """Slice the input datasets and optionally export a COCO-style json.

    Steps: load the infos of every ``--input_dirs`` entry, cut them into
    patches under ``--output_dir``, and, when ``--coco_json_file`` is given,
    reload the sliced data and write the json file into ``--output_dir``.

    Raises:
        ValueError: from ``load_dataset`` for an unsupported ``--data_type``.
        KeyError: if a json export is requested for a ``--data_type`` that
            has no entry in ``DATA_CLASSES``.
    """
    args = parse_args()

    infos = []
    for input_dir in args.input_dirs:
        infos += load_dataset(input_dir, args.nproc, args.data_type)

    # Resolve the class names before slicing: an unknown data type should
    # abort here rather than after the long-running slicing step.
    class_names = DATA_CLASSES[args.data_type] if args.coco_json_file else None

    slicer = SliceBase(
        args.gap,
        args.subsize,
        args.iof_thr,
        num_process=args.nproc,
        image_only=args.image_only)
    slicer.slice_data(infos, args.rates, args.output_dir)

    if args.coco_json_file:
        # The export works on the sliced patches, not on the original images.
        infos = load_dota_infos(args.output_dir, args.nproc)
        coco_json_file = os.path.join(args.output_dir, args.coco_json_file)
        data_to_coco(infos, coco_json_file, class_names, args.nproc)
|
||||
|
||||
|
||||
# Run the data preparation only when the file is executed as a script.
if __name__ == '__main__':
    main()
|
||||
267
paddle_detection/configs/rotate/tools/slicebase.py
Normal file
267
paddle_detection/configs/rotate/tools/slicebase.py
Normal file
@@ -0,0 +1,267 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Reference: https://github.com/CAPTAIN-WHU/DOTA_devkit
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import math
|
||||
import copy
|
||||
from numbers import Number
|
||||
from multiprocessing import Pool
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import shapely.geometry as shgeo
|
||||
|
||||
|
||||
def choose_best_pointorder_fit_another(poly1, poly2):
    """
    Pick the cyclic corner order of ``poly1`` that lies closest to ``poly2``.

    Args:
        poly1: 8 values ``x1, y1, ..., x4, y4``.
        poly2: 8 values in the same layout, used as the reference.

    Returns:
        np.ndarray: the rotation of ``poly1`` (start corner shifted by 0 to 3
        positions) with the smallest summed squared distance to ``poly2``.
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = poly1
    corners = [(x1, y1), (x2, y2), (x3, y3), (x4, y4)]
    # Candidate k starts at corner k and keeps the cyclic order.
    rotations = [
        np.array(corners[shift:] + corners[:shift]).ravel()
        for shift in range(4)
    ]

    reference = np.array(poly2)
    costs = np.array(
        [np.sum((candidate - reference)**2) for candidate in rotations])
    ranking = costs.argsort()
    return rotations[ranking[0]]
|
||||
|
||||
|
||||
def cal_line_length(point1, point2):
    """Return the Euclidean distance between two ``(x, y)`` points."""
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared = math.pow(delta_x, 2) + math.pow(delta_y, 2)
    return math.sqrt(squared)
|
||||
|
||||
|
||||
class SliceBase(object):
    """Cut large images and their polygon annotations into square patches.

    Patches of ``subsize`` x ``subsize`` pixels are taken on a regular grid
    whose step is ``subsize - gap``. Images go to ``<output_dir>/images`` and
    annotations to ``<output_dir>/labelTxt``.

    Args:
        gap (int): overlap between neighbouring patches.
        subsize (int): side length of a patch.
        thresh (float): minimal fraction of an object that must lie inside a
            patch for it to keep its original ``difficult`` flag; objects
            below it are written with the flag ``2``.
        choosebestpoint (bool): reorder the corners of clipped polygons so
            they match the corner order of the source polygon.
        ext (str): file extension of the written patches.
        padding (bool): zero-pad patches smaller than ``subsize``.
        num_process (int): worker processes; ``<= 1`` runs sequentially.
        image_only (bool): skip the annotation files.
    """

    def __init__(self,
                 gap=512,
                 subsize=1024,
                 thresh=0.7,
                 choosebestpoint=True,
                 ext='.png',
                 padding=True,
                 num_process=8,
                 image_only=False):
        self.gap = gap
        self.subsize = subsize
        self.slide = subsize - gap
        self.thresh = thresh
        self.choosebestpoint = choosebestpoint
        self.ext = ext
        self.padding = padding
        self.num_process = num_process
        self.image_only = image_only

    def _window_starts(self, length):
        """Return the window start offsets along one axis of size ``length``."""
        starts = []
        pos = 0
        while pos < length:
            if pos + self.subsize >= length:
                # Last window: pull it back so it ends at the image border.
                starts.append(max(length - self.subsize, 0))
                break
            if self.slide <= 0:
                # Without a positive step the position never advances.
                raise ValueError(
                    'gap ({}) must be smaller than subsize ({}) to slice an '
                    'axis of length {}'.format(self.gap, self.subsize, length))
            starts.append(pos)
            pos += self.slide
        return starts

    def get_windows(self, height, width):
        """List the patch windows covering an image of the given size.

        Returns:
            list[tuple]: ``(left, up, right, down)`` per window, ordered
            column by column; ``right``/``down`` are capped at
            ``width - 1``/``height - 1``.

        Raises:
            ValueError: if the image needs more than one window along an
                axis while ``subsize - gap`` is not positive.
        """
        rows = self._window_starts(height)
        windows = []
        for left in self._window_starts(width):
            right = min(left + self.subsize, width - 1)
            for up in rows:
                down = min(up + self.subsize, height - 1)
                windows.append((left, up, right, down))
        return windows

    def slice_image_single(self, image, windows, output_dir, output_name):
        """Write one image file per window into ``<output_dir>/images``.

        Files are named ``<output_name><left>___<up><ext>``.
        """
        image_dir = os.path.join(output_dir, 'images')
        for (left, up, right, down) in windows:
            image_name = output_name + str(left) + '___' + str(up) + self.ext
            subimg = image[up:up + self.subsize, left:left + self.subsize]
            h, w, c = subimg.shape
            if self.padding:
                # Keep the source dtype so no float64 canvas is allocated.
                outimg = np.zeros(
                    (self.subsize, self.subsize, 3), dtype=subimg.dtype)
                outimg[0:h, 0:w, :] = subimg
                cv2.imwrite(os.path.join(image_dir, image_name), outimg)
            else:
                cv2.imwrite(os.path.join(image_dir, image_name), subimg.copy())

    def iof(self, poly1, poly2):
        """Return ``(intersection, intersection.area / poly1.area)``.

        ``poly1`` must have a non-zero area.
        """
        inter_poly = poly1.intersection(poly2)
        inter_area = inter_poly.area
        poly1_area = poly1.area
        half_iou = inter_area / poly1_area
        return inter_poly, half_iou

    def translate(self, poly, left, up):
        """Shift flat ``x, y, ...`` coordinates into window coordinates.

        Each shifted value is truncated with ``int()``; the result is a float
        array of the same length as ``poly``.
        """
        n = len(poly)
        out_poly = np.zeros(n)
        for i in range(n // 2):
            out_poly[i * 2] = int(poly[i * 2] - left)
            out_poly[i * 2 + 1] = int(poly[i * 2 + 1] - up)
        return out_poly

    def get_poly4_from_poly5(self, poly):
        """Reduce a 5-point polygon (10 values) to 4 points (8 values).

        The two end points of the shortest edge are replaced by their
        midpoint; the remaining points keep their order.
        """
        distances = [
            cal_line_length((poly[i * 2], poly[i * 2 + 1]),
                            (poly[(i + 1) * 2], poly[(i + 1) * 2 + 1]))
            for i in range(int(len(poly) / 2 - 1))
        ]
        # Closing edge between the first and the last point.
        distances.append(
            cal_line_length((poly[0], poly[1]), (poly[8], poly[9])))
        pos = np.array(distances).argsort()[0]
        count = 0
        out_poly = []
        while count < 5:
            if count == pos:
                # Start of the shortest edge: emit the edge midpoint.
                out_poly.append(
                    (poly[count * 2] + poly[(count * 2 + 2) % 10]) / 2)
                out_poly.append(
                    (poly[(count * 2 + 1) % 10] + poly[(count * 2 + 3) % 10]) /
                    2)
            elif count == (pos + 1) % 5:
                # End of the shortest edge: already merged into the midpoint.
                pass
            else:
                out_poly.append(poly[count * 2])
                out_poly.append(poly[count * 2 + 1])
            count = count + 1
        return out_poly

    def slice_anno_single(self, annos, windows, output_dir, output_name):
        """Write one annotation file per window into ``<output_dir>/labelTxt``.

        Objects fully inside a window are only shifted. Objects partly inside
        are clipped to the window and kept when the clipped shape has 4 or 5
        corners (5 are reduced to 4). Each line is the 8 coordinates, the
        class name and a difficulty flag.
        """
        anno_dir = os.path.join(output_dir, 'labelTxt')
        for (left, up, right, down) in windows:
            image_poly = shgeo.Polygon(
                [(left, up), (right, up), (right, down), (left, down)])
            anno_file = output_name + str(left) + '___' + str(up) + '.txt'
            with open(os.path.join(anno_dir, anno_file), 'w') as f:
                for anno in annos:
                    gt_poly = shgeo.Polygon(
                        [(anno['poly'][0], anno['poly'][1]),
                         (anno['poly'][2], anno['poly'][3]),
                         (anno['poly'][4], anno['poly'][5]),
                         (anno['poly'][6], anno['poly'][7])])
                    if gt_poly.area <= 0:
                        continue
                    inter_poly, iof = self.iof(gt_poly, image_poly)
                    if iof == 1:
                        final_poly = self.translate(anno['poly'], left, up)
                    elif iof > 0:
                        inter_poly = shgeo.polygon.orient(inter_poly, sign=1)
                        # The ring repeats its first point at the end.
                        out_poly = list(inter_poly.exterior.coords)[0:-1]
                        if len(out_poly) < 4 or len(out_poly) > 5:
                            continue

                        final_poly = []
                        for p in out_poly:
                            final_poly.append(p[0])
                            final_poly.append(p[1])

                        if len(out_poly) == 5:
                            final_poly = self.get_poly4_from_poly5(final_poly)

                        if self.choosebestpoint:
                            final_poly = choose_best_pointorder_fit_another(
                                final_poly, anno['poly'])

                        final_poly = self.translate(final_poly, left, up)
                        final_poly = np.clip(final_poly, 1, self.subsize)
                    else:
                        continue

                    outline = ' '.join(list(map(str, final_poly)))
                    if iof >= self.thresh:
                        outline = outline + ' ' + anno['name'] + ' ' + str(
                            anno['difficult'])
                    else:
                        # Too little of the object is visible in this window.
                        outline = outline + ' ' + anno['name'] + ' ' + '2'

                    f.write(outline + '\n')

    def slice_data_single(self, info, rate, output_dir):
        """Slice one image (and its annotations) at one scale ``rate``.

        Images that cannot be read are skipped. ``info`` is not modified, so
        the same info can be sliced again with another rate.
        """
        file_name = info['image_file']
        base_name = os.path.splitext(os.path.split(file_name)[-1])[0]
        base_name = base_name + '__' + str(rate) + '__'
        img = cv2.imread(file_name)
        # cv2.imread signals an unreadable file by returning None.
        if img is None or img.shape == ():
            return

        if rate != 1:
            resize_img = cv2.resize(
                img, None, fx=rate, fy=rate, interpolation=cv2.INTER_CUBIC)
        else:
            resize_img = img

        height, width, _ = resize_img.shape
        windows = self.get_windows(height, width)
        self.slice_image_single(resize_img, windows, output_dir, base_name)
        if not self.image_only:
            # Scale copies of the annotations; scaling in place would compound
            # the rates when one info is processed with several rates.
            annos = [
                dict(anno, poly=[rate * v for v in anno['poly']])
                for anno in info['annotation']
            ]
            self.slice_anno_single(annos, windows, output_dir, base_name)

    def check_or_mkdirs(self, path):
        """Create ``path`` (including parents) unless it already exists."""
        os.makedirs(path, exist_ok=True)

    def slice_data(self, infos, rates, output_dir):
        """
        Slice every info at every rate, showing a progress bar.

        Args:
            infos (list[dict]): data_infos
            rates (float, list): scale rates
            output_dir (str): output directory
        """
        if isinstance(rates, Number):
            rates = [rates, ]

        self.check_or_mkdirs(output_dir)
        self.check_or_mkdirs(os.path.join(output_dir, 'images'))
        if not self.image_only:
            self.check_or_mkdirs(os.path.join(output_dir, 'labelTxt'))

        pbar = tqdm(total=len(rates) * len(infos), desc='slicing data')

        if self.num_process <= 1:
            for rate in rates:
                for info in infos:
                    self.slice_data_single(info, rate, output_dir)
                    pbar.update()
        else:

            def report_failure(exc):
                # Without an error callback a failed task would vanish
                # silently and leave the progress bar short.
                print('slicing task failed: {!r}'.format(exc))
                pbar.update()

            pool = Pool(self.num_process)
            for rate in rates:
                for info in infos:
                    pool.apply_async(
                        self.slice_data_single, (info, rate, output_dir),
                        callback=lambda x: pbar.update(),
                        error_callback=report_failure)

            pool.close()
            pool.join()

        pbar.close()
|
||||
Reference in New Issue
Block a user