更换文档检测模型

This commit is contained in:
2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,231 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import codecs
import os
import yaml
import numpy as np
import cv2
from sklearn.cluster import DBSCAN
from pptracking.python.det_infer import load_predictor
class LaneSegPredictor:
    """Lane-line extractor built on a Paddle Inference segmentation model.

    The model output is reduced to a binary mask per image, line segments are
    detected with a probabilistic Hough transform, grouped with DBSCAN on
    their midpoints, and the longest segment of every group is kept.
    """

    def __init__(self, lane_seg_config, model_dir):
        """
        Prepare for prediction.
        The usage and docs of paddle inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html

        Args:
            lane_seg_config (str): path of the YAML configuration file.
            model_dir (str): model directory handed to ``load_predictor``.

        Raises:
            ValueError: if ``lane_seg_config`` does not exist.
        """
        if not os.path.exists(lane_seg_config):
            raise ValueError("Cannot find : {},".format(lane_seg_config))

        # Use a context manager so the config file handle is always closed.
        with open(lane_seg_config) as config_file:
            args = yaml.safe_load(config_file)

        self.model_dir = model_dir
        # The top-level 'type' key names the section holding the settings.
        self.args = args[args['type']]
        self.shape = None
        self.filter_horizontal_flag = self.args['filter_horizontal_flag']
        self.horizontal_filtration_degree = self.args[
            'horizontal_filtration_degree']
        self.horizontal_filtering_threshold = self.args[
            'horizontal_filtering_threshold']

        try:
            self.predictor, _ = load_predictor(
                model_dir=self.model_dir,
                run_mode=self.args['run_mode'],
                batch_size=self.args['batch_size'],
                device=self.args['device'],
                min_subgraph_size=self.args['min_subgraph_size'],
                use_dynamic_shape=self.args['use_dynamic_shape'],
                trt_min_shape=self.args['trt_min_shape'],
                trt_max_shape=self.args['trt_max_shape'],
                trt_opt_shape=self.args['trt_opt_shape'],
                trt_calib_mode=self.args['trt_calib_mode'],
                cpu_threads=self.args['cpu_threads'],
                enable_mkldnn=self.args['enable_mkldnn'])
        except Exception as e:
            # Kept from the original: report the failure and stop the process.
            print(str(e))
            exit()

    def run(self, img):
        """Run the model on a batch of images and extract lane lines.

        Args:
            img: batch of images convertible to a 4-D array; axis 3 is moved
                to axis 1 before inference.

        Returns:
            tuple: ``(lines, directions)`` as produced by ``get_line``.
        """
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])

        img = np.array(img)
        # Axes 1 and 2 are remembered and later read as (h, w).
        self.shape = img.shape[1:3]
        img = self.normalize(img)
        img = np.transpose(img, (0, 3, 1, 2))

        input_handle.reshape(img.shape)
        input_handle.copy_from_cpu(img)
        self.predictor.run()

        results = output_handle.copy_to_cpu()
        results = self.postprocess(results)
        return self.get_line(results)

    def normalize(self, im, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
        """Scale ``im`` to [0, 1] and apply ``(im - mean) / std`` per channel.

        Returns:
            numpy.ndarray: float32 array; the input array is not modified.
        """
        mean = np.array(mean)[np.newaxis, np.newaxis, :]
        std = np.array(std)[np.newaxis, np.newaxis, :]
        # The division allocates a new array, so the in-place ops are safe.
        im = im.astype(np.float32, copy=False) / 255.0
        im -= mean
        im /= std
        return im

    def postprocess(self, pred):
        """Turn per-class scores into a 0/255 mask.

        The arg-max over axis 1 is taken; label 3 is mapped to 0 and every
        remaining non-zero label becomes 255.
        """
        pred = np.argmax(pred, axis=1)
        pred[pred == 3] = 0
        pred[pred > 0] = 255
        return pred

    def get_line(self, results):
        """Apply ``hough_line`` to every mask in ``results``.

        Returns:
            tuple: ``(lines, directions)``, one entry per mask.
        """
        lines = []
        directions = []
        for mask in results:
            line, direction = self.hough_line(np.uint8(mask))
            lines.append(line)
            directions.append(direction)
        return lines, directions

    def get_distance(self, array_1, array_2):
        """Return the Euclidean distance between two 2-D points."""
        lon_a, lat_a = array_1[0], array_1[1]
        lon_b, lat_b = array_2[0], array_2[1]
        return pow(pow((lat_b - lat_a), 2) + pow((lon_b - lon_a), 2), 0.5)

    def get_angle(self, array):
        """Return the inclination of segment ``(x1, y1, x2, y2)`` in degrees.

        The angle is truncated to an integer and values above 90 are folded
        to ``180 - angle``.
        """
        import math
        x1, y1, x2, y2 = array
        angle = math.atan2(y2 - y1, x2 - x1)
        angle = int(angle * 180 / math.pi)
        if angle > 90:
            angle = 180 - angle
        return angle

    def get_proportion(self, lines):
        """Sum the extent of ``lines`` relative to the stored image shape.

        Every segment contributes ``|dy| / h + |dx| / w`` where ``(h, w)`` is
        ``self.shape``.
        """
        h, w = self.shape
        proportion = 0.0
        for x1, y1, x2, y2 in lines:
            proportion = proportion + (abs(y2 - y1) / h + abs(x2 - x1) / w)
        return proportion

    def line_cluster(self, linesP):
        """Group segments whose midpoints are close, using DBSCAN.

        Args:
            linesP: sequence of ``(x1, y1, x2, y2)`` segments.

        Returns:
            list: one list of segments per cluster; each segment is ordered
            so that ``y2 >= y1``. Segments labelled as noise are dropped.
        """
        points = []
        for l in linesP:
            x_center = (float(
                (max(l[2], l[0]) - min(l[2], l[0]))) / 2.0 + min(l[2], l[0]))
            y_center = (float(
                (max(l[3], l[1]) - min(l[3], l[1]))) / 2.0 + min(l[3], l[1]))
            points.append([x_center, y_center])

        dbscan = DBSCAN(
            eps=50, min_samples=2, metric=self.get_distance).fit(points)
        labels = dbscan.labels_
        # Label -1 marks noise and is not a cluster of its own.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        cluster_list = [[] for _ in range(n_clusters_)]

        for label, l in zip(labels, linesP):
            if label == -1:
                continue
            x1, y1, x2, y2 = l
            if y2 >= y1:
                cluster_list[label].append([x1, y1, x2, y2])
            else:
                # Swap the end points so the second one has the larger y.
                cluster_list[label].append([x2, y2, x1, y1])
        return cluster_list

    def hough_line(self,
                   binary_img,
                   min_line=50,
                   min_line_points=50,
                   max_line_gap=10):
        """Detect, cluster and filter line segments in a binary image.

        Returns:
            tuple: ``(lines, direction)``; ``([], None)`` when the Hough
            transform finds nothing.
        """
        linesP = cv2.HoughLinesP(binary_img, 1, np.pi / 180, min_line, None,
                                 min_line_points, max_line_gap)
        if linesP is None:
            return [], None

        coarse_cluster_list = self.line_cluster(linesP[:, 0])
        filter_lines_output, direction = self.filter_lines(coarse_cluster_list)
        return filter_lines_output, direction

    def filter_lines(self, coarse_cluster_list):
        """Keep the longest segment of each cluster, optionally one angle group.

        When ``filter_horizontal_flag`` is set and the angle spread reaches
        ``horizontal_filtration_degree``, the segments are split at
        ``(max_angle + min_angle) * horizontal_filtering_threshold`` and the
        group with the larger ``get_proportion`` value is returned.

        Returns:
            tuple: ``(lines, direction)`` where ``direction`` is the mean
            angle of the returned lines, or ``None`` when no filtering by
            angle took place.
        """
        lines = []
        angles = []
        for raw_cluster in coarse_cluster_list:
            if len(raw_cluster) == 0:
                continue
            # Work on a local array so the caller's list is left untouched.
            cluster = np.array(raw_cluster)
            distance = abs(cluster[:, 3] - cluster[:, 1]) + abs(
                cluster[:, 2] - cluster[:, 0])
            l = cluster[np.argmax(distance)]
            angles.append(self.get_angle(l))
            lines.append(l)

        if len(lines) == 0:
            return [], None
        if not self.filter_horizontal_flag:
            return lines, None

        # filter horizontal roads
        angles = np.array(angles)
        max_angle, min_angle = np.max(angles), np.min(angles)
        if (max_angle - min_angle) < self.horizontal_filtration_degree:
            return lines, np.mean(angles)

        thr_angle = (
            max_angle + min_angle) * self.horizontal_filtering_threshold
        lines = np.array(lines)
        low_mask = angles < thr_angle
        high_mask = angles >= thr_angle
        min_angle_line = lines[np.where(low_mask)]
        max_angle_line = lines[np.where(high_mask)]

        max_angle_line_pro = self.get_proportion(max_angle_line)
        min_angle_line_pro = self.get_proportion(min_angle_line)
        if max_angle_line_pro >= min_angle_line_pro:
            return max_angle_line, np.mean(angles[np.where(high_mask)])
        return min_angle_line, np.mean(angles[np.where(low_mask)])

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,150 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
import cv2
import numpy as np
import math
import paddle
import sys
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 3)))
sys.path.insert(0, parent_path)
from paddle.inference import Config, create_predictor
from python.utils import argsparser, Timer, get_current_memory_mb
from python.benchmark_utils import PaddleInferBenchmark
from python.infer import Detector, print_arguments
from pipeline.pphuman.attr_infer import AttrDetector
class VehicleAttr(AttrDetector):
    """
    Vehicle attribute recognizer reporting a color and a type per result row.

    Args:
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): Choose the device you want to run, it can be: CPU/GPU/XPU/NPU, default is CPU
        run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
        batch_size (int): size of pre batch in inference
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode need to set True
        cpu_threads (int): cpu threads
        enable_mkldnn (bool): whether to open MKLDNN
        output_dir (str): forwarded unchanged to the base class
        type_threshold (float): The threshold of score for vehicle type recognition.
        color_threshold (float): The threshold of score for vehicle color recognition.
    """

    def __init__(self,
                 model_dir,
                 device='CPU',
                 run_mode='paddle',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False,
                 output_dir='output',
                 color_threshold=0.5,
                 type_threshold=0.5):
        base_kwargs = dict(
            model_dir=model_dir,
            device=device,
            run_mode=run_mode,
            batch_size=batch_size,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn,
            output_dir=output_dir)
        super(VehicleAttr, self).__init__(**base_kwargs)

        self.color_threshold = color_threshold
        self.type_threshold = type_threshold
        self.result_history = {}
        # The first 10 scores of a result row are read as colors, the rest as
        # types (see postprocess).
        self.color_list = [
            "yellow", "orange", "green", "gray", "red", "blue", "white",
            "golden", "brown", "black"
        ]
        self.type_list = [
            "sedan", "suv", "van", "hatchback", "mpv", "pickup", "bus", "truck",
            "estate"
        ]

    @classmethod
    def init_with_cfg(cls, args, cfg):
        """Build an instance from a config mapping plus runtime arguments."""
        return cls(
            model_dir=cfg['model_dir'],
            batch_size=cfg['batch_size'],
            color_threshold=cfg['color_threshold'],
            type_threshold=cfg['type_threshold'],
            device=args.device,
            run_mode=args.run_mode,
            trt_min_shape=args.trt_min_shape,
            trt_max_shape=args.trt_max_shape,
            trt_opt_shape=args.trt_opt_shape,
            trt_calib_mode=args.trt_calib_mode,
            cpu_threads=args.cpu_threads,
            enable_mkldnn=args.enable_mkldnn)

    def postprocess(self, inputs, result):
        """Convert raw scores into "Color: ..." / "Type: ..." strings.

        Args:
            inputs: not used by this method.
            result (dict): ``result['output']`` holds one score row per item.

        Returns:
            dict: ``{'output': [[color_str, type_str], ...]}``; a label is
            "Unknown" when its best score is below the matching threshold.
        """
        # postprocess output of predictor
        batch_res = []
        for row in result['output']:
            scores = row.tolist()
            color_idx = np.argmax(scores[:10])
            type_idx = np.argmax(scores[10:])

            if scores[color_idx] >= self.color_threshold:
                color_name = self.color_list[color_idx]
            else:
                color_name = "Unknown"

            # type_idx is relative to the slice that starts at position 10.
            if scores[type_idx + 10] >= self.type_threshold:
                type_name = self.type_list[type_idx]
            else:
                type_name = "Unknown"

            batch_res.append(["Color: " + color_name, "Type: " + type_name])
        return {'output': batch_res}
if __name__ == '__main__':
    # Script entry point: parse the command line, validate the device, run.
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)

    # Device names are compared in upper case.
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ('CPU', 'GPU', 'XPU', 'NPU'), \
        "device should be CPU, GPU, NPU or XPU"
    assert not FLAGS.use_gpu, \
        "use_gpu has been deprecated, please use --device"

    # NOTE(review): `main` is not defined in the visible part of this file -
    # confirm it exists before relying on this entry point.
    main()

View File

@@ -0,0 +1,331 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
from functools import reduce
import time
import cv2
import numpy as np
import math
import paddle
import sys
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 3)))
sys.path.insert(0, parent_path)
from python.infer import get_test_images
from python.preprocess import preprocess, NormalizeImage, Permute, Resize_Mult32
from pipeline.ppvehicle.vehicle_plateutils import create_predictor, get_infer_gpuid, get_rotate_crop_image, draw_boxes
from pipeline.ppvehicle.vehicleplate_postprocess import build_post_process
from pipeline.cfg_utils import merge_cfg, print_arguments, argsparser
class PlateDetector(object):
    """Detects plate boxes in images with a Paddle predictor and DB post-processing."""

    def __init__(self, args, cfg):
        """
        Args:
            args: runtime arguments, stored and forwarded to ``create_predictor``.
            cfg (dict): must provide ``det_limit_side_len`` and
                ``det_limit_type``; also forwarded to ``create_predictor``.
        """
        self.args = args
        # Operator name -> constructor keyword arguments, applied in order.
        self.pre_process_list = {
            'Resize_Mult32': {
                'limit_side_len': cfg['det_limit_side_len'],
                'limit_type': cfg['det_limit_type'],
            },
            'NormalizeImage': {
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225],
                'is_scale': True,
            },
            'Permute': {}
        }
        postprocess_params = {
            'name': 'DBPostProcess',
            "thresh": 0.3,
            "box_thresh": 0.6,
            "max_candidates": 1000,
            "unclip_ratio": 1.5,
            "use_dilation": False,
            "score_mode": "fast",
        }
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = create_predictor(
            args, cfg, 'det')

    def preprocess(self, im_path):
        """Run the configured preprocessing operators on one image.

        Returns:
            tuple: ``(batch, shape_list)`` where ``batch`` stacks the single
            preprocessed image on a new axis 0 and ``shape_list`` holds
            ``im_shape / scale_factor`` for it.

        Raises:
            ValueError: if ``pre_process_list`` names an unknown operator.
        """
        # Explicit lookup table instead of eval(): only these operator
        # classes can be instantiated from a configured name.
        op_classes = {
            'Resize_Mult32': Resize_Mult32,
            'NormalizeImage': NormalizeImage,
            'Permute': Permute,
        }
        preprocess_ops = []
        for op_type, new_op_info in self.pre_process_list.items():
            if op_type not in op_classes:
                raise ValueError(
                    "Unsupported preprocess op: {}".format(op_type))
            preprocess_ops.append(op_classes[op_type](**new_op_info))

        # `preprocess` here is the module-level function, not this method.
        im, im_info = preprocess(im_path, preprocess_ops)
        input_im_lst = [im]
        input_im_info_lst = [im_info['im_shape'] / im_info['scale_factor']]
        return np.stack(input_im_lst, axis=0), input_im_info_lst

    def order_points_clockwise(self, pts):
        """Order four points using coordinate sums and differences.

        Slot 0 gets the smallest ``x + y``, slot 2 the largest ``x + y``,
        slot 1 the smallest ``y - x`` and slot 3 the largest ``y - x``.

        Returns:
            numpy.ndarray: float32 array of shape (4, 2).
        """
        rect = np.zeros((4, 2), dtype="float32")
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]
        return rect

    def clip_det_res(self, points, img_height, img_width):
        """Clamp every point into the image, in place, and return ``points``."""
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        """Order, clip and drop boxes whose width or height is at most 3.

        Returns:
            numpy.ndarray: the surviving boxes.
        """
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            box = self.clip_det_res(box, img_height, img_width)
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            dt_boxes_new.append(box)
        return np.array(dt_boxes_new)

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        """Clip every box to the image without reordering or dropping any."""
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = [
            self.clip_det_res(box, img_height, img_width) for box in dt_boxes
        ]
        return np.array(dt_boxes_new)

    def predict_image(self, img_list):
        """Detect boxes in every image of ``img_list``.

        Returns:
            tuple: ``(boxes_per_image, elapsed_seconds)``, or ``(None, 0)``
            if preprocessing yields ``None`` for an image.
        """
        st = time.time()
        dt_batch_boxes = []
        for image in img_list:
            img, shape_list = self.preprocess(image)
            if img is None:
                return None, 0

            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = [
                output_tensor.copy_to_cpu()
                for output_tensor in self.output_tensors
            ]

            # Only the first output is passed on, under the key 'maps'.
            preds = {'maps': outputs[0]}
            #self.predictor.try_shrink_memory()
            post_result = self.postprocess_op(preds, shape_list)

            org_shape = image.shape
            dt_boxes = post_result[0]['points']
            dt_boxes = self.filter_tag_det_res(dt_boxes, org_shape)
            dt_batch_boxes.append(dt_boxes)

        et = time.time()
        return dt_batch_boxes, et - st
class TextRecognizer(object):
    """Recognizes text in cropped images with a Paddle predictor and CTC decoding."""

    def __init__(self, args, cfg, use_gpu=True):
        """
        Args:
            args: runtime arguments forwarded to ``create_predictor``.
            cfg (dict): must provide ``rec_image_shape``, ``rec_batch_num``
                and ``word_dict_path``.
            use_gpu (bool): accepted for compatibility; not read here.
        """
        self.rec_image_shape = cfg['rec_image_shape']
        self.rec_batch_num = cfg['rec_batch_num']
        word_dict_path = cfg['word_dict_path']
        use_space_char = True

        postprocess_params = {
            'name': 'CTCLabelDecode',
            "character_dict_path": word_dict_path,
            "use_space_char": use_space_char
        }
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            create_predictor(args, cfg, 'rec')
        self.use_onnx = False

    def resize_norm_img(self, img, max_wh_ratio):
        """Resize ``img`` to the model height, normalize it and right-pad it.

        Args:
            img: image whose last axis has ``rec_image_shape[0]`` channels.
            max_wh_ratio (float): width/height ratio defining the padded width.

        Returns:
            numpy.ndarray: float32 array of shape ``(C, H, padded_width)``
            with values mapped from [0, 255] to [-1, 1]; padding is zero.
        """
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        imgW = int((imgH * max_wh_ratio))
        if self.use_onnx:
            w = self.input_tensor.shape[3:][0]
            if w is not None and w > 0:
                imgW = w

        h, w = img.shape[:2]
        ratio = w / float(h)
        # Never let the resized width exceed the padded width.
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))

        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5

        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def predict_text(self, img_list):
        """Recognize text in every image of ``img_list``.

        Images are processed in batches of ``rec_batch_num`` in order of
        increasing aspect ratio; results are returned in input order.

        Returns:
            tuple: ``(rec_res, elapsed_seconds)`` where ``rec_res[i]`` is the
            decoder result for ``img_list[i]``.
        """
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        # One independent placeholder per image: `[x] * n` would make every
        # slot refer to the same list object.
        rec_res = [['', 0.0] for _ in range(img_num)]
        batch_num = self.rec_batch_num
        st = time.time()
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            imgC, imgH, imgW = self.rec_image_shape
            # Start from the configured ratio and widen it if needed.
            max_wh_ratio = imgW / imgH
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            norm_img_batch = []
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()

            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors, input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]

            rec_result = self.postprocess_op(preds)
            # Undo the aspect-ratio sort when storing the results.
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
        return rec_res, time.time() - st
class PlateRecognizer(object):
    """Detects plates in vehicle images and reads their text."""

    # Province abbreviation character -> ASCII prefix. The interpunct that
    # separates the two halves of a plate is replaced by a space.
    _PROVINCE_CODES = {
        '浙': 'ZJ-',
        '粤': 'GD-',
        '京': 'BJ-',
        '津': 'TJ-',
        '冀': 'HE-',
        '晋': 'SX-',
        '蒙': 'NM-',
        '辽': 'LN-',
        '黑': 'HLJ-',
        '沪': 'SH-',
        '吉': 'JL-',
        '苏': 'JS-',
        '皖': 'AH-',
        '赣': 'JX-',
        '鲁': 'SD-',
        '豫': 'HA-',
        '鄂': 'HB-',
        '湘': 'HN-',
        '桂': 'GX-',
        '琼': 'HI-',
        '渝': 'CQ-',
        '川': 'SC-',
        '贵': 'GZ-',
        '云': 'YN-',
        '藏': 'XZ-',
        '陕': 'SN-',
        '甘': 'GS-',
        '青': 'QH-',
        '宁': 'NX-',
        '闽': 'FJ-',
        '·': ' '
    }

    def __init__(self, args, cfg):
        """Create the detector and the recognizer from ``args`` and ``cfg``."""
        use_gpu = args.device.lower() == "gpu"
        self.platedetector = PlateDetector(args, cfg)
        self.textrecognizer = TextRecognizer(args, cfg, use_gpu=use_gpu)

    def get_platelicense(self, image_list):
        """Detect and read the plates of every image in ``image_list``.

        Returns:
            dict: ``{"plate": [...]}`` as built by ``check_plate``.
        """
        plate_text_list = []
        plateboxes, det_time = self.platedetector.predict_image(image_list)
        for idx, boxes_pcar in enumerate(plateboxes):
            plate_pcar_list = []
            for box in boxes_pcar:
                plate_images = get_rotate_crop_image(image_list[idx], box)
                # Each entry is the (results, elapsed) pair from predict_text.
                plate_texts = self.textrecognizer.predict_text([plate_images])
                plate_pcar_list.append(plate_texts)
            plate_text_list.append(plate_pcar_list)
        return self.check_plate(plate_text_list)

    def check_plate(self, text_list):
        """Pick one plate string per image.

        A recognized text is accepted when its length is between 3 and 9
        characters; the last accepted text of an image wins, and an image
        without any accepted text yields an empty string.

        Returns:
            dict: ``{"plate": [text_for_image_0, ...]}``.
        """
        plate_all = {"plate": []}
        for text_pcar in text_list:
            platelicense = ""
            for text_info in text_pcar:
                # (results, elapsed) -> first result -> its text.
                text = text_info[0][0][0]
                if 2 < len(text) < 10:
                    platelicense = self.replace_cn_code(text)
            plate_all["plate"].append(platelicense)
        return plate_all

    def replace_cn_code(self, text):
        """Replace province characters and the interpunct by ASCII codes."""
        simcode = self._PROVINCE_CODES
        for _char in text:
            if _char in simcode:
                text = text.replace(_char, simcode[_char])
        return text
def main():
    """Recognize the plates of every test image and print the results.

    Reads its settings from the module-level ``FLAGS``.
    """
    cfg = merge_cfg(FLAGS)
    print_arguments(cfg)
    vehicleplate_cfg = cfg['VEHICLE_PLATE']
    detector = PlateRecognizer(FLAGS, vehicleplate_cfg)

    # predict from image
    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
    for img in img_list:
        image = cv2.imread(img)
        if image is None:
            # cv2.imread returns None for unreadable files; skip them instead
            # of failing later on a missing array.
            print("Failed to read image: {}".format(img))
            continue
        results = detector.get_platelicense([image])
        print(results)
if __name__ == '__main__':
    # Script entry point: parse the command line, validate the device, run.
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()

    # Device names are compared in upper case.
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ('CPU', 'GPU', 'XPU', 'NPU'), \
        "device should be CPU, GPU, NPU or XPU"

    main()

View File

@@ -0,0 +1,505 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import platform
import cv2
import numpy as np
import paddle
from PIL import Image, ImageDraw, ImageFont
import math
from paddle import inference
import time
import ast
def _det_trt_dynamic_shapes():
    """Return the (min, max, opt) TensorRT shape tables used for mode 'det'."""
    # tensor name -> (min shape, max shape, opt shape)
    table = {
        "x": ([1, 3, 50, 50], [1, 3, 1536, 1536], [1, 3, 640, 640]),
        "conv2d_92.tmp_0":
        ([1, 120, 20, 20], [1, 120, 400, 400], [1, 120, 160, 160]),
        "conv2d_91.tmp_0":
        ([1, 24, 10, 10], [1, 24, 200, 200], [1, 24, 80, 80]),
        "conv2d_59.tmp_0":
        ([1, 96, 20, 20], [1, 96, 400, 400], [1, 96, 160, 160]),
        "nearest_interp_v2_1.tmp_0":
        ([1, 256, 10, 10], [1, 256, 200, 200], [1, 256, 80, 80]),
        "nearest_interp_v2_2.tmp_0":
        ([1, 256, 20, 20], [1, 256, 400, 400], [1, 256, 160, 160]),
        "conv2d_124.tmp_0":
        ([1, 256, 20, 20], [1, 256, 400, 400], [1, 256, 160, 160]),
        "nearest_interp_v2_3.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
        "nearest_interp_v2_4.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
        "nearest_interp_v2_5.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
        "elementwise_add_7":
        ([1, 56, 2, 2], [1, 56, 400, 400], [1, 56, 40, 40]),
        "nearest_interp_v2_0.tmp_0":
        ([1, 256, 2, 2], [1, 256, 400, 400], [1, 256, 40, 40]),
        # entries the original kept in separate "pact" tables
        "nearest_interp_v2_26.tmp_0":
        ([1, 256, 20, 20], [1, 256, 400, 400], [1, 256, 160, 160]),
        "nearest_interp_v2_27.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
        "nearest_interp_v2_28.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
        "nearest_interp_v2_29.tmp_0":
        ([1, 64, 20, 20], [1, 64, 400, 400], [1, 64, 160, 160]),
    }
    min_shape = {name: shapes[0] for name, shapes in table.items()}
    max_shape = {name: shapes[1] for name, shapes in table.items()}
    opt_shape = {name: shapes[2] for name, shapes in table.items()}
    return min_shape, max_shape, opt_shape


def create_predictor(args, cfg, mode):
    """Build a Paddle Inference predictor for the 'det' or 'rec' model.

    Args:
        args: object providing ``device``, ``run_mode``, ``enable_mkldnn``
            and optionally ``cpu_threads``.
        cfg (dict): provides ``det_model_dir`` / ``rec_model_dir`` and, for
            TensorRT in 'rec' mode, ``rec_image_shape``.
        mode (str): "det" selects ``det_model_dir``; any other value selects
            ``rec_model_dir``.

    Returns:
        tuple: ``(predictor, input_tensor, output_tensors, config)``;
        ``input_tensor`` is the handle of the last input name.

    Raises:
        ValueError: if the model or parameter file does not exist.
        SystemExit: if the configured model directory is ``None``.
    """
    model_dir = cfg['det_model_dir'] if mode == "det" else cfg['rec_model_dir']
    if model_dir is None:
        print("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)

    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        raise ValueError("not find model file path {}".format(model_file_path))
    if not os.path.exists(params_file_path):
        raise ValueError("not find params file path {}".format(
            params_file_path))

    config = inference.Config(model_file_path, params_file_path)
    batch_size = 1

    if args.device == "GPU":
        gpu_id = get_infer_gpuid()
        if gpu_id is None:
            print(
                "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
            )
        config.enable_use_gpu(500, 0)

        precision_map = {
            'trt_int8': inference.PrecisionType.Int8,
            'trt_fp32': inference.PrecisionType.Float32,
            'trt_fp16': inference.PrecisionType.Half
        }
        min_subgraph_size = 15
        if args.run_mode in precision_map:
            config.enable_tensorrt_engine(
                workspace_size=(1 << 25) * batch_size,
                max_batch_size=batch_size,
                min_subgraph_size=min_subgraph_size,
                precision_mode=precision_map[args.run_mode])

            # NOTE(review): the dynamic-shape setup is assumed to belong
            # inside this TensorRT branch - confirm against the upstream file.
            use_dynamic_shape = True
            if mode == "det":
                (min_input_shape, max_input_shape,
                 opt_input_shape) = _det_trt_dynamic_shapes()
            elif mode == "rec":
                imgH = int(cfg['rec_image_shape'][-2])
                min_input_shape = {"x": [1, 3, imgH, 10]}
                max_input_shape = {"x": [batch_size, 3, imgH, 2304]}
                opt_input_shape = {"x": [batch_size, 3, imgH, 320]}
                config.exp_disable_tensorrt_ops(["transpose2"])
            elif mode == "cls":
                min_input_shape = {"x": [1, 3, 48, 10]}
                max_input_shape = {"x": [batch_size, 3, 48, 1024]}
                opt_input_shape = {"x": [batch_size, 3, 48, 320]}
            else:
                use_dynamic_shape = False

            if use_dynamic_shape:
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)
    else:
        config.disable_gpu()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            # default cpu threads as 10
            config.set_cpu_math_library_num_threads(10)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
            if args.run_mode == "fp16":
                config.enable_mkldnn_bfloat16()

    # enable memory optim
    config.enable_memory_optim()
    config.disable_glog_info()

    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    if mode == 'table':
        config.delete_pass("fc_fuse_pass")  # not supported for table
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)

    # create predictor
    predictor = inference.create_predictor(config)
    # The handle of the last input name is the one that gets returned.
    for name in predictor.get_input_names():
        input_tensor = predictor.get_input_handle(name)
    output_tensors = get_output_tensors(cfg, mode, predictor)
    return predictor, input_tensor, output_tensors, config
def get_output_tensors(cfg, mode, predictor):
    """Collect the output handles of ``predictor``.

    If an output named 'softmax_0.tmp_0' exists, only its handle is returned;
    otherwise the handles of all outputs are returned in name order.

    Args:
        cfg: not used by this function.
        mode: not used by this function.
        predictor: object exposing ``get_output_names`` and
            ``get_output_handle``.

    Returns:
        list: output handles.
    """
    names = predictor.get_output_names()
    preferred = 'softmax_0.tmp_0'
    if preferred in names:
        return [predictor.get_output_handle(preferred)]
    return [predictor.get_output_handle(name) for name in names]
def get_infer_gpuid():
    """Return the first GPU id listed in the visible-devices variable.

    ``CUDA_VISIBLE_DEVICES`` is consulted, or ``HIP_VISIBLE_DEVICES`` when
    Paddle is compiled with ROCm. The environment is read directly instead of
    spawning ``env | grep``, and the whole first entry is parsed, so a value
    such as "12,13" yields 12.

    Returns:
        int: the first listed device id; 0 on Windows, when the variable is
        unset or empty, or when its first entry is not an integer.
    """
    if platform.system() == "Windows":
        return 0

    if not paddle.device.is_compiled_with_rocm():
        env_name = "CUDA_VISIBLE_DEVICES"
    else:
        env_name = "HIP_VISIBLE_DEVICES"

    first_entry = os.environ.get(env_name, "").split(",")[0].strip()
    try:
        return int(first_entry)
    except ValueError:
        # Unset, empty or non-numeric (e.g. a device UUID): use the default.
        return 0
def draw_e2e_res(dt_boxes, strs, img_path):
    """Draw every box and its text on the image read from ``img_path``.

    Args:
        dt_boxes: iterable of point arrays, one per box.
        strs: iterable of texts, paired with ``dt_boxes`` by position.
        img_path (str): path of the image to load with ``cv2.imread``.

    Returns:
        The image with polygons and labels drawn on it.
    """
    canvas = cv2.imread(img_path)
    for polygon, label in zip(dt_boxes, strs):
        polygon = polygon.astype(np.int32).reshape((-1, 1, 2))
        cv2.polylines(canvas, [polygon], True, color=(255, 255, 0), thickness=2)
        # The label is anchored at the first point of the polygon.
        anchor = (int(polygon[0, 0, 0]), int(polygon[0, 0, 1]))
        cv2.putText(
            canvas,
            label,
            org=anchor,
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=0.7,
            color=(0, 255, 0),
            thickness=1)
    return canvas
def draw_text_det_res(dt_boxes, img_path):
    """Draw the outline of every box on the image read from ``img_path``.

    Returns:
        The image with one closed polygon drawn per box.
    """
    canvas = cv2.imread(img_path)
    for raw_box in dt_boxes:
        polygon = np.array(raw_box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(canvas, [polygon], True, color=(255, 255, 0), thickness=2)
    return canvas
def resize_img(img, input_size=600):
    """
    Resize img so that its longest side becomes input_size; both axes are
    scaled by the same factor.
    """
    pixels = np.array(img)
    longest_side = np.max(pixels.shape[0:2])
    scale = float(input_size) / float(longest_side)
    return cv2.resize(pixels, None, None, fx=scale, fy=scale)
def draw_ocr(image,
             boxes,
             txts=None,
             scores=None,
             drop_score=0.5,
             font_path="./doc/fonts/simfang.ttf"):
    """
    Visualize the results of OCR detection and recognition
    args:
        image(Image|array): RGB image
        boxes(list): boxes with shape(N, 4, 2)
        txts(list): the texts
        scores(list): scores corresponding to the texts; every box counts as
            score 1 when omitted
        drop_score(float): boxes whose score is below this value (or NaN)
            are not drawn
        font_path: the path of font which is used to draw text
    return(array):
        the visualized img; when txts is given, the image is resized and a
        text panel is appended on its right
    """
    if scores is None:
        scores = [1] * len(boxes)

    for idx in range(len(boxes)):
        score = scores[idx]
        if score < drop_score or math.isnan(score):
            continue
        polygon = np.reshape(np.array(boxes[idx]), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [polygon], True, (255, 0, 0), 2)

    if txts is None:
        return image

    resized = np.array(resize_img(image, input_size=600))
    text_panel = text_visual(
        txts,
        scores,
        img_h=resized.shape[0],
        img_w=600,
        threshold=drop_score,
        font_path=font_path)
    return np.concatenate([np.array(resized), np.array(text_panel)], axis=1)
def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     drop_score=0.5,
                     font_path="./doc/simfang.ttf"):
    """Render boxes on the image and their texts on a blank panel beside it.

    Args:
        image: PIL image (its ``height``, ``width`` and ``copy`` are used).
        boxes: sequence of four-point boxes.
        txts: texts paired with ``boxes`` by position.
        scores: optional scores; entries below ``drop_score`` are skipped.
        drop_score (float): minimum score for a box to be drawn.
        font_path (str): TrueType font used for the text.

    Returns:
        numpy.ndarray: image twice as wide as the input; the left half is the
        input blended with filled boxes, the right half shows the texts.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    import random
    # Fixed seed so that box colors are reproducible between calls.
    random.seed(0)

    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
            1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
            1])**2)
        if box_height > 2 * box_width:
            # Tall box: write the text one character per row.
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                # ImageFont.getsize was removed in Pillow 10; the bottom edge
                # reported by getbbox is the same height value.
                if hasattr(font, "getbbox"):
                    char_height = font.getbbox(c)[3]
                else:
                    char_height = font.getsize(c)[1]
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_height
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text(
                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)
def str_count(s):
    """
    Measure the display length of a string in full-width character units.
    ASCII letters, digits and whitespace count as half a unit each (the
    total of these is rounded up); every other character counts as one.
    args:
        s(string): the input of string
    return(int):
        len(s) minus the rounded-up half of the half-width character count
    """
    import string
    half_width = sum(1 for c in s
                     if c in string.ascii_letters or c.isdigit() or
                     c.isspace())
    return len(s) - math.ceil(half_width / 2)
def text_visual(texts,
                scores,
                img_h=400,
                img_w=600,
                threshold=0.,
                font_path="./doc/simfang.ttf"):
    """
    create new blank img and draw txt on it
    args:
        texts(list): the text will be draw
        scores(list|None): corresponding score of each txt; when None, no
            text is filtered out and no score is printed
        img_h(int): the height of blank img
        img_w(int): the width of blank img
        threshold(float): texts whose score is below it (or NaN) are skipped
        font_path: the path of font which is used to draw text
    return(array):
        the rendered panel; when the text does not fit on one panel, several
        panels are concatenated side by side
    """
    if scores is not None:
        assert len(texts) == len(
            scores), "The number of txts and corresponding scores must match"

    def create_blank_img():
        # uint8 holds 255 directly; an int8 array multiplied by 255 overflows
        # under NumPy 2 promotion rules.
        blank_img = np.full((img_h, img_w), 255, dtype=np.uint8)
        # Black rightmost column as separator between panels.
        blank_img[:, img_w - 1:] = 0
        blank_img = Image.fromarray(blank_img).convert("RGB")
        draw_txt = ImageDraw.Draw(blank_img)
        return blank_img, draw_txt

    blank_img, draw_txt = create_blank_img()

    font_size = 20
    txt_color = (0, 0, 0)
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    gap = font_size + 5
    # At least 1 so the wrapping loop below always consumes characters, even
    # for a very narrow panel.
    max_chars = max(1, img_w // font_size - 4)
    last_row = img_h // gap - 1
    txt_img_list = []
    count, index = 1, 0
    for idx, txt in enumerate(texts):
        score = None if scores is None else scores[idx]
        if score is not None and (score < threshold or math.isnan(score)):
            continue
        # `index` numbers only the texts that are actually drawn.
        index += 1
        score_txt = '' if score is None else '%.3f' % (score)

        first_line = True
        while str_count(txt) >= max_chars:
            head, txt = txt[:max_chars], txt[max_chars:]
            if first_line:
                new_txt = str(index) + ': ' + head
                first_line = False
            else:
                new_txt = ' ' + head
            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
            if count >= last_row:
                # Panel is full: store it and continue on a fresh one.
                txt_img_list.append(np.array(blank_img))
                blank_img, draw_txt = create_blank_img()
                count = 0
            count += 1

        if first_line:
            new_txt = str(index) + ': ' + txt + ' ' + score_txt
        else:
            new_txt = " " + txt + " " + score_txt
        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
        # whether add new blank img or not
        if count >= last_row and idx + 1 < len(texts):
            txt_img_list.append(np.array(blank_img))
            blank_img, draw_txt = create_blank_img()
            count = 0
        count += 1

    txt_img_list.append(np.array(blank_img))
    if len(txt_img_list) == 1:
        blank_img = np.array(txt_img_list[0])
    else:
        blank_img = np.concatenate(txt_img_list, axis=1)
    return np.array(blank_img)
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image into an array via ``cv2.imdecode``.

    args:
        b64str(str|bytes): base64 text of an encoded image file
    return:
        the result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)``
        (presumably None when the payload is not a decodable image --
        verify against the OpenCV documentation)
    """
    import base64

    if isinstance(b64str, str):
        b64str = b64str.encode('utf8')
    raw = base64.b64decode(b64str)
    # np.fromstring is deprecated for binary input; frombuffer is the
    # supported way to view bytes as a uint8 array.
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
def draw_boxes(image, boxes, scores=None, drop_score=0.5):
    """
    Draw every box whose score is not below ``drop_score`` as a closed
    polyline (color (255, 0, 0), thickness 2) and return the image.

    When ``scores`` is None every box is given a score of 1. If nothing is
    drawn, ``image`` is returned as passed in.
    """
    confidences = [1] * len(boxes) if scores is None else scores
    for polygon, confidence in zip(boxes, confidences):
        if confidence < drop_score:
            continue
        pts = np.array(polygon).reshape([-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [pts], True, (255, 0, 0), 2)
    return image
def get_rotate_crop_image(img, points):
    """
    Warp the quadrilateral ``points`` of ``img`` onto an axis-aligned
    rectangle and return the rectified crop.

    The output width is the longer of the edges 0-1 and 2-3, the output
    height the longer of the edges 0-3 and 1-2 (both truncated to int).
    A result whose height is at least 1.5 times its width is rotated with
    ``np.rot90``.

    args:
        img: source image passed to ``cv2.warpPerspective``
        points: four corner points; assumes a 4x2 float32 array as required
            by ``cv2.getPerspectiveTransform`` -- TODO confirm with callers
    """
    assert len(points) == 4, "shape of points must be 4*2"

    edge_01 = np.linalg.norm(points[0] - points[1])
    edge_23 = np.linalg.norm(points[2] - points[3])
    crop_w = int(max(edge_01, edge_23))

    edge_03 = np.linalg.norm(points[0] - points[3])
    edge_12 = np.linalg.norm(points[1] - points[2])
    crop_h = int(max(edge_03, edge_12))

    # Destination corners, in the same order as the source points.
    target = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
    transform = cv2.getPerspectiveTransform(points, target)
    warped = cv2.warpPerspective(
        img,
        transform, (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    out_h, out_w = warped.shape[0:2]
    if out_h * 1.0 / out_w >= 1.5:
        warped = np.rot90(warped)
    return warped
def check_gpu(use_gpu):
    """
    Return ``use_gpu`` unchanged, except that a truthy value becomes False
    when ``paddle.is_compiled_with_cuda()`` reports no CUDA support.
    """
    if not use_gpu:
        return use_gpu
    return use_gpu if paddle.is_compiled_with_cuda() else False
# Running this module as a script is intentionally a no-op.
if __name__ == "__main__":
    pass

View File

@@ -0,0 +1,81 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import math
class VehiclePressingRecognizer(object):
    """
    Report detected boxes whose bottom edge intersects a lane line.

    Lane lines are 4-element sequences ``(x1, y1, x2, y2)``. Only the bottom
    edge ``(xmin, ymax) - (xmax, ymax)`` of each box is tested.
    """

    def __init__(self, cfg):
        # Stored for the caller; no method of this class reads it.
        self.cfg = cfg

    def judge(self, Ax1, Ay1, Ax2, Ay2, Bx1, By1, Bx2, By2):
        """
        Return True when segment A and segment B intersect.

        A quick rejection on the axis-aligned bounding boxes is followed by
        the straddle test: the end points of each segment must not lie
        strictly on the same side of the other segment.
        """
        boxes_overlap = (
            max(Ax1, Ax2) >= min(Bx1, Bx2) and min(Ax1, Ax2) <= max(Bx1, Bx2)
            and max(Ay1, Ay2) >= min(By1, By2)
            and min(Ay1, Ay2) <= max(By1, By2))
        if not boxes_overlap:
            return False

        # Cross products of B's end points relative to A ...
        b1_side = (Bx1 - Ax1) * (Ay2 - Ay1) - (By1 - Ay1) * (Ax2 - Ax1)
        b2_side = (Bx2 - Ax1) * (Ay2 - Ay1) - (By2 - Ay1) * (Ax2 - Ax1)
        # ... and of A's end points relative to B.
        a1_side = (Ax1 - Bx1) * (By2 - By1) - (Ay1 - By1) * (Bx2 - Bx1)
        a2_side = (Ax2 - Bx1) * (By2 - By1) - (Ay2 - By1) * (Bx2 - Bx1)
        return bool(b1_side * b2_side <= 0 and a1_side * a2_side <= 0)

    def is_intersect(self, line, bbox):
        """
        Return True when ``line`` crosses the bottom edge of ``bbox``.

        args:
            line: ``(x1, y1, x2, y2)``
            bbox: ``(xmin, ymin, xmax, ymax)``
        """
        Ax1, Ay1, Ax2, Ay2 = line
        xmin, ymin, xmax, ymax = bbox
        return self.judge(Ax1, Ay1, Ax2, Ay2, xmin, ymax, xmax, ymax)

    def run(self, lanes, det_res):
        """
        Collect, per image, the detections pressing a lane line.

        args:
            lanes: one list of lane lines per image
            det_res(dict|None): ``'boxes'`` is a 2-D array holding the boxes
                of all images back to back, with the box coordinates from
                column 2 on; ``'boxes_num'`` gives the number of rows that
                belong to each image
        return(list):
            one list per image with the matching rows of ``det_res['boxes']``
            (a row appears once per lane line it intersects); every list is
            empty when ``det_res`` is None
        """
        intersect_bbox_list = []
        start_idx = 0
        for i, lane in enumerate(lanes):
            if det_res is None:
                # No detections at all: nothing can press a line.
                intersect_bbox_list.append([])
                continue
            boxes_num_i = det_res['boxes_num'][i]
            boxes_i = det_res['boxes'][start_idx:start_idx + boxes_num_i, :]
            start_idx += boxes_num_i
            intersect_bbox_list.append([
                bbox for line in lane for bbox in boxes_i
                if self.is_intersect(line, bbox[2:])
            ])
        return intersect_bbox_list

    def mot_run(self, lanes, det_res):
        """
        Tracking variant of ``run`` for a single frame.

        args:
            lanes(dict): ``lanes['output']`` is a list of lane-line lists
            det_res: iterable of rows whose box coordinates start at
                column 3, or None
        return(list):
            flat list of the rows of ``det_res`` that intersect a lane line
        """
        if det_res is None:
            return []
        return [
            bbox for lane in lanes['output'] for line in lane
            for bbox in det_res if self.is_intersect(line, bbox[3:])
        ]

View File

@@ -0,0 +1,320 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
class VehicleRetrogradeRecognizer(object):
    """
    Flag tracked vehicles that drive against the expected direction.

    Per-track foot-point trajectories are collected with
    ``update_center_traj``. ``mot_run`` first picks a lane line as the
    "fence line" separating the two driving directions (unless one is given
    in the config) and afterwards tests each trajectory against it.
    """

    def __init__(self, cfg):
        """
        args:
            cfg(dict): must provide 'filter_horizontal_flag', 'deviation',
                'move_scale', 'keep_right_flag' and 'fence_line' (an empty
                sequence means "determine the fence line automatically")
        """
        self.cfg = cfg
        self.filter_horizontal_flag = self.cfg['filter_horizontal_flag']
        self.deviation = self.cfg['deviation']
        self.move_scale = self.cfg['move_scale']
        self.keep_right_flag = self.cfg['keep_right_flag']
        # One {track id: deque of (x, y) points} dict per class id; only
        # class 0 is created.
        self.center_traj_retrograde = [{}]
        self.fence_line = None if len(self.cfg[
            'fence_line']) == 0 else self.cfg['fence_line']

    def update_center_traj(self, mot_res, max_len):
        """
        Append the current bottom-center point of every track to its history.

        args:
            mot_res(dict|None): ``mot_res['boxes']`` rows are read as
                ``(id, <unused>, <unused>, x1, y1, x2, y2)``
            max_len(int): maximum number of points kept per track

        The caller's array is only read; the x2/y2 columns are no longer
        overwritten with width/height as a side effect.
        """
        from collections import deque

        if mot_res is None or self.center_traj_retrograde is None:
            return
        # All tracks are stored under class 0.
        history = self.center_traj_retrograde[0]
        for row in mot_res['boxes']:
            x1, y1, x2, y2 = row[3:]
            w, h = x2 - x1, y2 - y1
            center = tuple(map(int, (x1 + w / 2., y1 + h)))
            obj_id = int(row[0])
            if obj_id not in history:
                history[obj_id] = deque(maxlen=max_len)
            history[obj_id].append(center)

    def get_angle(self, array):
        """
        Return two integer angles (degrees) for the segment ``array``.

        return(tuple):
            angle1: ``atan2`` of the segment from start to end
            angle2: angle of the segment oriented so that its y component is
                non-negative, folded with ``180 - angle`` when above 90
        """
        x1, y1, x2, y2 = array
        a_x = x2 - x1
        a_y = y2 - y1
        angle1 = int(math.atan2(a_y, a_x) * 180 / math.pi)

        a_x = x2 - x1 if y2 >= y1 else x1 - x2
        a_y = y2 - y1 if y2 >= y1 else y1 - y2
        angle2 = int(math.atan2(a_y, a_x) * 180 / math.pi)
        if angle2 > 90:
            angle2 = 180 - angle2
        return angle1, angle2

    def is_move(self, array, frame_shape):
        """
        Return True when the segment spans more than ``move_scale`` of the
        frame width in x or of the frame height in y.
        """
        x1, y1, x2, y2 = array
        h, w, _ = frame_shape
        return bool(
            abs(x1 - x2) > w * self.move_scale
            or abs(y1 - y2) > h * self.move_scale)

    def get_distance_point2line(self, point, line):
        """
        Return the distance from ``point`` to the infinite line through
        ``(line[0], line[1])`` and ``(line[2], line[3])``.
        """
        line_point1, line_point2 = np.array(line[0:2]), np.array(line[2:])
        vec1 = line_point1 - point
        vec2 = line_point2 - point
        # z component of the 2-D cross product, written out because np.cross
        # on 2-element vectors is deprecated in NumPy 2.
        cross = vec1[0] * vec2[1] - vec1[1] * vec2[0]
        return np.abs(cross) / np.linalg.norm(line_point1 - line_point2)

    @staticmethod
    def _side_from_cross(cross, traj_line):
        """Map a cross product to a side label: negative -> 1, positive -> -1;
        zero is resolved by the trajectory's vertical direction."""
        if cross < 0:
            return 1
        elif cross == 0:
            return -1 if traj_line[3] >= traj_line[1] else 1
        return -1

    def driving_direction(self, line1, line2, is_init=False):
        """
        Classify on which side of ``line1`` the start point of ``line2`` lies.

        return(tuple):
            ``(side, distance)`` with side in {1, -1}. The collinear case
            also returns this pair, which is what ``init_fence_line``
            unpacks.
        """
        x1, y1 = line1[2] - line1[0], line1[3] - line1[1]
        x2, y2 = line2[0] - line1[0], line2[1] - line1[1]
        cross = x1 * y2 - x2 * y1
        # NOTE(review): the point is expressed relative to line1's start
        # while line1 itself is absolute -- looks like a frame mismatch;
        # behavior kept as is, confirm the intent.
        distance = self.get_distance_point2line([x2, y2], line1)
        return self._side_from_cross(cross, line2), distance

    def get_long_fence_line(self, h, w, line):
        """
        Extend ``line`` to the borders of an ``h`` x ``w`` frame.

        return(list): ``[x1, y1, x2, y2]`` of the extended line, the point
            with the smaller y first (for the general case).
        """
        x1, y1, x2, y2 = line
        if x1 == x2:
            return [x1, 0, x1, h]
        if y1 == y2:
            return [0, y1, w, y1]
        k = (y2 - y1) / (x2 - x1)
        b = y1 - k * x1
        if k == 1 and b == 0:
            return [0, 0, w, h]
        if k == -1 and b == 0:
            # NOTE(review): [w, 0, h, h] does not lie on y = -x; kept
            # unchanged, verify what was intended here.
            return [w, 0, h, h]

        # Intersections with the four frame borders.
        top = [-b / k, 0]
        left = [0, b]
        right = [w, k * w + b]
        bottom = [(h - b) / k, h]
        candidate = np.array([top, left, right, bottom])
        inside = np.array([
            top[0] >= 0 and top[0] <= w,
            left[1] > 0 and left[1] <= h,
            right[1] > 0 and right[1] <= h,
            bottom[0] > 0 and bottom[0] < w,
        ])
        candidate = candidate[inside]
        # Assumes at least two border hits remain -- TODO confirm for lines
        # that miss the frame.
        candidate_sort = candidate[candidate[:, 1].argsort()]
        return [
            int(candidate_sort[0][0]), int(candidate_sort[0][1]),
            int(candidate_sort[1][0]), int(candidate_sort[1][1])
        ]

    def _collect_side_distances(self, lane, trajectories, nearest, limit):
        """Record in ``nearest[1]`` / ``nearest[-1]`` the smallest distance
        (capped at ``limit``) of the trajectories seen on each side of
        ``lane``."""
        for traj_line in trajectories:
            side, distance = self.driving_direction(lane,
                                                    traj_line['traj_line'])
            key = 1 if side > 0 else -1
            nearest[key] = min(nearest.get(key, limit), distance)

    def init_fence_line(self, lanes, pos_dir_traj, neg_dir_traj, frame_shape):
        """
        Choose the lane line that best separates the two driving directions.

        A lane is skipped when the positive-direction trajectories alone
        already lie on both of its sides. Among the remaining lanes with
        trajectories on both sides, the one whose nearest trajectories are
        most equally distant on either side wins.

        return(list|None): the winner extended to the frame borders, or None
            when no lane qualifies.
        """
        h, w, _ = frame_shape
        limit = h * h + w * w
        best_lane = None
        best_spread = limit
        for lane in lanes[0]:
            nearest = {}
            self._collect_side_distances(lane, pos_dir_traj, nearest, limit)
            if len(nearest) == 2:
                continue
            self._collect_side_distances(lane, neg_dir_traj, nearest, limit)
            if len(nearest) == 2:
                spread = abs(nearest[1] - nearest[-1])
                if spread < best_spread:
                    best_lane = lane
                    best_spread = spread
        if best_lane is None:
            return None
        return self.get_long_fence_line(h, w, best_lane)

    def judge_retrograde(self, traj_line):
        """
        Return True when the trajectory stays on the side of the fence line
        that matches its driving direction (i.e. it is NOT retrograde).

        Requires ``self.fence_line`` to be set.
        """
        line1 = self.fence_line
        line2 = traj_line['traj_line']
        x1, y1 = line1[2] - line1[0], line1[3] - line1[1]
        x2_start_point = line2[0] - line1[0]
        y2_start_point = line2[1] - line1[1]
        x2_end_point = line2[2] - line1[0]
        y2_end_point = line2[3] - line1[1]

        start_point_dir = self._side_from_cross(
            x1 * y2_start_point - x2_start_point * y1, line2)
        end_point_dir = self._side_from_cross(
            x1 * y2_end_point - x2_end_point * y1, line2)

        # Expected side for a trajectory moving down (y grows) vs. up,
        # depending on the keep-right setting.
        if self.keep_right_flag:
            driver_dir = -1 if (line2[3] - line2[1]) >= 0 else 1
        else:
            driver_dir = -1 if (line2[3] - line2[1]) <= 0 else 1
        return start_point_dir == driver_dir and start_point_dir == end_point_dir

    def mot_run(self, lanes_res, det_res, frame_shape):
        """
        Detect retrograde tracks for the current frame.

        args:
            lanes_res(dict): 'directions' (first entry is used as the lane
                direction) and 'output' (lane lines)
            det_res(dict): 'boxes' rows starting with ``(id, class_id, ...)``
            frame_shape: ``(h, w, channels)``
        return(tuple):
            ``(retrograde_ids, fence_line)``; the id list is empty while the
            fence line is still being determined.
        """
        det = det_res['boxes']
        directions = lanes_res['directions']
        lanes = lanes_res['output']
        if len(directions) == 0 or len(det) == 0:
            return [], self.fence_line
        direction = directions[0]

        traj_lines = []
        pos_dir_traj = []
        neg_dir_traj = []
        for i, row in enumerate(det):
            class_id = int(row[1])
            mot_id = int(row[0])
            traj_i = self.center_traj_retrograde[class_id][mot_id]
            if len(traj_i) < 2:
                continue

            # Straight line from the oldest to the newest recorded point.
            traj_line = {
                'index': i,
                'mot_id': mot_id,
                'traj_line':
                [traj_i[0][0], traj_i[0][1], traj_i[-1][0], traj_i[-1][1]]
            }
            if not self.is_move(traj_line['traj_line'], frame_shape):
                continue

            angle, angle_deviation = self.get_angle(traj_line['traj_line'])
            if direction is not None and self.filter_horizontal_flag:
                if abs(angle_deviation - direction) > self.deviation:
                    continue

            traj_line['angle'] = angle
            traj_lines.append(traj_line)

            if self.fence_line is None:
                if angle >= 0:
                    pos_dir_traj.append(traj_line)
                else:
                    neg_dir_traj.append(traj_line)

        if len(traj_lines) == 0:
            return [], self.fence_line

        if self.fence_line is None:
            # Both directions must be observed before a fence can be chosen.
            if len(pos_dir_traj) < 1 or len(neg_dir_traj) < 1:
                return [], None
            self.fence_line = self.init_fence_line(lanes, pos_dir_traj,
                                                   neg_dir_traj, frame_shape)
            return [], self.fence_line

        retrograde_list = [
            det[traj_line['index']][0] for traj_line in traj_lines
            if not self.judge_retrograde(traj_line)
        ]
        return retrograde_list, self.fence_line

View File

@@ -0,0 +1,296 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
from paddle.nn import functional as F
import re
from shapely.geometry import Polygon
import cv2
import copy
def build_post_process(config, global_config=None):
    """
    Instantiate the post-process class named by ``config['name']``.

    args:
        config(dict): must contain 'name'; the remaining items are passed to
            the class as keyword arguments. The dict is deep-copied, so the
            caller's object is left untouched.
        global_config(dict|None): extra keyword arguments merged over
            ``config``
    return:
        an instance of the requested class, or None when the name is the
        string "None"
    raises:
        AssertionError: when the name is not one of the supported classes
    """
    support_dict = ['DBPostProcess', 'CTCLabelDecode']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    if module_name == "None":
        return
    if global_config is not None:
        config.update(global_config)
    # Explicit check instead of `assert` so that the whitelist is still
    # enforced under `python -O`; AssertionError is kept for compatibility.
    if module_name not in support_dict:
        raise AssertionError(
            'post process only support {}'.format(support_dict))
    # The name is whitelisted above; look the class up in this module
    # rather than evaluating the string.
    module_class = globals()[module_name](**config)
    return module_class
class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
                 score_mode="fast",
                 **kwargs):
        """
        args:
            thresh(float): binarization threshold applied to the map
            box_thresh(float): boxes scoring below this value are dropped
            max_candidates(int): maximum number of contours examined
            unclip_ratio(float): expansion factor used by ``unclip``
            use_dilation(bool): dilate the binary map with a 2x2 kernel
            score_mode(str): "fast" (score over the box) or "slow" (score
                over the contour polygon)
            **kwargs: accepted and ignored
        """
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        assert score_mode in [
            "slow", "fast"
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single binarized map; it is unpacked as (H, W) below
        pred: score map used to rate each candidate box
        return: (int16 array of boxes scaled to dest size, list of scores)
        '''
        bitmap = _bitmap
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        # findContours returns (image, contours, hierarchy) or
        # (contours, hierarchy); contours are second from the end in both.
        contours = outs[-2]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for contour in contours[:num_contours]:
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from map coordinates to the destination size.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        """
        Expand ``box`` outwards by ``area * unclip_ratio / perimeter`` using
        pyclipper and return the expanded polygon(s) as an array.

        raises:
            RuntimeError: when pyclipper cannot be imported
        """
        try:
            import pyclipper
        except Exception as e:
            raise RuntimeError(
                'Unable to use vehicleplate postprocess in PP-Vehicle, please install pyclipper, for example: `pip install pyclipper`, see https://github.com/fonttools/pyclipper'
            ) from e
        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        """
        Return the corners of the minimum-area rectangle around ``contour``
        and the length of its shorter side.

        The corners are sorted by x; the two left-most and the two
        right-most points are each ordered by y so that the result is
        [left/smaller-y, right/smaller-y, right/larger-y, left/larger-y].
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        if points[1][1] > points[0][1]:
            index_1, index_4 = 0, 1
        else:
            index_1, index_4 = 1, 0
        if points[3][1] > points[2][1]:
            index_2, index_3 = 2, 3
        else:
            index_2, index_3 = 3, 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        '''
        box_score_fast: use bbox mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # np.int was removed from NumPy; use an explicit integer dtype.
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        # Shift the polygon into the cropped window before rasterizing it.
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use polygon mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        """
        args:
            outs_dict(dict): ``outs_dict['maps']`` is the prediction, a
                paddle Tensor or array indexed as [batch, channel, H, W];
                channel 0 is used
            shape_list: per batch item, a pair unpacked as (src_h, src_w)
        return(list):
            one ``{'points': boxes}`` dict per batch item
        """
        pred = outs_dict['maps']
        if isinstance(pred, paddle.Tensor):
            pred = pred.numpy()
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            # The per-box scores are not part of the returned structure.
            boxes, _ = self.boxes_from_bitmap(pred[batch_index], mask, src_w,
                                              src_h)

            boxes_batch.append({'points': boxes})
        return boxes_batch
class BaseRecLabelDecode(object):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False):
        """
        args:
            character_dict_path(str|None): UTF-8 file with one character per
                line; when None the built-in digits + lowercase letters are
                used
            use_space_char(bool): append " " to the characters read from the
                dictionary file
        """
        self.beg_str = "sos"
        self.end_str = "eos"

        if character_dict_path is None:
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
        else:
            with open(character_dict_path, "rb") as fin:
                self.character_str = [
                    raw.decode('utf-8').strip("\n").strip("\r\n")
                    for raw in fin.readlines()
                ]
            if use_space_char:
                self.character_str.append(" ")

        # Subclasses may prepend/append special tokens here.
        dict_character = self.add_special_char(list(self.character_str))
        self.dict = {char: i for i, char in enumerate(dict_character)}
        self.character = dict_character

    def add_special_char(self, dict_character):
        """Hook for subclasses; the base class adds nothing."""
        return dict_character

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label.

        Returns one ``(text, mean_confidence)`` tuple per batch entry.
        Ignored tokens are dropped, and with ``is_remove_duplicate`` an
        index equal to its predecessor is dropped as well.
        """
        ignored_tokens = self.get_ignored_tokens()
        result_list = []
        for batch_idx, indices in enumerate(text_index):
            keep = np.ones(len(indices), dtype=bool)
            if is_remove_duplicate:
                keep[1:] = indices[1:] != indices[:-1]
            for token in ignored_tokens:
                keep &= indices != token

            text = ''.join(self.character[text_id]
                           for text_id in indices[keep])

            if text_prob is None:
                conf_list = [1] * len(keep)
            else:
                conf_list = text_prob[batch_idx][keep]
            if len(conf_list) == 0:
                conf_list = [0]

            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list

    def get_ignored_tokens(self):
        return [0]  # for ctc blank
class CTCLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False,
                 **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super().__init__(character_dict_path, use_space_char)

    def __call__(self, preds, label=None, *args, **kwargs):
        """
        Decode predictions by taking the argmax / max over axis 2.

        A tuple or list of predictions is reduced to its last element and a
        paddle Tensor is converted to numpy first. Returns the decoded text
        list, or ``(text, decoded_label)`` when ``label`` is given.
        """
        if isinstance(preds, (tuple, list)):
            preds = preds[-1]
        if isinstance(preds, paddle.Tensor):
            preds = preds.numpy()

        best_idx = preds.argmax(axis=2)
        best_prob = preds.max(axis=2)
        text = self.decode(best_idx, best_prob, is_remove_duplicate=True)
        if label is not None:
            return text, self.decode(label)
        return text

    def add_special_char(self, dict_character):
        # Index 0 is reserved for the CTC blank token.
        return ['blank'] + dict_character