Replace the document detection model
231
paddle_detection/deploy/pipeline/ppvehicle/lane_seg_infer.py
Normal file
@@ -0,0 +1,231 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import codecs
import os

import yaml
import numpy as np
import cv2
from sklearn.cluster import DBSCAN
from pptracking.python.det_infer import load_predictor


class LaneSegPredictor:
    def __init__(self, lane_seg_config, model_dir):
        """
        Prepare for prediction.
        For the usage and docs of Paddle Inference, please refer to
        https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html
        """
        if not os.path.exists(lane_seg_config):
            raise ValueError("Cannot find: {}".format(lane_seg_config))

        args = yaml.safe_load(open(lane_seg_config))
        self.model_dir = model_dir
        self.args = args[args['type']]

        self.shape = None
        self.filter_horizontal_flag = self.args['filter_horizontal_flag']
        self.horizontal_filtration_degree = self.args[
            'horizontal_filtration_degree']
        self.horizontal_filtering_threshold = self.args[
            'horizontal_filtering_threshold']

        try:
            self.predictor, _ = load_predictor(
                model_dir=self.model_dir,
                run_mode=self.args['run_mode'],
                batch_size=self.args['batch_size'],
                device=self.args['device'],
                min_subgraph_size=self.args['min_subgraph_size'],
                use_dynamic_shape=self.args['use_dynamic_shape'],
                trt_min_shape=self.args['trt_min_shape'],
                trt_max_shape=self.args['trt_max_shape'],
                trt_opt_shape=self.args['trt_opt_shape'],
                trt_calib_mode=self.args['trt_calib_mode'],
                cpu_threads=self.args['cpu_threads'],
                enable_mkldnn=self.args['enable_mkldnn'])
        except Exception as e:
            print(str(e))
            exit()

    def run(self, img):
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])

        img = np.array(img)
        self.shape = img.shape[1:3]
        img = self.normalize(img)
        img = np.transpose(img, (0, 3, 1, 2))
        input_handle.reshape(img.shape)
        input_handle.copy_from_cpu(img)

        self.predictor.run()

        results = output_handle.copy_to_cpu()
        results = self.postprocess(results)

        return self.get_line(results)

    def normalize(self, im, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
        mean = np.array(mean)[np.newaxis, np.newaxis, :]
        std = np.array(std)[np.newaxis, np.newaxis, :]
        im = im.astype(np.float32, copy=False) / 255.0
        im -= mean
        im /= std
        return im

    def postprocess(self, pred):
        pred = np.argmax(pred, axis=1)
        pred[pred == 3] = 0
        pred[pred > 0] = 255

        return pred

    def get_line(self, results):
        lines = []
        directions = []
        for i in range(results.shape[0]):
            line, direction = self.hough_line(np.uint8(results[i]))
            lines.append(line)
            directions.append(direction)
        return lines, directions

    def get_distance(self, array_1, array_2):
        lon_a = array_1[0]
        lat_a = array_1[1]
        lon_b = array_2[0]
        lat_b = array_2[1]

        s = pow(pow((lat_b - lat_a), 2) + pow((lon_b - lon_a), 2), 0.5)
        return s

    def get_angle(self, array):
        import math
        x1, y1, x2, y2 = array
        a_x = x2 - x1
        a_y = y2 - y1
        angle1 = math.atan2(a_y, a_x)
        angle1 = int(angle1 * 180 / math.pi)
        if angle1 > 90:
            angle1 = 180 - angle1
        return angle1

    def get_proportion(self, lines):
        proportion = 0.0
        h, w = self.shape
        for line in lines:
            x1, y1, x2, y2 = line
            length = abs(y2 - y1) / h + abs(x2 - x1) / w
            proportion = proportion + length

        return proportion

    def line_cluster(self, linesP):
        points = []
        for i in range(0, len(linesP)):
            l = linesP[i]
            x_center = (float(
                (max(l[2], l[0]) - min(l[2], l[0]))) / 2.0 + min(l[2], l[0]))
            y_center = (float(
                (max(l[3], l[1]) - min(l[3], l[1]))) / 2.0 + min(l[3], l[1]))
            points.append([x_center, y_center])

        dbscan = DBSCAN(
            eps=50, min_samples=2, metric=self.get_distance).fit(points)

        labels = dbscan.labels_
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        cluster_list = list([] for i in range(n_clusters_))
        if linesP is not None:
            for i in range(0, len(linesP)):
                if labels[i] == -1:
                    continue
                l = linesP[i]
                x1, y1, x2, y2 = l
                if y2 >= y1:
                    cluster_list[labels[i]].append([x1, y1, x2, y2])
                else:
                    ll = [x2, y2, x1, y1]
                    cluster_list[labels[i]].append(ll)

        return cluster_list

    def hough_line(self,
                   binary_img,
                   min_line=50,
                   min_line_points=50,
                   max_line_gap=10):
        linesP = cv2.HoughLinesP(binary_img, 1, np.pi / 180, min_line, None,
                                 min_line_points, max_line_gap)
        if linesP is None:
            return [], None

        coarse_cluster_list = self.line_cluster(linesP[:, 0])
        filter_lines_output, direction = self.filter_lines(coarse_cluster_list)

        return filter_lines_output, direction

    def filter_lines(self, coarse_cluster_list):
        lines = []
        angles = []
        for i in range(len(coarse_cluster_list)):
            if len(coarse_cluster_list[i]) == 0:
                continue
            coarse_cluster_list[i] = np.array(coarse_cluster_list[i])
            distance = abs(coarse_cluster_list[i][:, 3] - coarse_cluster_list[i]
                           [:, 1]) + abs(coarse_cluster_list[i][:, 2] -
                                         coarse_cluster_list[i][:, 0])
            l = coarse_cluster_list[i][np.argmax(distance)]
            angles.append(self.get_angle(l))
            lines.append(l)

        if len(lines) == 0:
            return [], None
        if not self.filter_horizontal_flag:
            return lines, None

        # filter horizontal roads
        angles = np.array(angles)

        max_angle, min_angle = np.max(angles), np.min(angles)

        if (max_angle - min_angle) < self.horizontal_filtration_degree:
            return lines, np.mean(angles)

        thr_angle = (
            max_angle + min_angle) * self.horizontal_filtering_threshold
        lines = np.array(lines)

        min_angle_line = lines[np.where(angles < thr_angle)]
        max_angle_line = lines[np.where(angles >= thr_angle)]

        max_angle_line_pro = self.get_proportion(max_angle_line)
        min_angle_line_pro = self.get_proportion(min_angle_line)

        if max_angle_line_pro >= min_angle_line_pro:
            angle_list = angles[np.where(angles >= thr_angle)]
            return max_angle_line, np.mean(angle_list)
        else:
            angle_list = angles[np.where(angles < thr_angle)]
            return min_angle_line, np.mean(angle_list)
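A minimal usage sketch of LaneSegPredictor follows, assuming a config YAML that contains the keys read in __init__ above; the paths are hypothetical placeholders.

# Minimal usage sketch (hypothetical paths; the YAML must provide the keys
# read in __init__: run_mode, batch_size, device, min_subgraph_size, ...).
import cv2

predictor = LaneSegPredictor(
    lane_seg_config='config/lane_seg_config.yml',   # assumed path
    model_dir='output_inference/laneseg_model')     # assumed path

frame = cv2.imread('demo.jpg')               # one BGR frame
lines, directions = predictor.run([frame])   # run() expects a batch of images
# lines[0] holds the [x1, y1, x2, y2] lane segments for the frame;
# directions[0] is the mean lane angle in degrees, or None.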
6623
paddle_detection/deploy/pipeline/ppvehicle/rec_word_dict.txt
Normal file
File diff suppressed because it is too large
150
paddle_detection/deploy/pipeline/ppvehicle/vehicle_attr.py
Normal file
@@ -0,0 +1,150 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import yaml
import glob

import cv2
import numpy as np
import math
import paddle
import sys
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 3)))
sys.path.insert(0, parent_path)

from paddle.inference import Config, create_predictor
from python.utils import argsparser, Timer, get_current_memory_mb
from python.benchmark_utils import PaddleInferBenchmark
from python.infer import Detector, print_arguments
from pipeline.pphuman.attr_infer import AttrDetector


class VehicleAttr(AttrDetector):
    """
    Args:
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): the device to run on, one of CPU/GPU/XPU/NPU, default is CPU
        run_mode (str): mode of running (paddle/trt_fp32/trt_fp16)
        batch_size (int): batch size used in inference
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set True
        cpu_threads (int): cpu threads
        enable_mkldnn (bool): whether to enable MKLDNN
        type_threshold (float): the score threshold for vehicle type recognition
        color_threshold (float): the score threshold for vehicle color recognition
    """

    def __init__(self,
                 model_dir,
                 device='CPU',
                 run_mode='paddle',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False,
                 output_dir='output',
                 color_threshold=0.5,
                 type_threshold=0.5):
        super(VehicleAttr, self).__init__(
            model_dir=model_dir,
            device=device,
            run_mode=run_mode,
            batch_size=batch_size,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn,
            output_dir=output_dir)
        self.color_threshold = color_threshold
        self.type_threshold = type_threshold
        self.result_history = {}
        self.color_list = [
            "yellow", "orange", "green", "gray", "red", "blue", "white",
            "golden", "brown", "black"
        ]
        self.type_list = [
            "sedan", "suv", "van", "hatchback", "mpv", "pickup", "bus", "truck",
            "estate"
        ]

    @classmethod
    def init_with_cfg(cls, args, cfg):
        return cls(model_dir=cfg['model_dir'],
                   batch_size=cfg['batch_size'],
                   color_threshold=cfg['color_threshold'],
                   type_threshold=cfg['type_threshold'],
                   device=args.device,
                   run_mode=args.run_mode,
                   trt_min_shape=args.trt_min_shape,
                   trt_max_shape=args.trt_max_shape,
                   trt_opt_shape=args.trt_opt_shape,
                   trt_calib_mode=args.trt_calib_mode,
                   cpu_threads=args.cpu_threads,
                   enable_mkldnn=args.enable_mkldnn)

    def postprocess(self, inputs, result):
        # postprocess output of predictor
        im_results = result['output']
        batch_res = []
        for res in im_results:
            res = res.tolist()
            attr_res = []
            color_res_str = "Color: "
            type_res_str = "Type: "
            color_idx = np.argmax(res[:10])
            type_idx = np.argmax(res[10:])

            if res[color_idx] >= self.color_threshold:
                color_res_str += self.color_list[color_idx]
            else:
                color_res_str += "Unknown"
            attr_res.append(color_res_str)

            if res[type_idx + 10] >= self.type_threshold:
                type_res_str += self.type_list[type_idx]
            else:
                type_res_str += "Unknown"
            attr_res.append(type_res_str)

            batch_res.append(attr_res)
        result = {'output': batch_res}
        return result


if __name__ == '__main__':
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
                            ], "device should be CPU, GPU, NPU or XPU"
    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"

    main()
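A minimal sketch of using VehicleAttr on a single crop, assuming the predict_image interface inherited from AttrDetector; the model path and call signature are assumptions, not confirmed by this diff.

# Minimal usage sketch (assumes AttrDetector's inherited predict_image
# interface; the model path below is a hypothetical placeholder).
import cv2

attr_model = VehicleAttr(
    model_dir='output_inference/vehicle_attr_model/',  # assumed path
    device='GPU')
crop = cv2.imread('vehicle_crop.jpg')                  # one BGR vehicle crop
raw = attr_model.predict_image([crop], visual=False)   # assumed signature
result = attr_model.postprocess(None, raw)             # `inputs` is unused above
print(result['output'])  # e.g. [['Color: white', 'Type: sedan']]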
331
paddle_detection/deploy/pipeline/ppvehicle/vehicle_plate.py
Normal file
@@ -0,0 +1,331 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import yaml
import glob
from functools import reduce

import time
import cv2
import numpy as np
import math
import paddle

import sys
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 3)))
sys.path.insert(0, parent_path)

from python.infer import get_test_images
from python.preprocess import preprocess, NormalizeImage, Permute, Resize_Mult32
from pipeline.ppvehicle.vehicle_plateutils import create_predictor, get_infer_gpuid, get_rotate_crop_image, draw_boxes
from pipeline.ppvehicle.vehicleplate_postprocess import build_post_process
from pipeline.cfg_utils import merge_cfg, print_arguments, argsparser


class PlateDetector(object):
    def __init__(self, args, cfg):
        self.args = args
        self.pre_process_list = {
            'Resize_Mult32': {
                'limit_side_len': cfg['det_limit_side_len'],
                'limit_type': cfg['det_limit_type'],
            },
            'NormalizeImage': {
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225],
                'is_scale': True,
            },
            'Permute': {}
        }
        postprocess_params = {}
        postprocess_params['name'] = 'DBPostProcess'
        postprocess_params["thresh"] = 0.3
        postprocess_params["box_thresh"] = 0.6
        postprocess_params["max_candidates"] = 1000
        postprocess_params["unclip_ratio"] = 1.5
        postprocess_params["use_dilation"] = False
        postprocess_params["score_mode"] = "fast"

        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = create_predictor(
            args, cfg, 'det')

    def preprocess(self, im_path):
        preprocess_ops = []
        for op_type, new_op_info in self.pre_process_list.items():
            preprocess_ops.append(eval(op_type)(**new_op_info))

        input_im_lst = []
        input_im_info_lst = []

        im, im_info = preprocess(im_path, preprocess_ops)
        input_im_lst.append(im)
        input_im_info_lst.append(im_info['im_shape'] / im_info['scale_factor'])

        return np.stack(input_im_lst, axis=0), input_im_info_lst

    def order_points_clockwise(self, pts):
        rect = np.zeros((4, 2), dtype="float32")
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]
        return rect

    def clip_det_res(self, points, img_height, img_width):
        for pno in range(points.shape[0]):
            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            box = self.clip_det_res(box, img_height, img_width)
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 3 or rect_height <= 3:
                continue
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.clip_det_res(box, img_height, img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def predict_image(self, img_list):
        st = time.time()

        dt_batch_boxes = []
        for image in img_list:
            img, shape_list = self.preprocess(image)
            if img is None:
                return None, 0
            self.input_tensor.copy_from_cpu(img)
            self.predictor.run()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)

            preds = {}
            preds['maps'] = outputs[0]

            #self.predictor.try_shrink_memory()
            post_result = self.postprocess_op(preds, shape_list)
            # print("post_result length:{}".format(len(post_result)))

            org_shape = image.shape
            dt_boxes = post_result[0]['points']
            dt_boxes = self.filter_tag_det_res(dt_boxes, org_shape)
            dt_batch_boxes.append(dt_boxes)

        et = time.time()
        return dt_batch_boxes, et - st


class TextRecognizer(object):
    def __init__(self, args, cfg, use_gpu=True):
        self.rec_image_shape = cfg['rec_image_shape']
        self.rec_batch_num = cfg['rec_batch_num']
        word_dict_path = cfg['word_dict_path']
        use_space_char = True

        postprocess_params = {
            'name': 'CTCLabelDecode',
            "character_dict_path": word_dict_path,
            "use_space_char": use_space_char
        }
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            create_predictor(args, cfg, 'rec')
        self.use_onnx = False

    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape

        assert imgC == img.shape[2]
        imgW = int((imgH * max_wh_ratio))
        if self.use_onnx:
            w = self.input_tensor.shape[3:][0]
            if w is not None and w > 0:
                imgW = w

        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def predict_text(self, img_list):
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = []
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))
        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
        st = time.time()
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            imgC, imgH, imgW = self.rec_image_shape
            max_wh_ratio = imgW / imgH
            # max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            if self.use_onnx:
                input_dict = {}
                input_dict[self.input_tensor.name] = norm_img_batch
                outputs = self.predictor.run(self.output_tensors, input_dict)
                preds = outputs[0]
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
                self.predictor.run()
                outputs = []
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
                if len(outputs) != 1:
                    preds = outputs
                else:
                    preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
        return rec_res, time.time() - st


class PlateRecognizer(object):
    def __init__(self, args, cfg):
        use_gpu = args.device.lower() == "gpu"
        self.platedetector = PlateDetector(args, cfg)
        self.textrecognizer = TextRecognizer(args, cfg, use_gpu=use_gpu)

    def get_platelicense(self, image_list):
        plate_text_list = []
        plateboxes, det_time = self.platedetector.predict_image(image_list)
        for idx, boxes_pcar in enumerate(plateboxes):
            plate_pcar_list = []
            for box in boxes_pcar:
                plate_images = get_rotate_crop_image(image_list[idx], box)
                plate_texts = self.textrecognizer.predict_text([plate_images])
                plate_pcar_list.append(plate_texts)
            plate_text_list.append(plate_pcar_list)
        return self.check_plate(plate_text_list)

    def check_plate(self, text_list):
        plate_all = {"plate": []}
        for text_pcar in text_list:
            platelicense = ""
            for text_info in text_pcar:
                text = text_info[0][0][0]
                if len(text) > 2 and len(text) < 10:
                    platelicense = self.replace_cn_code(text)
            plate_all["plate"].append(platelicense)
        return plate_all

    def replace_cn_code(self, text):
        simcode = {
            '浙': 'ZJ-',
            '粤': 'GD-',
            '京': 'BJ-',
            '津': 'TJ-',
            '冀': 'HE-',
            '晋': 'SX-',
            '蒙': 'NM-',
            '辽': 'LN-',
            '黑': 'HLJ-',
            '沪': 'SH-',
            '吉': 'JL-',
            '苏': 'JS-',
            '皖': 'AH-',
            '赣': 'JX-',
            '鲁': 'SD-',
            '豫': 'HA-',
            '鄂': 'HB-',
            '湘': 'HN-',
            '桂': 'GX-',
            '琼': 'HI-',
            '渝': 'CQ-',
            '川': 'SC-',
            '贵': 'GZ-',
            '云': 'YN-',
            '藏': 'XZ-',
            '陕': 'SN-',
            '甘': 'GS-',
            '青': 'QH-',
            '宁': 'NX-',
            '闽': 'FJ-',
            '·': ' '
        }
        for _char in text:
            if _char in simcode:
                text = text.replace(_char, simcode[_char])
        return text


def main():
    cfg = merge_cfg(FLAGS)
    print_arguments(cfg)
    vehicleplate_cfg = cfg['VEHICLE_PLATE']
    detector = PlateRecognizer(FLAGS, vehicleplate_cfg)
    # predict from image
    img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
    for img in img_list:
        image = cv2.imread(img)
        results = detector.get_platelicense([image])
        print(results)


if __name__ == '__main__':
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ['CPU', 'GPU', 'XPU', 'NPU'
                            ], "device should be CPU, GPU, NPU or XPU"

    main()
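For reference, a sketch of the VEHICLE_PLATE config dict consumed above. The keys are exactly the ones read by PlateDetector, TextRecognizer, and create_predictor; the values are illustrative only, the real ones come from the pipeline YAML.

# Illustrative VEHICLE_PLATE config (keys as read above; values are examples).
vehicleplate_cfg = {
    'det_model_dir': 'output_inference/ch_PP-OCRv3_det_infer/',  # assumed path
    'det_limit_side_len': 736,
    'det_limit_type': 'min',
    'rec_model_dir': 'output_inference/ch_PP-OCRv3_rec_infer/',  # assumed path
    'rec_image_shape': [3, 48, 320],
    'rec_batch_num': 6,
    'word_dict_path': 'rec_word_dict.txt',  # the dictionary added in this commit
}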
505
paddle_detection/deploy/pipeline/ppvehicle/vehicle_plateutils.py
Normal file
@@ -0,0 +1,505 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import sys
import platform
import cv2
import numpy as np
import paddle
from PIL import Image, ImageDraw, ImageFont
import math
from paddle import inference
import time
import ast


def create_predictor(args, cfg, mode):
    if mode == "det":
        model_dir = cfg['det_model_dir']
    else:
        model_dir = cfg['rec_model_dir']

    if model_dir is None:
        print("cannot find {} model file path {}".format(mode, model_dir))
        sys.exit(0)

    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        raise ValueError("cannot find model file path {}".format(
            model_file_path))
    if not os.path.exists(params_file_path):
        raise ValueError("cannot find params file path {}".format(
            params_file_path))

    config = inference.Config(model_file_path, params_file_path)

    batch_size = 1

    if args.device == "GPU":
        gpu_id = get_infer_gpuid()
        if gpu_id is None:
            print(
                "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
            )
        config.enable_use_gpu(500, 0)

        precision_map = {
            'trt_int8': inference.PrecisionType.Int8,
            'trt_fp32': inference.PrecisionType.Float32,
            'trt_fp16': inference.PrecisionType.Half
        }
        min_subgraph_size = 15
        if args.run_mode in precision_map.keys():
            config.enable_tensorrt_engine(
                workspace_size=(1 << 25) * batch_size,
                max_batch_size=batch_size,
                min_subgraph_size=min_subgraph_size,
                precision_mode=precision_map[args.run_mode])
            use_dynamic_shape = True

            if mode == "det":
                min_input_shape = {
                    "x": [1, 3, 50, 50],
                    "conv2d_92.tmp_0": [1, 120, 20, 20],
                    "conv2d_91.tmp_0": [1, 24, 10, 10],
                    "conv2d_59.tmp_0": [1, 96, 20, 20],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
                    "conv2d_124.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
                    "elementwise_add_7": [1, 56, 2, 2],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
                }
                max_input_shape = {
                    "x": [1, 3, 1536, 1536],
                    "conv2d_92.tmp_0": [1, 120, 400, 400],
                    "conv2d_91.tmp_0": [1, 24, 200, 200],
                    "conv2d_59.tmp_0": [1, 96, 400, 400],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
                    "conv2d_124.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
                    "elementwise_add_7": [1, 56, 400, 400],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
                }
                opt_input_shape = {
                    "x": [1, 3, 640, 640],
                    "conv2d_92.tmp_0": [1, 120, 160, 160],
                    "conv2d_91.tmp_0": [1, 24, 80, 80],
                    "conv2d_59.tmp_0": [1, 96, 160, 160],
                    "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
                    "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
                    "conv2d_124.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
                    "elementwise_add_7": [1, 56, 40, 40],
                    "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
                }
                min_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
                }
                max_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
                }
                opt_pact_shape = {
                    "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
                    "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
                    "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
                }
                min_input_shape.update(min_pact_shape)
                max_input_shape.update(max_pact_shape)
                opt_input_shape.update(opt_pact_shape)
            elif mode == "rec":
                imgH = int(cfg['rec_image_shape'][-2])
                min_input_shape = {"x": [1, 3, imgH, 10]}
                max_input_shape = {"x": [batch_size, 3, imgH, 2304]}
                opt_input_shape = {"x": [batch_size, 3, imgH, 320]}
                config.exp_disable_tensorrt_ops(["transpose2"])
            elif mode == "cls":
                min_input_shape = {"x": [1, 3, 48, 10]}
                max_input_shape = {"x": [batch_size, 3, 48, 1024]}
                opt_input_shape = {"x": [batch_size, 3, 48, 320]}
            else:
                use_dynamic_shape = False
            if use_dynamic_shape:
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

    else:
        config.disable_gpu()
        if hasattr(args, "cpu_threads"):
            config.set_cpu_math_library_num_threads(args.cpu_threads)
        else:
            # default cpu threads as 10
            config.set_cpu_math_library_num_threads(10)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
            if args.run_mode == "fp16":
                config.enable_mkldnn_bfloat16()
    # enable memory optim
    config.enable_memory_optim()
    config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.delete_pass("matmul_transpose_reshape_fuse_pass")
    if mode == 'table':
        config.delete_pass("fc_fuse_pass")  # not supported for table
    config.switch_use_feed_fetch_ops(False)
    config.switch_ir_optim(True)

    # create predictor
    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_tensors = get_output_tensors(cfg, mode, predictor)
    return predictor, input_tensor, output_tensors, config


def get_output_tensors(cfg, mode, predictor):
    output_names = predictor.get_output_names()
    output_tensors = []
    output_name = 'softmax_0.tmp_0'
    if output_name in output_names:
        return [predictor.get_output_handle(output_name)]
    else:
        for output_name in output_names:
            output_tensor = predictor.get_output_handle(output_name)
            output_tensors.append(output_tensor)
        return output_tensors


def get_infer_gpuid():
    sysstr = platform.system()
    if sysstr == "Windows":
        return 0

    if not paddle.device.is_compiled_with_rocm():
        cmd = "env | grep CUDA_VISIBLE_DEVICES"
    else:
        cmd = "env | grep HIP_VISIBLE_DEVICES"
    env_cuda = os.popen(cmd).readlines()
    if len(env_cuda) == 0:
        return 0
    else:
        gpu_id = env_cuda[0].strip().split("=")[1]
        return int(gpu_id[0])


def draw_e2e_res(dt_boxes, strs, img_path):
    src_im = cv2.imread(img_path)
    for box, str in zip(dt_boxes, strs):
        box = box.astype(np.int32).reshape((-1, 1, 2))
        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        cv2.putText(
            src_im,
            str,
            org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=0.7,
            color=(0, 255, 0),
            thickness=1)
    return src_im


def draw_text_det_res(dt_boxes, img_path):
    src_im = cv2.imread(img_path)
    for box in dt_boxes:
        box = np.array(box).astype(np.int32).reshape(-1, 2)
        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
    return src_im


def resize_img(img, input_size=600):
    """
    resize img and limit the longest side of the image to input_size
    """
    img = np.array(img)
    im_shape = img.shape
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(input_size) / float(im_size_max)
    img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
    return img


def draw_ocr(image,
             boxes,
             txts=None,
             scores=None,
             drop_score=0.5,
             font_path="./doc/fonts/simfang.ttf"):
    """
    Visualize the results of OCR detection and recognition
    args:
        image(Image|array): RGB image
        boxes(list): boxes with shape (N, 4, 2)
        txts(list): the texts
        scores(list): the corresponding scores of txts
        drop_score(float): only scores greater than drop_score will be visualized
        font_path: the path of the font used to draw text
    return(array):
        the visualized img
    """
    if scores is None:
        scores = [1] * len(boxes)
    box_num = len(boxes)
    for i in range(box_num):
        if scores is not None and (scores[i] < drop_score or
                                   math.isnan(scores[i])):
            continue
        box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
    if txts is not None:
        img = np.array(resize_img(image, input_size=600))
        txt_img = text_visual(
            txts,
            scores,
            img_h=img.shape[0],
            img_w=600,
            threshold=drop_score,
            font_path=font_path)
        img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
        return img
    return image


def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     drop_score=0.5,
                     font_path="./doc/simfang.ttf"):
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    import random

    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
            1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
            1])**2)
        if box_height > 2 * box_width:
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                char_size = font.getsize(c)
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_size[1]
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text(
                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def str_count(s):
    """
    Count the display length of a string in units of Chinese characters;
    a single English character or digit counts as half a Chinese character.
    args:
        s(string): the input string
    return(int):
        the length in Chinese-character units
    """
    import string
    count_zh = count_pu = 0
    s_len = len(s)
    en_dg_count = 0
    for c in s:
        if c in string.ascii_letters or c.isdigit() or c.isspace():
            en_dg_count += 1
        elif c.isalpha():
            count_zh += 1
        else:
            count_pu += 1
    return s_len - math.ceil(en_dg_count / 2)


def text_visual(texts,
                scores,
                img_h=400,
                img_w=600,
                threshold=0.,
                font_path="./doc/simfang.ttf"):
    """
    create new blank img and draw txt on it
    args:
        texts(list): the texts to be drawn
        scores(list|None): corresponding score of each txt
        img_h(int): the height of blank img
        img_w(int): the width of blank img
        font_path: the path of the font used to draw text
    return(array):
    """
    if scores is not None:
        assert len(texts) == len(
            scores), "The number of txts and corresponding scores must match"

    def create_blank_img():
        blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
        blank_img[:, img_w - 1:] = 0
        blank_img = Image.fromarray(blank_img).convert("RGB")
        draw_txt = ImageDraw.Draw(blank_img)
        return blank_img, draw_txt

    blank_img, draw_txt = create_blank_img()

    font_size = 20
    txt_color = (0, 0, 0)
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    gap = font_size + 5
    txt_img_list = []
    count, index = 1, 0
    for idx, txt in enumerate(texts):
        index += 1
        if scores[idx] < threshold or math.isnan(scores[idx]):
            index -= 1
            continue
        first_line = True
        while str_count(txt) >= img_w // font_size - 4:
            tmp = txt
            txt = tmp[:img_w // font_size - 4]
            if first_line:
                new_txt = str(index) + ': ' + txt
                first_line = False
            else:
                new_txt = '    ' + txt
            draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
            txt = tmp[img_w // font_size - 4:]
            if count >= img_h // gap - 1:
                txt_img_list.append(np.array(blank_img))
                blank_img, draw_txt = create_blank_img()
                count = 0
            count += 1
        if first_line:
            new_txt = str(index) + ': ' + txt + '   ' + '%.3f' % (scores[idx])
        else:
            new_txt = "  " + txt + "  " + '%.3f' % (scores[idx])
        draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
        # whether to add a new blank img or not
        if count >= img_h // gap - 1 and idx + 1 < len(texts):
            txt_img_list.append(np.array(blank_img))
            blank_img, draw_txt = create_blank_img()
            count = 0
        count += 1
    txt_img_list.append(np.array(blank_img))
    if len(txt_img_list) == 1:
        blank_img = np.array(txt_img_list[0])
    else:
        blank_img = np.concatenate(txt_img_list, axis=1)
    return np.array(blank_img)


def base64_to_cv2(b64str):
    import base64
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated; np.frombuffer is the drop-in replacement
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data


def draw_boxes(image, boxes, scores=None, drop_score=0.5):
    if scores is None:
        scores = [1] * len(boxes)
    for (box, score) in zip(boxes, scores):
        if score < drop_score:
            continue
        box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
        image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
    return image


def get_rotate_crop_image(img, points):
    '''
    img_height, img_width = img.shape[0:2]
    left = int(np.min(points[:, 0]))
    right = int(np.max(points[:, 0]))
    top = int(np.min(points[:, 1]))
    bottom = int(np.max(points[:, 1]))
    img_crop = img[top:bottom, left:right, :].copy()
    points[:, 0] = points[:, 0] - left
    points[:, 1] = points[:, 1] - top
    '''
    assert len(points) == 4, "shape of points must be 4*2"
    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))
    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)
    dst_img_height, dst_img_width = dst_img.shape[0:2]
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img


def check_gpu(use_gpu):
    if use_gpu and not paddle.is_compiled_with_cuda():
        use_gpu = False
    return use_gpu


if __name__ == '__main__':
    pass
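A quick sketch of get_rotate_crop_image, which warps a quadrilateral text region into an axis-aligned crop; the corner coordinates below are made up for illustration.

# Warp a quadrilateral plate region to a straight crop; the four corners
# (top-left, top-right, bottom-right, bottom-left) below are illustrative.
import numpy as np
import cv2

img = cv2.imread('frame.jpg')  # assumed input image
quad = np.float32([[120, 80], [260, 90], [255, 140], [115, 130]])
plate_crop = get_rotate_crop_image(img, quad)
# When the crop is 1.5x taller than wide it is rotated 90 degrees (see above).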
@@ -0,0 +1,81 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np
import math


class VehiclePressingRecognizer(object):
    def __init__(self, cfg):
        self.cfg = cfg

    def judge(self, Ax1, Ay1, Ax2, Ay2, Bx1, By1, Bx2, By2):
        # bounding-box overlap test, then a cross-product straddle test
        if (max(Ax1, Ax2) >= min(Bx1, Bx2) and min(Ax1, Ax2) <= max(Bx1, Bx2)) and \
           (max(Ay1, Ay2) >= min(By1, By2) and min(Ay1, Ay2) <= max(By1, By2)):

            if ((Bx1 - Ax1) * (Ay2 - Ay1) - (By1 - Ay1) * (Ax2 - Ax1)) * \
               ((Bx2 - Ax1) * (Ay2 - Ay1) - (By2 - Ay1) * (Ax2 - Ax1)) <= 0 \
               and ((Ax1 - Bx1) * (By2 - By1) - (Ay1 - By1) * (Bx2 - Bx1)) * \
               ((Ax2 - Bx1) * (By2 - By1) - (Ay2 - By1) * (Bx2 - Bx1)) <= 0:
                return True
            else:
                return False
        else:
            return False

    def is_intersect(self, line, bbox):
        Ax1, Ay1, Ax2, Ay2 = line

        xmin, ymin, xmax, ymax = bbox

        bottom = self.judge(Ax1, Ay1, Ax2, Ay2, xmin, ymax, xmax, ymax)
        return bottom

    def run(self, lanes, det_res):
        intersect_bbox_list = []
        start_idx, boxes_num_i = 0, 0

        for i in range(len(lanes)):
            lane = lanes[i]
            if det_res is not None:
                det_res_i = {}
                boxes_num_i = det_res['boxes_num'][i]
                det_res_i['boxes'] = det_res['boxes'][start_idx:start_idx +
                                                      boxes_num_i, :]
                intersect_bbox = []

                for line in lane:
                    for bbox in det_res_i['boxes']:
                        if self.is_intersect(line, bbox[2:]):
                            intersect_bbox.append(bbox)
                intersect_bbox_list.append(intersect_bbox)

            start_idx += boxes_num_i

        return intersect_bbox_list

    def mot_run(self, lanes, det_res):
        intersect_bbox_list = []
        if det_res is None:
            return intersect_bbox_list
        lanes_res = lanes['output']
        for i in range(len(lanes_res)):
            lane = lanes_res[i]
            for line in lane:
                for bbox in det_res:
                    if self.is_intersect(line, bbox[3:]):
                        intersect_bbox_list.append(bbox)
        return intersect_bbox_list
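A self-contained sketch of the lane-pressing check on MOT output; each MOT box row follows the layout assumed by mot_run above ([mot_id, class_id, score, x1, y1, x2, y2]), and the coordinates are chosen so the lane actually crosses the box's bottom edge.

# One lane segment crossing the bottom edge of one tracked vehicle box.
recognizer = VehiclePressingRecognizer(cfg=None)   # cfg is stored but unused here
lanes_res = {'output': [[[100, 500, 500, 300]]]}   # one frame, one lane line
mot_boxes = [[1, 0, 0.9, 300, 300, 420, 395]]      # [mot_id, cls, score, x1, y1, x2, y2]
pressing = recognizer.mot_run(lanes_res, mot_boxes)
print(pressing)  # the box is returned: the lane crosses its bottom edge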
320
paddle_detection/deploy/pipeline/ppvehicle/vehicle_retrograde.py
Normal file
@@ -0,0 +1,320 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math


class VehicleRetrogradeRecognizer(object):
    def __init__(self, cfg):
        self.cfg = cfg
        self.filter_horizontal_flag = self.cfg['filter_horizontal_flag']
        self.deviation = self.cfg['deviation']
        self.move_scale = self.cfg['move_scale']
        self.keep_right_flag = self.cfg['keep_right_flag']
        # per-class record of track centers used by the retrograde recognizer
        self.center_traj_retrograde = [{}]
        self.fence_line = None if len(self.cfg[
            'fence_line']) == 0 else self.cfg['fence_line']

    def update_center_traj(self, mot_res, max_len):
        from collections import deque, defaultdict
        if mot_res is not None:
            ids = mot_res['boxes'][:, 0]
            scores = mot_res['boxes'][:, 2]
            boxes = mot_res['boxes'][:, 3:]
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        else:
            boxes = np.zeros([0, 4])
            ids = np.zeros([0])
            scores = np.zeros([0])

        # single class, but still needs to be defaultdict type for plotting
        num_classes = 1
        online_tlwhs = defaultdict(list)
        online_scores = defaultdict(list)
        online_ids = defaultdict(list)
        online_tlwhs[0] = boxes
        online_ids[0] = ids

        if mot_res is not None:
            for cls_id in range(num_classes):
                tlwhs = online_tlwhs[cls_id]
                obj_ids = online_ids[cls_id]
                for i, tlwh in enumerate(tlwhs):
                    x1, y1, w, h = tlwh
                    center = tuple(map(int, (x1 + w / 2., y1 + h)))
                    obj_id = int(obj_ids[i])
                    if self.center_traj_retrograde is not None:
                        if obj_id not in self.center_traj_retrograde[cls_id]:
                            self.center_traj_retrograde[cls_id][obj_id] = deque(
                                maxlen=max_len)
                        self.center_traj_retrograde[cls_id][obj_id].append(
                            center)

    def get_angle(self, array):
        x1, y1, x2, y2 = array
        a_x = x2 - x1
        a_y = y2 - y1
        angle1 = math.atan2(a_y, a_x)
        angle1 = int(angle1 * 180 / math.pi)

        a_x = x2 - x1 if y2 >= y1 else x1 - x2
        a_y = y2 - y1 if y2 >= y1 else y1 - y2
        angle2 = math.atan2(a_y, a_x)
        angle2 = int(angle2 * 180 / math.pi)
        if angle2 > 90:
            angle2 = 180 - angle2

        return angle1, angle2

    def is_move(self, array, frame_shape):
        x1, y1, x2, y2 = array
        h, w, _ = frame_shape

        if abs(x1 - x2) > w * self.move_scale or abs(y1 -
                                                     y2) > h * self.move_scale:
            return True
        else:
            return False

    def get_distance_point2line(self, point, line):
        line_point1, line_point2 = np.array(line[0:2]), np.array(line[2:])
        vec1 = line_point1 - point
        vec2 = line_point2 - point
        distance = np.abs(np.cross(vec1, vec2)) / np.linalg.norm(line_point1 -
                                                                 line_point2)

        return distance

    def driving_direction(self, line1, line2, is_init=False):
        x1, y1 = line1[2] - line1[0], line1[3] - line1[1]
        x2, y2 = line2[0] - line1[0], line2[1] - line1[1]
        result = x1 * y2 - x2 * y1

        distance = self.get_distance_point2line([x2, y2], line1)

        if result < 0:
            result = 1
        elif result == 0:
            # callers unpack two values, so return the distance here as well
            if line2[3] >= line2[1]:
                return -1, distance
            else:
                return 1, distance
        else:
            result = -1

        return result, distance

    def get_long_fence_line(self, h, w, line):
        x1, y1, x2, y2 = line
        if x1 == x2:
            return [x1, 0, x1, h]
        if y1 == y2:
            return [0, y1, w, y1]
        k = (y2 - y1) / (x2 - x1)
        b = y1 - k * x1

        if k == 1 and b == 0:
            return [0, 0, w, h]
        if k == -1 and b == 0:
            return [w, 0, h, h]

        top = [-b / k, 0]
        left = [0, b]
        right = [w, k * w + b]
        bottom = [(h - b) / k, h]
        candidate = np.array([top, left, right, bottom])

        flag = np.array([0, 0, 0, 0])

        if top[0] >= 0 and top[0] <= w:
            flag[0] = 1
        if left[1] > 0 and left[1] <= h:
            flag[1] = 1
        if right[1] > 0 and right[1] <= h:
            flag[2] = 1
        if bottom[0] > 0 and bottom[0] < w:
            flag[3] = 1

        ind = np.where(flag == 1)
        candidate = candidate[ind]
        candidate_sort = candidate[candidate[:, 1].argsort()]

        return [
            int(candidate_sort[0][0]), int(candidate_sort[0][1]),
            int(candidate_sort[1][0]), int(candidate_sort[1][1])
        ]

    def init_fence_line(self, lanes, pos_dir_traj, neg_dir_traj, frame_shape):
        fence_lines_candidate = None
        h, w, _ = frame_shape
        abs_distance = h * h + w * w

        for lane in lanes[0]:
            pos_dir_distance = h * h + w * w
            neg_dir_distance = h * h + w * w
            pos_dir = 0
            neg_dir = 0

            for traj_line in pos_dir_traj:
                dir_result, distance = self.driving_direction(
                    lane, traj_line['traj_line'])
                if dir_result > 0:
                    pos_dir_distance = min(distance, pos_dir_distance)
                    pos_dir = 1
                else:
                    neg_dir_distance = min(distance, neg_dir_distance)
                    neg_dir = 1

            if pos_dir > 0 and neg_dir > 0:
                continue

            for traj_line in neg_dir_traj:
                dir_result, distance = self.driving_direction(
                    lane, traj_line['traj_line'])

                if dir_result > 0:
                    pos_dir_distance = min(distance, pos_dir_distance)
                    pos_dir = 1
                else:
                    neg_dir_distance = min(distance, neg_dir_distance)
                    neg_dir = 1

            if pos_dir > 0 and neg_dir > 0:
                diff_dir_distance = abs(pos_dir_distance - neg_dir_distance)
                if diff_dir_distance < abs_distance:
                    fence_lines_candidate = lane
                    abs_distance = diff_dir_distance

        if fence_lines_candidate is None:
            return None

        fence_lines_candidate = self.get_long_fence_line(h, w,
                                                         fence_lines_candidate)

        return fence_lines_candidate

    def judge_retrograde(self, traj_line):
        line1 = self.fence_line
        x1, y1 = line1[2] - line1[0], line1[3] - line1[1]

        line2 = traj_line['traj_line']
        x2_start_point, y2_start_point = line2[0] - line1[0], line2[1] - line1[
            1]
        x2_end_point, y2_end_point = line2[2] - line1[0], line2[3] - line1[1]

        start_point_dir = x1 * y2_start_point - x2_start_point * y1
        end_point_dir = x1 * y2_end_point - x2_end_point * y1

        if start_point_dir < 0:
            start_point_dir = 1
        elif start_point_dir == 0:
            if line2[3] >= line2[1]:
                start_point_dir = -1
            else:
                start_point_dir = 1
        else:
            start_point_dir = -1

        if end_point_dir < 0:
            end_point_dir = 1
        elif end_point_dir == 0:
            if line2[3] >= line2[1]:
                end_point_dir = -1
            else:
                end_point_dir = 1
        else:
            end_point_dir = -1

        if self.keep_right_flag:
            driver_dir = -1 if (line2[3] - line2[1]) >= 0 else 1
        else:
            driver_dir = -1 if (line2[3] - line2[1]) <= 0 else 1

        return start_point_dir == driver_dir and start_point_dir == end_point_dir

    def mot_run(self, lanes_res, det_res, frame_shape):
        det = det_res['boxes']
        directions = lanes_res['directions']
        lanes = lanes_res['output']
        if len(directions) > 0:
            direction = directions[0]
        else:
            return [], self.fence_line

        if len(det) == 0:
            return [], self.fence_line

        traj_lines = []
        pos_dir_traj = []
        neg_dir_traj = []
        for i in range(len(det)):
            class_id = int(det[i][1])
            mot_id = int(det[i][0])
            traj_i = self.center_traj_retrograde[class_id][mot_id]
            if len(traj_i) < 2:
                continue

            traj_line = {
                'index': i,
                'mot_id': mot_id,
                'traj_line':
                [traj_i[0][0], traj_i[0][1], traj_i[-1][0], traj_i[-1][1]]
            }

            if not self.is_move(traj_line['traj_line'], frame_shape):
                continue
            angle, angle_deviation = self.get_angle(traj_line['traj_line'])
            if direction is not None and self.filter_horizontal_flag:
                if abs(angle_deviation - direction) > self.deviation:
                    continue

            traj_line['angle'] = angle
            traj_lines.append(traj_line)

            if self.fence_line is None:
                if angle >= 0:
                    pos_dir_traj.append(traj_line)
                else:
                    neg_dir_traj.append(traj_line)

        if len(traj_lines) == 0:
            return [], self.fence_line

        if self.fence_line is None:
            if len(pos_dir_traj) < 1 or len(neg_dir_traj) < 1:
                return [], None

            self.fence_line = self.init_fence_line(lanes, pos_dir_traj,
                                                   neg_dir_traj, frame_shape)
            return [], self.fence_line

        else:
            retrograde_list = []
            for traj_line in traj_lines:
                if not self.judge_retrograde(traj_line):
                    retrograde_list.append(det[traj_line['index']][0])

            return retrograde_list, self.fence_line
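A per-frame driving sketch for VehicleRetrogradeRecognizer. The cfg keys follow __init__ above (the values shown are illustrative, not confirmed defaults), and mot_res/lanes_res are assumed to come from the MOT and lane-segmentation stages in the shapes mot_run expects.

# Illustrative per-frame loop (config values and upstream results are assumptions).
cfg = {
    'filter_horizontal_flag': True,
    'deviation': 23,       # max allowed angle deviation, illustrative
    'move_scale': 0.01,    # min displacement relative to frame size
    'keep_right_flag': True,
    'fence_line': [],      # empty -> fence line is estimated from trajectories
}
recognizer = VehicleRetrogradeRecognizer(cfg)

for frame, mot_res, lanes_res in video_stream():  # hypothetical generator
    recognizer.update_center_traj(mot_res, max_len=50)
    retrograde_ids, fence_line = recognizer.mot_run(
        lanes_res, det_res=mot_res, frame_shape=frame.shape)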
@@ -0,0 +1,296 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle
from paddle.nn import functional as F
import re
from shapely.geometry import Polygon
import cv2
import copy


def build_post_process(config, global_config=None):
    support_dict = ['DBPostProcess', 'CTCLabelDecode']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    if module_name == "None":
        return
    if global_config is not None:
        config.update(global_config)
    assert module_name in support_dict, Exception(
        'post process only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
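
# A minimal usage sketch (the config values below are illustrative, not taken
# from the shipped PP-Vehicle configs):
#   det_postprocess = build_post_process({
#       'name': 'DBPostProcess',
#       'thresh': 0.3,
#       'box_thresh': 0.6,
#       'unclip_ratio': 1.5
#   })
#   rec_postprocess = build_post_process({
#       'name': 'CTCLabelDecode',
#       'character_dict_path': None,
#       'use_space_char': True
#   })
# Dispatch goes through eval() on the 'name' field, so only class names
# whitelisted in support_dict are accepted.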


class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 use_dilation=False,
                 score_mode="fast",
                 **kwargs):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        assert score_mode in [
            "slow", "fast"
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array(
            [[1, 1], [1, 1]])

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        '''
        _bitmap: single map with shape (1, H, W),
            whose values are binarized as {0, 1}
        '''

        bitmap = _bitmap
        height, width = bitmap.shape

        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)
        # cv2.findContours returns 3 values in OpenCV 3.x and 2 in OpenCV 4.x.
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            # Rescale from bitmap coordinates back to the source image size.
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores
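
    # Coordinate rescaling example (illustrative numbers): if the network ran
    # on a 736x736 bitmap and the source frame is 1920x1080, a bitmap x of 368
    # maps to round(368 / 736 * 1920) = 960 and a bitmap y of 368 maps to
    # round(368 / 736 * 1080) = 540, after which both are clipped to the
    # destination image bounds.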

    def unclip(self, box):
        try:
            import pyclipper
        except Exception as e:
            raise RuntimeError(
                'Unable to use vehicleplate postprocess in PP-Vehicle, please install pyclipper, for example: `pip install pyclipper`, see https://github.com/fonttools/pyclipper'
            )
        unclip_ratio = self.unclip_ratio
        # Expansion distance from the DB paper: D = A * r / L, where A is the
        # polygon area, L its perimeter and r the unclip ratio.
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded
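
    # Worked example for the offset distance (illustrative numbers): a 10x10
    # square has area A = 100 and perimeter L = 40, so with the default
    # unclip_ratio = 2.0 the contour is pushed outwards by
    # D = 100 * 2.0 / 40 = 5 pixels on every side before re-boxing.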

    def get_mini_boxes(self, contour):
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # Reorder the four corners clockwise, starting from the top-left one.
        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])
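
    # Ordering example (illustrative corners): for points sorted by x as
    # [(0, 0), (0, 8), (20, 0), (20, 8)], both y-comparisons are true, so the
    # result is top-left (0, 0), top-right (20, 0), bottom-right (20, 8),
    # bottom-left (0, 8); the second return value min(20, 8) = 8 is the short
    # side used by the min_size filters in boxes_from_bitmap.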

    def box_score_fast(self, bitmap, _box):
        '''
        box_score_fast: use bbox mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # np.int was removed in NumPy 1.24; use the explicit np.int32 instead.
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
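
    # Scoring example (illustrative): if the probability map is around 0.9
    # inside a candidate quadrilateral and near 0 outside, the masked
    # cv2.mean returns roughly 0.9, which passes the default box_thresh of
    # 0.7. "fast" averages over the rotated box region; "slow" below masks
    # the exact contour instead.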

    def box_score_slow(self, bitmap, contour):
        '''
        box_score_slow: use polygon mean score as the mean score
        '''
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, outs_dict, shape_list):
        pred = outs_dict['maps']
        if isinstance(pred, paddle.Tensor):
            pred = pred.numpy()
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh

        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            src_h, src_w = shape_list[batch_index]
            if self.dilation_kernel is not None:
                mask = cv2.dilate(
                    np.array(segmentation[batch_index]).astype(np.uint8),
                    self.dilation_kernel)
            else:
                mask = segmentation[batch_index]
            boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
                                                   src_w, src_h)

            boxes_batch.append({'points': boxes})
        return boxes_batch
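
    # End-to-end sketch (assumptions: `det_outs['maps']` is the raw output of
    # a DB text-detection model and `shape_list` holds the original (h, w) of
    # each input image; the names are illustrative):
    #   post_op = DBPostProcess(thresh=0.3, box_thresh=0.6, unclip_ratio=1.5)
    #   boxes_batch = post_op(det_outs, shape_list)
    #   plate_boxes = boxes_batch[0]['points']  # (N, 4, 2) int16 corners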


class BaseRecLabelDecode(object):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False):
        self.beg_str = "sos"
        self.end_str = "eos"

        self.character_str = []
        if character_dict_path is None:
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
        else:
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
                    self.character_str.append(line)
            if use_space_char:
                self.character_str.append(" ")
            dict_character = list(self.character_str)

        dict_character = self.add_special_char(dict_character)
        self.dict = {}
        for i, char in enumerate(dict_character):
            self.dict[char] = i
        self.character = dict_character

    def add_special_char(self, dict_character):
        return dict_character
    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label. """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        batch_size = len(text_index)
        for batch_idx in range(batch_size):
            selection = np.ones(len(text_index[batch_idx]), dtype=bool)
            if is_remove_duplicate:
                # Keep only the first index of each consecutive run, as CTC
                # decoding requires.
                selection[1:] = text_index[batch_idx][1:] != text_index[
                    batch_idx][:-1]
            for ignored_token in ignored_tokens:
                selection &= text_index[batch_idx] != ignored_token

            char_list = [
                self.character[text_id]
                for text_id in text_index[batch_idx][selection]
            ]
            if text_prob is not None:
                conf_list = text_prob[batch_idx][selection]
            else:
                conf_list = [1] * len(selection)
            if len(conf_list) == 0:
                conf_list = [0]

            text = ''.join(char_list)
            result_list.append((text, np.mean(conf_list).tolist()))
        return result_list
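
    # CTC collapse example (illustrative indices and characters): with
    # blank = 0 and character = ['blank', 'A', 'B', ...], the index sequence
    # [1, 1, 0, 2, 2] first drops repeats ([1, 0, 2]), then drops blanks,
    # leaving indices [1, 2] -> "AB"; the confidence is the mean probability
    # of the surviving positions.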

    def get_ignored_tokens(self):
        return [0]  # for ctc blank


class CTCLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

    def __init__(self, character_dict_path=None, use_space_char=False,
                 **kwargs):
        super(CTCLabelDecode, self).__init__(character_dict_path,
                                             use_space_char)

    def __call__(self, preds, label=None, *args, **kwargs):
        if isinstance(preds, (tuple, list)):
            preds = preds[-1]
        if isinstance(preds, paddle.Tensor):
            preds = preds.numpy()
        preds_idx = preds.argmax(axis=2)
        preds_prob = preds.max(axis=2)
        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
        if label is None:
            return text
        label = self.decode(label)
        return text, label

    def add_special_char(self, dict_character):
        dict_character = ['blank'] + dict_character
        return dict_character
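

# A minimal recognition-decoding sketch (assumptions: `rec_preds` is the
# (batch, seq_len, num_classes) probability tensor from a CTC-based plate
# recognition model; the dict path is illustrative):
#   decoder = CTCLabelDecode(character_dict_path='plate_dict.txt',
#                            use_space_char=False)
#   results = decoder(rec_preds)
#   plate_text, confidence = results[0]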