调整涂抹的OCR模型参数;优化关键词
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from paddlenlp.utils.doc_parser import DocParser
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
"""
|
"""
|
||||||
项目配置
|
项目配置
|
||||||
@@ -19,11 +19,11 @@ UPLOAD_TRY_TIMES = 3
|
|||||||
"""
|
"""
|
||||||
NAME_KEYS = [
|
NAME_KEYS = [
|
||||||
{"key": "姓名", "length": 4},
|
{"key": "姓名", "length": 4},
|
||||||
{"key": "交款人", "length": 4},
|
{"key": "娃名", "length": 4},
|
||||||
{"key": "文款人", "length": 4},
|
{"key": "款人", "length": 4},
|
||||||
{"key": "购买方名称", "length": 4},
|
{"key": "购买方名称", "length": 4},
|
||||||
]
|
]
|
||||||
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
|
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
|
||||||
|
|
||||||
# OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1)
|
# 如果不希望识别出空格,可以设置 use_space_char=False。做此项设置后一定要测试:2.7.3 版本中此项设置有 bug,会导致识别失败
|
||||||
OCR = DocParser(use_gpu=True, layout_analysis=False)
|
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3)
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
|
|||||||
captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image, True)
|
captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image, True)
|
||||||
cv2.imwrite(temp_file.name, captured_image)
|
cv2.imwrite(temp_file.name, captured_image)
|
||||||
try:
|
try:
|
||||||
layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
layouts = []
|
layouts = []
|
||||||
@@ -91,8 +91,8 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
result = []
|
result = []
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
# layouts = util.get_ocr_layout(OCR, temp_file.name)
|
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
||||||
layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
# layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
layouts = []
|
layouts = []
|
||||||
@@ -118,6 +118,10 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
keys = []
|
keys = []
|
||||||
if find_name_by_key:
|
if find_name_by_key:
|
||||||
keys += NAME_KEYS
|
keys += NAME_KEYS
|
||||||
|
if layout[1].startswith("名:"):
|
||||||
|
result += find_boxes("名:", layout, offset=2, length=3)
|
||||||
|
if layout[1].startswith("名:"):
|
||||||
|
result += find_boxes("名:", layout, offset=2, length=3)
|
||||||
if find_id_card_num_by_key:
|
if find_id_card_num_by_key:
|
||||||
keys += ID_CARD_NUM_KEYS
|
keys += ID_CARD_NUM_KEYS
|
||||||
for key in keys:
|
for key in keys:
|
||||||
@@ -135,8 +139,7 @@ def handle_image_for_mask(split_result):
|
|||||||
expand_img, offset_x, offset_y = image_util.expand_to_a4_size(split_result["img"], True)
|
expand_img, offset_x, offset_y = image_util.expand_to_a4_size(split_result["img"], True)
|
||||||
split_result["x_offset"] -= offset_x
|
split_result["x_offset"] -= offset_x
|
||||||
split_result["y_offset"] -= offset_y
|
split_result["y_offset"] -= offset_y
|
||||||
gray_image = cv2.cvtColor(expand_img, cv2.COLOR_BGR2GRAY)
|
return expand_img, split_result["x_offset"], split_result["y_offset"]
|
||||||
return gray_image, split_result["x_offset"], split_result["y_offset"]
|
|
||||||
|
|
||||||
|
|
||||||
def photo_mask(pk_phhd, name, id_card_num):
|
def photo_mask(pk_phhd, name, id_card_num):
|
||||||
|
|||||||
Reference in New Issue
Block a user