添加形近字处理

This commit is contained in:
2024-07-24 13:03:33 +08:00
parent d971dcfe1b
commit db0d44d3bf
2 changed files with 20 additions and 5 deletions

View File

@@ -21,5 +21,10 @@ NAME_KEYS = [
] ]
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ] ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
# Visually similar (look-alike) characters; only applied when masking names.
# Each key is a single character and its value is the list of characters that
# may stand in for it: for every character of the name found here, a variant
# of the name with that character replaced is also searched for.
# NOTE(review): the only entry is an empty placeholder, which can never match
# a character of a name -- presumably real mappings are meant to be filled in;
# confirm.
SIMILAR_CHAR = {
"": [""]
}
# 如果不希望识别出空格可以设置use_space_char=False。做此项设置一定要测试2.7.3版本此项设置有bug会导致识别失败 # 如果不希望识别出空格可以设置use_space_char=False。做此项设置一定要测试2.7.3版本此项设置有bug会导致识别失败
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3) OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3)

View File

@@ -7,7 +7,7 @@ from sqlalchemy import update, and_
from db import MysqlSession from db import MysqlSession
from db.mysql import ZxPhrec, ZxPhhd from db.mysql import ZxPhrec, ZxPhhd
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
from ucloud import BUCKET, ufile from ucloud import BUCKET, ufile
from util import image_util, util from util import image_util, util
@@ -103,13 +103,22 @@ def get_mask_layout(image, name, id_card_num):
# 无识别结果 # 无识别结果
return result return result
else: else:
# 涂抹 # 处理形近字
name_list = [name]
# 移除名字中重复的字
unique_name = "".join(dict.fromkeys(name))
for char in unique_name:
if char in SIMILAR_CHAR:
for sc in SIMILAR_CHAR[char]:
name_list.append(name.replace(char, sc))
for layout in layouts: for layout in layouts:
find_name_by_key = True find_name_by_key = True
find_id_card_num_by_key = True find_id_card_num_by_key = True
if name in layout[1]: for name in name_list:
result += find_boxes(name, layout, improve=True, image_path=temp_file.name) if name in layout[1]:
find_name_by_key = False result += find_boxes(name, layout, improve=True, image_path=temp_file.name)
find_name_by_key = False
if id_card_num in layout[1]: if id_card_num in layout[1]:
result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name) result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name)
find_id_card_num_by_key = False find_id_card_num_by_key = False
@@ -146,6 +155,7 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
is_masked = False is_masked = False
# 打开图片 # 打开图片
image = image_util.read(img_url) image = image_util.read(img_url)
# image = image_util.rotate(image, 180)
split_results = image_util.split(image) split_results = image_util.split(image)
for split_result in split_results: for split_result in split_results:
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result) to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)