添加形近字处理

This commit is contained in:
2024-07-24 13:03:33 +08:00
parent d971dcfe1b
commit db0d44d3bf
2 changed files with 20 additions and 5 deletions

View File

@@ -21,5 +21,10 @@ NAME_KEYS = [
]
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
# Visually similar (look-alike) characters; only takes effect when masking names.
# Maps a single character to the list of characters it may be confused with.
# NOTE(review): the empty-string entry looks like a placeholder to be filled in — confirm.
SIMILAR_CHAR = {
"": [""]
}
# If you do not want spaces to be recognized, you can set use_space_char=False.
# Be sure to test after changing this setting: in version 2.7.3 this setting has
# a bug that causes recognition to fail.
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3)

View File

@@ -7,7 +7,7 @@ from sqlalchemy import update, and_
from db import MysqlSession
from db.mysql import ZxPhrec, ZxPhhd
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
from ucloud import BUCKET, ufile
from util import image_util, util
@@ -103,10 +103,19 @@ def get_mask_layout(image, name, id_card_num):
# 无识别结果
return result
else:
# 涂抹
# 处理形近字
name_list = [name]
# 移除名字中重复的字
unique_name = "".join(dict.fromkeys(name))
for char in unique_name:
if char in SIMILAR_CHAR:
for sc in SIMILAR_CHAR[char]:
name_list.append(name.replace(char, sc))
for layout in layouts:
find_name_by_key = True
find_id_card_num_by_key = True
for name in name_list:
if name in layout[1]:
result += find_boxes(name, layout, improve=True, image_path=temp_file.name)
find_name_by_key = False
@@ -146,6 +155,7 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
is_masked = False
# 打开图片
image = image_util.read(img_url)
# image = image_util.rotate(image, 180)
split_results = image_util.split(image)
for split_result in split_results:
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)