添加形近字处理
This commit is contained in:
@@ -21,5 +21,10 @@ NAME_KEYS = [
|
|||||||
]
|
]
|
||||||
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
|
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]
|
||||||
|
|
||||||
|
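# Visually similar characters (形近字): maps a character to its look-alike alternatives. Only applied when masking names.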
|
||||||
|
SIMILAR_CHAR = {
|
||||||
|
"凤": ["风"]
|
||||||
|
}
|
||||||
|
|
||||||
# 如果不希望识别出空格,可以设置use_space_char=False。做此项设置一定要测试,2.7.3版本此项设置有bug,会导致识别失败
|
# 如果不希望识别出空格,可以设置use_space_char=False。做此项设置一定要测试,2.7.3版本此项设置有bug,会导致识别失败
|
||||||
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3)
|
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from sqlalchemy import update, and_
|
|||||||
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxPhrec, ZxPhhd
|
from db.mysql import ZxPhrec, ZxPhhd
|
||||||
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS
|
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
|
||||||
from ucloud import BUCKET, ufile
|
from ucloud import BUCKET, ufile
|
||||||
from util import image_util, util
|
from util import image_util, util
|
||||||
|
|
||||||
@@ -103,13 +103,22 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
# 无识别结果
|
# 无识别结果
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
# 涂抹
|
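# Handle visually similar characters: build the list of name variants (the original name plus look-alike substitutions) to search for in each layout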
|
||||||
|
name_list = [name]
|
||||||
|
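# De-duplicate the characters of the name (order preserved) so each variant is added only once; str.replace already substitutes every occurrence of the character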
|
||||||
|
unique_name = "".join(dict.fromkeys(name))
|
||||||
|
for char in unique_name:
|
||||||
|
if char in SIMILAR_CHAR:
|
||||||
|
for sc in SIMILAR_CHAR[char]:
|
||||||
|
name_list.append(name.replace(char, sc))
|
||||||
|
|
||||||
for layout in layouts:
|
for layout in layouts:
|
||||||
find_name_by_key = True
|
find_name_by_key = True
|
||||||
find_id_card_num_by_key = True
|
find_id_card_num_by_key = True
|
||||||
if name in layout[1]:
|
for name in name_list:
|
||||||
result += find_boxes(name, layout, improve=True, image_path=temp_file.name)
|
if name in layout[1]:
|
||||||
find_name_by_key = False
|
result += find_boxes(name, layout, improve=True, image_path=temp_file.name)
|
||||||
|
find_name_by_key = False
|
||||||
if id_card_num in layout[1]:
|
if id_card_num in layout[1]:
|
||||||
result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name)
|
result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name)
|
||||||
find_id_card_num_by_key = False
|
find_id_card_num_by_key = False
|
||||||
@@ -146,6 +155,7 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
|
|||||||
is_masked = False
|
is_masked = False
|
||||||
# 打开图片
|
# 打开图片
|
||||||
image = image_util.read(img_url)
|
image = image_util.read(img_url)
|
||||||
|
# image = image_util.rotate(image, 180)
|
||||||
split_results = image_util.split(image)
|
split_results = image_util.split(image)
|
||||||
for split_result in split_results:
|
for split_result in split_results:
|
||||||
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)
|
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)
|
||||||
|
|||||||
Reference in New Issue
Block a user