From db0d44d3bf90e89fda87b9ce2ec8c192fc5e2234 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Wed, 24 Jul 2024 13:03:33 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=BD=A2=E8=BF=91=E5=AD=97?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- photo_mask/__init__.py | 5 +++++ photo_mask/photo_mask.py | 20 +++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/photo_mask/__init__.py b/photo_mask/__init__.py index 423fa7d..1bd77f1 100644 --- a/photo_mask/__init__.py +++ b/photo_mask/__init__.py @@ -21,5 +21,10 @@ NAME_KEYS = [ ] ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ] +# 形近字,只对姓名涂抹生效 +SIMILAR_CHAR = { + "凤": ["风"] +} + # 如果不希望识别出空格,可以设置use_space_char=False。做此项设置一定要测试,2.7.3版本此项设置有bug,会导致识别失败 OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_box_thresh=0.3) diff --git a/photo_mask/photo_mask.py b/photo_mask/photo_mask.py index 5b4f967..b6639ca 100644 --- a/photo_mask/photo_mask.py +++ b/photo_mask/photo_mask.py @@ -7,7 +7,7 @@ from sqlalchemy import update, and_ from db import MysqlSession from db.mysql import ZxPhrec, ZxPhhd -from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS +from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR from ucloud import BUCKET, ufile from util import image_util, util @@ -103,13 +103,22 @@ def get_mask_layout(image, name, id_card_num): # 无识别结果 return result else: - # 涂抹 + # 处理形近字 + name_list = [name] + # 移除名字中重复的字 + unique_name = "".join(dict.fromkeys(name)) + for char in unique_name: + if char in SIMILAR_CHAR: + for sc in SIMILAR_CHAR[char]: + name_list.append(name.replace(char, sc)) + for layout in layouts: find_name_by_key = True find_id_card_num_by_key = True - if name in layout[1]: - result += find_boxes(name, layout, improve=True, image_path=temp_file.name) - find_name_by_key = False + for name in name_list: + if name in layout[1]: + result += find_boxes(name, layout, improve=True, image_path=temp_file.name) + find_name_by_key = False if id_card_num in layout[1]: result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name) find_id_card_num_by_key = False @@ -146,6 +155,7 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)): is_masked = False # 打开图片 image = image_util.read(img_url) + # image = image_util.rotate(image, 180) split_results = image_util.split(image) for split_result in split_results: to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)