调整关键词匹配为正则匹配

This commit is contained in:
2024-07-24 16:48:35 +08:00
parent 9198f5ee66
commit 45669bcde1
3 changed files with 14 additions and 8 deletions

View File

@@ -13,10 +13,12 @@ SEND_ERROR_EMAIL = True
""" """
关键词配置 关键词配置
""" """
# 使用正则匹配
NAME_KEYS = [ NAME_KEYS = [
{"key": "姓名", "length": 4}, {"key": "^名[:|]", "length": 3},
{"key": "", "length": 4}, {"key": "[姓|娃]", "length": 4},
{"key": "款人", "length": 4}, {"key": "款人", "length": 4},
{"key": "交.人", "length": 4},
{"key": "购买方名称", "length": 4}, {"key": "购买方名称", "length": 4},
] ]
ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ] ID_CARD_NUM_KEYS = [{"key": "身份证号", "length": 19}, ]

View File

@@ -1,4 +1,5 @@
import logging.config import logging.config
import re
import tempfile import tempfile
from time import sleep from time import sleep
@@ -127,15 +128,13 @@ def get_mask_layout(image, name, id_card_num):
keys = [] keys = []
if find_name_by_key: if find_name_by_key:
keys += NAME_KEYS keys += NAME_KEYS
if layout[1].startswith("名:"):
result += find_boxes("名:", layout, offset=2, length=3)
if layout[1].startswith("名:"):
result += find_boxes("名:", layout, offset=2, length=3)
if find_id_card_num_by_key: if find_id_card_num_by_key:
keys += ID_CARD_NUM_KEYS keys += ID_CARD_NUM_KEYS
for key in keys: for key in keys:
if key["key"] in layout[1]: matches = re.findall(key["key"], layout[1])
result += find_boxes(key["key"], layout, offset=len(key["key"]), length=key["length"]) for match in matches:
if match in layout[1]:
result += find_boxes(match, layout, offset=len(match), length=key["length"])
return result return result
except Exception as e: except Exception as e:
logging.error("涂抹时出错!", exc_info=e) logging.error("涂抹时出错!", exc_info=e)

View File

@@ -25,6 +25,11 @@ if __name__ == '__main__':
ocr_error = (session.query(ZxIeOcrerror.pk_phrec, ZxIeOcrerror.cXm, ZxIeOcrerror.cSfzh, ZxIeOcrerror.cfjaddress) ocr_error = (session.query(ZxIeOcrerror.pk_phrec, ZxIeOcrerror.cXm, ZxIeOcrerror.cSfzh, ZxIeOcrerror.cfjaddress)
.filter(ZxIeOcrerror.checktime.is_(None)).order_by(ZxIeOcrerror.pk_phrec.desc()).limit(1).one()) .filter(ZxIeOcrerror.checktime.is_(None)).order_by(ZxIeOcrerror.pk_phrec.desc()).limit(1).one())
session.close() session.close()
# ocr_error = ZxIeOcrerror()
# ocr_error.pk_phrec = 24594061
# ocr_error.cXm = ""
# ocr_error.cSfzh = ""
# ocr_error.cfjaddress = ""
final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100") final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100")
final_image = image_util.read(final_img_url) final_image = image_util.read(final_img_url)