Files
fcb_photo_review/photo_mask/__init__.py
2024-08-02 09:02:28 +08:00

49 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from paddleocr import PaddleOCR
"""
项目配置
"""
# Number of cases fetched from the database in each batch.
PHHD_BATCH_SIZE = 20

# Waiting time, in minutes, when a query returns no cases.
SLEEP_MINUTES = 5

# Whether to send an alert e-mail when an exception occurs.
SEND_ERROR_EMAIL = True
"""
关键词配置
"""
# Keyword rules are matched with regular expressions. Each rule is a dict:
#   key:    the regular expression to search for.
#   length: how many characters to mask.
#   offset: where masking starts, counted from the beginning of the regex
#           match. When omitted, the offset defaults to the length of the
#           matched text, i.e. masking starts right after the match.
#
# NOTE(review): inside a character class ``[...]`` the ``|`` is a literal
# vertical bar, not alternation, so classes such as ``[姓|娃|生|性]`` also
# match "|". Confirm whether that is intended (e.g. to tolerate OCR noise)
# before tightening any pattern.
NAME_KEYS = [
    # A text box that is exactly "名" followed by one ":" or "|".
    {"key": "^名[:|]$", "length": 3},
    # "姓名" (full name) plus variants with a different first character --
    # presumably OCR misreads of "姓"; confirm.
    {"key": "[姓|娃|生|性]名", "length": 4},
    # "款人" / "献人" -- labels ending in "...款人" or "...献人".
    {"key": "[款|献]人", "length": 4},
    # "交" or "文", then at most one character that is not "病", then "人".
    {"key": "[交|文][^病]?人", "length": 4},
    # "购买方名称" (purchaser name).
    {"key": "购买方名称", "length": 4},
    # "个人" (individual) wrapped in brackets; the negative offset places the
    # start of the mask three characters before the start of the match.
    {"key": "[|(]个人[|)]", "length": 3, "offset": -3},
    # "客户" (customer).
    {"key": "客户", "length": 4},
    # "病人姓名" / "病人名" (patient name).
    {"key": "病人[姓]?名", "length": 4},
]
# Regex rules locating ID-card-number labels. Each rule carries the pattern
# under "key" and the number of characters to mask under "length"; no rule
# here sets an explicit "offset".
ID_CARD_NUM_KEYS = [
    # "身份证号" (ID card number); the leading "身" is optional.
    {"key": "[身]?份证号", "length": 19},
    # "身份号码" (identity number).
    {"key": "身份号码", "length": 19},
]
# Visually similar (look-alike) characters; only takes effect for name masking.
# The single entry maps the empty string to a list holding one empty string.
SIMILAR_CHAR = {"": [""]}
# Shared PaddleOCR engine, constructed once when this module is imported.
#
# To stop spaces from being recognized you can pass use_space_char=False.
# Be sure to test after making that change: in version 2.7.3 this option has
# a bug that makes recognition fail.
#
# NOTE(review): parameter meanings below follow the PaddleOCR inference
# argument documentation -- confirm against the installed version.
OCR = PaddleOCR(
    gpu_id=1,  # GPU device index to run on
    show_log=False,  # suppress PaddleOCR's own log output
    det_db_thresh=0.1,  # detection: binarization threshold of the DB output map
    det_db_box_thresh=0.3,  # detection: minimum score for a text box to be kept
    det_limit_side_len=1248,  # detection: image side-length limit
    drop_score=0.3,  # drop recognition results scoring below this value
)