# File-viewer header (not part of the program): 49 lines, 1.4 KiB, Python
from paddleocr import PaddleOCR
# ---------------------------------------------------------------------------
# Project settings
# ---------------------------------------------------------------------------

# Number of cases fetched from the database per query.
PHHD_BATCH_SIZE = 20

# How long to wait, in minutes, when a query returns no cases.
SLEEP_MINUTES = 5

# Whether to send an alert e-mail when an exception occurs.
SEND_ERROR_EMAIL = True
# ---------------------------------------------------------------------------
# Keyword settings
# ---------------------------------------------------------------------------
#
# Keywords are matched as regular expressions. Each entry is a dict with:
#   key:    the regular expression.
#   length: the length of the redaction.
#   offset: where the redaction starts, counted from the beginning of the
#           regex match. When omitted, the offset defaults to the length of
#           the matched text, i.e. redaction starts right after the match.
#
# NOTE(review): inside a character class such as [a|b] the "|" is a literal
# pipe, not alternation, so these patterns also accept "|" in that position.
# The patterns are kept exactly as they were; confirm whether matching "|"
# is intended before tightening them.
# NOTE(review): classes like [:|:] and [(|(] list the same character twice.
# Presumably one of each pair was a full-width variant that was normalised
# somewhere in transit -- verify against the repository copy.
NAME_KEYS = [
    {"key": r"^名[:|:]$", "length": 3},
    {"key": r"[姓|娃|生|性]名", "length": 4},
    {"key": r"[款|献]人", "length": 4},
    {"key": r"[交|文][^病]?人", "length": 4},
    {"key": r"购买方名称", "length": 4},
    # Negative offset: redaction starts 3 positions before the match start.
    {"key": r"[(|(]个人[)|)]", "length": 3, "offset": -3},
    {"key": r"客户", "length": 4},
    {"key": r"病人[姓]?名", "length": 4},
]
# Regex keywords that introduce an ID-card number. Entries use the same
# "key" / "length" / optional "offset" layout as the name keywords.
# NOTE(review): length 19 presumably covers an 18-character ID number plus
# one separator character -- confirm against the redaction code.
ID_CARD_NUM_KEYS = [
    {"key": r"[身]?份证号", "length": 19},
    {"key": r"身份号码", "length": 19},
]
# Visually similar characters. This table only takes effect for name
# redaction. Each key maps one character to a list of look-alike characters.
# NOTE(review): whether the key is the true character and the values are OCR
# misreads (or the reverse) is not visible here -- check the consumer.
SIMILAR_CHAR = {
    "凤": ["风"],
}
# Shared PaddleOCR engine. It is constructed at module level, so it is built
# when this configuration module is imported.
#
# If spaces should not be recognised, use_space_char=False can be set.
# Always test after changing that option: in version 2.7.3 it has a bug that
# makes recognition fail.
#
# NOTE(review): the keyword meanings below follow PaddleOCR's parameter names;
# confirm them against the documentation of the installed version.
OCR = PaddleOCR(
    gpu_id=1,                  # GPU index is hard-coded; confirm the host has this device
    show_log=False,            # keep PaddleOCR's own logging quiet
    det_db_thresh=0.1,         # text-detection (DB) threshold
    det_db_box_thresh=0.3,     # text-detection box score threshold
    det_limit_side_len=1248,   # image side-length limit used by detection
    drop_score=0.3,            # presumably drops results scoring below this -- TODO confirm
)