36 lines
980 B
Python
36 lines
980 B
Python
from paddleocr import PaddleOCR
# ---------------------------------------------------------------------------
# Project configuration
# ---------------------------------------------------------------------------

# Number of cases fetched from the database per batch.
PHHD_BATCH_SIZE = 20

# Wait time (in minutes) when the query returns no cases.
SLEEP_MINUTES = 5

# Whether to send alert emails when an exception occurs.
SEND_ERROR_EMAIL = True

# ---------------------------------------------------------------------------
# Keyword configuration
# ---------------------------------------------------------------------------

# Keys that mark a personal name. Each "key" is matched as a regular
# expression.
# NOTE(review): "length" is presumably the number of characters covered once
# the key matches -- confirm against the code that consumes this table.
# NOTE(review): inside a character class "|" is a literal pipe, not
# alternation, so "[姓|娃]名" also matches "|名" and "[:|:]" also matches "|".
# The patterns are intentionally left unchanged here; confirm whether the
# pipe is wanted before tightening them.
NAME_KEYS = [
    {"key": "^名[:|:]$", "length": 3},  # may cause mistaken redaction
    {"key": "[姓|娃]名", "length": 4},
    {"key": "款人", "length": 4},
    {"key": "交[^病]人", "length": 4},
    {"key": "购买方名称", "length": 4},
]

# Keys that mark an ID-card number. Same entry layout as the name keys:
# a "key" pattern plus a "length".
# NOTE(review): "length" presumably spans the key plus the ID number that
# follows it -- confirm against the consumer.
ID_CARD_NUM_KEYS = [
    {"key": "身份证", "length": 20},
    {"key": "份证号", "length": 19},
]

# Visually similar characters; only takes effect for name redaction.
# Maps a character to the list of look-alike characters associated with it.
SIMILAR_CHAR = {
    "凤": ["风"],
}

# Shared PaddleOCR instance, constructed once when this module is imported.
# If you do not want spaces to be recognized, you can set
# use_space_char=False. Be sure to test when changing this setting: in
# version 2.7.3 it has a bug that causes recognition to fail.
OCR = PaddleOCR(
    use_angle_cls=False,
    show_log=False,
    det_db_box_thresh=0.3,
    det_limit_side_len=1248,
    drop_score=0.3,
)