from paddleocr import PaddleOCR

# ---------------------------------------------------------------------------
# Project configuration
# ---------------------------------------------------------------------------

# Number of cases fetched from the database per batch.
PHHD_BATCH_SIZE = 20

# How long to wait (in minutes) when the query returns no cases.
SLEEP_MINUTES = 5

# Whether to send a notification e-mail when an error occurs.
SEND_ERROR_EMAIL = True

# ---------------------------------------------------------------------------
# Keyword configuration
# ---------------------------------------------------------------------------
# Keywords are matched as regular expressions. Each entry is a dict with:
#   key:    the regular expression.
#   length: the length of the span to mask.
#   offset: where masking starts, relative to the start of the regex match.
#           When omitted, the offset defaults to the length of the matched
#           string, i.e. masking starts right after the end of the match.
#
# NOTE(review): inside a character class ``|`` is a literal pipe, not
# alternation, so e.g. ``[姓|娃|生]名`` also matches ``|名``. Confirm whether
# that is intended before changing any of these patterns.
NAME_KEYS = [
    {"key": "^名[:|:]$", "length": 3},
    {"key": "[姓|娃|生]名", "length": 4},
    {"key": "[款|献]人", "length": 4},
    {"key": "[交|文][^病]?人", "length": 4},
    {"key": "购买方名称", "length": 4},
    {"key": "[(|(]个人[)|)]", "length": 3, "offset": -3},
    {"key": "客户", "length": 4},
]

ID_CARD_NUM_KEYS = [
    {"key": "[身]?份证号", "length": 19},
    {"key": "身份号码", "length": 19},
]

# Visually similar characters; only takes effect for name masking.
SIMILAR_CHAR = {
    "凤": ["风"],
}

# Shared OCR engine, constructed once when this module is imported.
# If spaces should not be recognised, use_space_char=False can be passed.
# Always test after enabling that option: in version 2.7.3 it has a bug that
# causes recognition to fail.
OCR = PaddleOCR(
    show_log=False,
    det_db_thresh=0.1,
    det_db_box_thresh=0.3,
    det_limit_side_len=1248,
    drop_score=0.3,
)