from paddleocr import PaddleOCR

# ---------------------------------------------------------------------------
# Project settings
# ---------------------------------------------------------------------------

# Number of cases fetched from the database per batch.
PHHD_BATCH_SIZE = 20
# Wait time, in minutes, when the query returns no cases.
SLEEP_MINUTES = 5
# Whether to send an alert email when an exception occurs.
SEND_ERROR_EMAIL = True

# ---------------------------------------------------------------------------
# Keyword settings
# ---------------------------------------------------------------------------

# Each "key" is matched as a regular expression.
# NOTE(review): "length" is presumably the number of characters handled after
# the matched key -- confirm against the code that consumes these tables.
# NOTE(review): inside a character class `|` is a literal pipe, not
# alternation, so e.g. "[姓|娃]名" also matches "|名" -- confirm this is intended.
NAME_KEYS = [
    {"key": pattern, "length": length}
    for pattern, length in (
        ("^名[:|:]$", 3),  # may lead to incorrect masking
        ("[姓|娃]名", 4),
        ("款人", 4),
        ("交[^病]人", 4),
        ("购买方名称", 4),
    )
]

ID_CARD_NUM_KEYS = [
    {"key": pattern, "length": length}
    for pattern, length in (
        ("身份证", 20),
        ("份证号", 19),
    )
]

# Visually similar characters; only takes effect when masking names.
SIMILAR_CHAR = {
    "凤": ["风"],
}

# If spaces should not be recognised, use_space_char=False can be passed.
# Always test when changing that option: in version 2.7.3 it has a bug that
# makes recognition fail.
OCR = PaddleOCR(
    use_angle_cls=False,
    show_log=False,
    det_db_box_thresh=0.3,
    det_limit_side_len=1248,
    drop_score=0.3,
)