"""Project-wide configuration: runtime knobs, extraction schemas and models.

Importing this module instantiates three PaddleNLP information-extraction
pipelines and one PaddleOCR engine as module-level singletons.
"""
from paddlenlp import Taskflow
from paddleocr import PaddleOCR

# ---------------------------------------------------------------------------
# Project settings
# ---------------------------------------------------------------------------

# Number of retries after the project crashes.
RETRY_TIME = 1

# Number of cases fetched from the database per batch.
PHHD_BATCH_SIZE = 10

# How long to wait (in minutes) when no case was found.
SLEEP_MINUTES = 5

# Whether to send an e-mail when an error occurs.
SEND_ERROR_EMAIL = True

# Whether layout analysis is enabled for the extraction pipelines.
LAYOUT_ANALYSIS = False

# ---------------------------------------------------------------------------
# Information-extraction keywords
#
# Each constant is a one-element list holding the prompt label used in a
# schema; they are lists so that schemas can be composed by concatenation.
# ---------------------------------------------------------------------------

PATIENT_NAME = ["患者姓名"]                 # patient name
ADMISSION_DATE = ["入院日期"]               # admission date
DISCHARGE_DATE = ["出院日期"]               # discharge date
MEDICAL_EXPENSES = ["费用总额"]             # medical expenses incurred
PERSONAL_CASH_PAYMENT = ["个人现金支付"]    # personal cash payment
PERSONAL_ACCOUNT_PAYMENT = ["个人账户支付"]  # personal account payment
PERSONAL_FUNDED_AMOUNT = ["自费金额"]       # self-funded amount
MEDICAL_INSURANCE_TYPE = ["医保类型"]       # medical insurance type
HOSPITAL = ["医院"]                         # hospital visited
DEPARTMENT = ["科室"]                       # department visited
DOCTOR = ["主治医生"]                       # attending doctor
ADMISSION_ID = ["住院号"]                   # inpatient (admission) number
SETTLEMENT_ID = ["医保结算单号码"]          # insurance settlement sheet number
AGE = ["年龄"]                              # age
UPPERCASE_MEDICAL_EXPENSES = ["大写总额"]   # total amount in capital form

# Schema for settlement lists.
SETTLEMENT_LIST_SCHEMA = [
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *MEDICAL_EXPENSES,
    *PERSONAL_CASH_PAYMENT,
    *PERSONAL_ACCOUNT_PAYMENT,
    *PERSONAL_FUNDED_AMOUNT,
    *MEDICAL_INSURANCE_TYPE,
    *ADMISSION_ID,
    *SETTLEMENT_ID,
    *UPPERCASE_MEDICAL_EXPENSES,
]

# Schema for discharge records.
DISCHARGE_RECORD_SCHEMA = [
    *HOSPITAL,
    *DEPARTMENT,
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *DOCTOR,
    *ADMISSION_ID,
    *AGE,
]

# Schema for cost lists.
COST_LIST_SCHEMA = [
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *MEDICAL_EXPENSES,
]

# ---------------------------------------------------------------------------
# Models
# ---------------------------------------------------------------------------


def _build_extractor(schema, task_path, **extra_options):
    """Create an information-extraction Taskflow with the shared settings.

    Args:
        schema: List of labels the pipeline should extract.
        task_path: Directory passed to Taskflow as ``task_path``.
        **extra_options: Additional keyword arguments forwarded to Taskflow
            unchanged (e.g. ``device_id``).

    Returns:
        The constructed ``Taskflow`` instance.
    """
    return Taskflow(
        "information_extraction",
        schema=schema,
        model="uie-x-base",
        task_path=task_path,
        layout_analysis=LAYOUT_ANALYSIS,
        precision='fp16',
        **extra_options,
    )


SETTLEMENT_IE = _build_extractor(
    SETTLEMENT_LIST_SCHEMA,
    "model/settlement_list_model",
)

DISCHARGE_IE = _build_extractor(
    DISCHARGE_RECORD_SCHEMA,
    "model/discharge_record_model",
)

# Unlike the two extractors above, this one passes an explicit device_id.
# NOTE(review): presumably to spread load across GPUs together with OCR
# below (gpu_id=1) - confirm against the deployment setup.
COST_IE = _build_extractor(
    COST_LIST_SCHEMA,
    "model/cost_list_model",
    device_id=1,
)

OCR = PaddleOCR(
    use_angle_cls=False,
    show_log=False,
    gpu_id=1,
    det_db_box_thresh=0.3,
)