115 lines
3.3 KiB
Python
115 lines
3.3 KiB
Python
import jieba
|
||
from paddlenlp import Taskflow
|
||
from paddleocr import PaddleOCR
|
||
|
||
# ---------------------------------------------------------------------------
# Project configuration
# ---------------------------------------------------------------------------

# Number of cases fetched from the database per batch.
PHHD_BATCH_SIZE = 10

# How long to wait, in minutes, when the query returns no cases.
SLEEP_MINUTES = 5

# Whether to send an e-mail when an error occurs.
SEND_ERROR_EMAIL = True

# Whether layout analysis is enabled.
LAYOUT_ANALYSIS = False
|
||
|
||
"""
|
||
Information-extraction keyword configuration
|
||
"""
|
||
# Each constant below is a list of keyword strings for one field. The lists
# are concatenated further down into one schema per document type, so they
# must remain lists (they are combined with `+`).

# Patient name
PATIENT_NAME = ['患者姓名']

# Admission date
ADMISSION_DATE = ['入院日期']

# Discharge date
DISCHARGE_DATE = ['出院日期']

# Medical expenses incurred
MEDICAL_EXPENSES = ['费用总额']

# Amount paid in cash by the individual
PERSONAL_CASH_PAYMENT = ['个人现金支付']

# Amount paid from the individual's account
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']

# Self-funded amount (two keyword variants)
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']

# Medical insurance category
MEDICAL_INSURANCE_TYPE = ['医保类型']

# Hospital visited
HOSPITAL = ['医院']

# Department visited
DEPARTMENT = ['科室']

# Attending doctor
DOCTOR = ['主治医生']

# Admission (inpatient) number
ADMISSION_ID = ['住院号']

# Medical-insurance settlement sheet number
SETTLEMENT_ID = ['医保结算单号码']

# Age
AGE = ['年龄']

# Total amount written in uppercase (capital) numerals
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']

# Schema for settlement lists: keyword lists concatenated in this exact order.
SETTLEMENT_LIST_SCHEMA = (
    PATIENT_NAME
    + ADMISSION_DATE
    + DISCHARGE_DATE
    + MEDICAL_EXPENSES
    + PERSONAL_CASH_PAYMENT
    + PERSONAL_ACCOUNT_PAYMENT
    + PERSONAL_FUNDED_AMOUNT
    + MEDICAL_INSURANCE_TYPE
    + ADMISSION_ID
    + SETTLEMENT_ID
    + UPPERCASE_MEDICAL_EXPENSES
)

# Schema for discharge records.
DISCHARGE_RECORD_SCHEMA = (
    HOSPITAL
    + DEPARTMENT
    + PATIENT_NAME
    + ADMISSION_DATE
    + DISCHARGE_DATE
    + DOCTOR
    + ADMISSION_ID
    + AGE
)

# Schema for cost lists.
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
|
||
|
||
# ---------------------------------------------------------------------------
# Alias configuration
# ---------------------------------------------------------------------------
# Each key is replaced by its value. For efficiency, only the first matching
# key is replaced, so the insertion order of the entries is significant.
HOSPITAL_ALIAS = {
    '沐阳': ['沭阳'],
    '连水': ['涟水'],
    # NOTE(review): the original inline comment here reads "雕宁" —
    # presumably another variant seen for 睢宁 that is not mapped; confirm.
    '唯宁': ['睢宁'],
    '九〇四': ['904'],
    '漂水': ['溧水'],
}

DEPARTMENT_ALIAS = {
    '耳鼻喉': ['耳鼻咽喉'],
    '急症': ['急诊'],
}
|
||
|
||
# ---------------------------------------------------------------------------
# Search filter configuration
# ---------------------------------------------------------------------------
# Single characters are filtered by default.
# NOTE(review): the code that applies these lists is not in this file;
# confirm how the single-character entries in DEPARTMENT_FILTER interact
# with that default.
HOSPITAL_FILTER = ['医院', '人民', '第一', '第二', '第三', '大学', '附属']

DEPARTMENT_FILTER = ['医', '伤', '西', '新']
|
||
|
||
# ---------------------------------------------------------------------------
# Word-segmentation configuration
# ---------------------------------------------------------------------------
# Each call passes a tuple of segments with tune=True, which asks jieba to
# adjust its word frequencies so that the joined text is cut into exactly
# those segments. These calls run at import time and mutate jieba's
# global dictionary.
jieba.suggest_freq(('肿瘤', '医院'), True)
jieba.suggest_freq(('骨', '伤'), True)
jieba.suggest_freq(('感染', '性'), True)
jieba.suggest_freq(('胆', '道'), True)
jieba.suggest_freq(('脾', '胃'), True)
|
||
|
||
# ---------------------------------------------------------------------------
# Model configuration
# ---------------------------------------------------------------------------
# One information-extraction Taskflow per document type, each paired with its
# own schema and task_path. All three are constructed at import time.
# The task_path values are relative paths, so they resolve against the
# process's working directory.

# Extractor for settlement lists.
SETTLEMENT_IE = Taskflow(
    'information_extraction',
    schema=SETTLEMENT_LIST_SCHEMA,
    model='uie-x-base',
    task_path='model/settlement_list_model',
    layout_analysis=LAYOUT_ANALYSIS,
    precision='fp16',
)

# Extractor for discharge records.
DISCHARGE_IE = Taskflow(
    'information_extraction',
    schema=DISCHARGE_RECORD_SCHEMA,
    model='uie-x-base',
    task_path='model/discharge_record_model',
    layout_analysis=LAYOUT_ANALYSIS,
    precision='fp16',
)

# Extractor for cost lists.
# NOTE(review): this is the only Taskflow given an explicit device_id (1);
# the other two use the default — confirm the intended device placement.
COST_IE = Taskflow(
    'information_extraction',
    schema=COST_LIST_SCHEMA,
    model='uie-x-base',
    device_id=1,
    task_path='model/cost_list_model',
    layout_analysis=LAYOUT_ANALYSIS,
    precision='fp16',
)
|
||
|
||
# Module-level PaddleOCR engine, constructed at import time.
# rec_model_dir is a relative path, so it resolves against the process's
# working directory.
OCR = PaddleOCR(
    gpu_id=1,
    use_angle_cls=False,
    show_log=False,
    # Detection settings.
    det_db_thresh=0.1,
    det_db_box_thresh=0.3,
    det_limit_side_len=1248,
    drop_score=0.3,
    # Custom recognition model and the algorithm name it is loaded with.
    rec_model_dir='model/ocr/openatom_rec_repsvtr_ch_infer',
    rec_algorithm='SVTR_LCNet',
)
|