Files
fcb_photo_review/photo_review/__init__.py
2024-08-20 17:15:06 +08:00

103 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import socket
import jieba
from paddlenlp import Taskflow
from paddleocr import PaddleOCR
# Host name of the machine running this process.
HOSTNAME: str = socket.gethostname()

# ---------------------------------------------------------------------------
# Project settings
# ---------------------------------------------------------------------------
PHHD_BATCH_SIZE: int = 10  # number of cases fetched from the database per batch
SLEEP_MINUTES: int = 5  # wait time, in minutes, when no cases were found
SEND_ERROR_EMAIL: bool = True  # whether to send error-report emails
LAYOUT_ANALYSIS: bool = False  # whether to enable layout analysis
# ---------------------------------------------------------------------------
# Information-extraction keyword settings
# ---------------------------------------------------------------------------
# Each constant is a list of keyword strings for one extracted field.
PATIENT_NAME = ["患者姓名"]  # patient name
ADMISSION_DATE = ["入院日期"]  # admission date
DISCHARGE_DATE = ["出院日期"]  # discharge date
MEDICAL_EXPENSES = ["费用总额"]  # medical expenses incurred
PERSONAL_CASH_PAYMENT = ["个人现金支付"]  # personal cash payment
PERSONAL_ACCOUNT_PAYMENT = ["个人账户支付"]  # personal account payment
PERSONAL_FUNDED_AMOUNT = ["自费金额"]  # personal self-funded amount
MEDICAL_INSURANCE_TYPE = ["医保类型"]  # medical insurance type
HOSPITAL = ["医院"]  # hospital visited
DEPARTMENT = ["科室"]  # department visited
DOCTOR = ["主治医生"]  # attending doctor
ADMISSION_ID = ["住院号"]  # admission (inpatient) number
SETTLEMENT_ID = ["医保结算单号码"]  # medical insurance settlement statement number
AGE = ["年龄"]  # age
UPPERCASE_MEDICAL_EXPENSES = ["大写总额"]  # total amount in capital ("大写") form

# Schemas: the ordered concatenation of the keyword lists above, one per
# document type.
SETTLEMENT_LIST_SCHEMA = [
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *MEDICAL_EXPENSES,
    *PERSONAL_CASH_PAYMENT,
    *PERSONAL_ACCOUNT_PAYMENT,
    *PERSONAL_FUNDED_AMOUNT,
    *MEDICAL_INSURANCE_TYPE,
    *ADMISSION_ID,
    *SETTLEMENT_ID,
    *UPPERCASE_MEDICAL_EXPENSES,
]
DISCHARGE_RECORD_SCHEMA = [
    *HOSPITAL,
    *DEPARTMENT,
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *DOCTOR,
    *ADMISSION_ID,
    *AGE,
]
COST_LIST_SCHEMA = [
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *MEDICAL_EXPENSES,
]
# ---------------------------------------------------------------------------
# Alias settings
# ---------------------------------------------------------------------------
# Each key is replaced with the value from its alias entry. For efficiency,
# only the first matching key is replaced.
HOSPITAL_ALIAS = {
    "沐阳": ["沭阳"],
    "连水": ["涟水"],
    "唯宁": ["睢宁"],
    "九〇四": ["904"],
    "漂水": ["溧水"],
}
DEPARTMENT_ALIAS = {
    "耳鼻喉": ["耳鼻咽喉"],
}

# ---------------------------------------------------------------------------
# Search filter settings
# ---------------------------------------------------------------------------
# Single characters are filtered out by default.
HOSPITAL_FILTER = [
    "医院",
    "人民",
    "第一",
    "第二",
    "第三",
    "大学",
    "附属",
]
# ---------------------------------------------------------------------------
# Word-segmentation settings
# ---------------------------------------------------------------------------
# Tune jieba's word frequencies so that "肿瘤" and "医院" are cut as two
# separate segments (a tuple argument names the desired segments).
jieba.suggest_freq(("肿瘤", "医院"), tune=True)
# ---------------------------------------------------------------------------
# Model settings
# ---------------------------------------------------------------------------
# Everything below runs at module top level, i.e. the models are constructed
# when this module is imported.


def _uie_taskflow(schema, task_path, **overrides):
    """Build an "information_extraction" Taskflow with the shared settings.

    Args:
        schema: List of field keywords passed as the Taskflow ``schema``.
        task_path: Path of the model directory to load.
        **overrides: Extra keyword arguments forwarded unchanged to
            ``Taskflow`` (e.g. ``device_id``).

    Returns:
        The constructed ``Taskflow`` instance.
    """
    return Taskflow(
        "information_extraction",
        schema=schema,
        model="uie-x-base",
        task_path=task_path,
        layout_analysis=LAYOUT_ANALYSIS,
        precision="fp16",
        **overrides,
    )


SETTLEMENT_IE = _uie_taskflow(SETTLEMENT_LIST_SCHEMA, "model/settlement_list_model")
DISCHARGE_IE = _uie_taskflow(DISCHARGE_RECORD_SCHEMA, "model/discharge_record_model")
# NOTE(review): COST_IE and OCR are pinned to device 1 while the two
# extractors above use the default device - presumably to spread GPU load;
# confirm against the deployment hardware.
COST_IE = _uie_taskflow(COST_LIST_SCHEMA, "model/cost_list_model", device_id=1)

OCR = PaddleOCR(
    use_angle_cls=False,
    show_log=False,
    gpu_id=1,
    det_db_box_thresh=0.3,
)