新增二维码识别替换高清图片功能

This commit is contained in:
2024-09-05 13:29:17 +08:00
parent 53a3dcd508
commit de631bef2e
6 changed files with 255 additions and 198 deletions

View File

@@ -2,9 +2,9 @@ import jieba
from paddlenlp import Taskflow
from paddleocr import PaddleOCR
"""
'''
项目配置
"""
'''
# Number of cases fetched from the database per batch
PHHD_BATCH_SIZE = 10
# 没有查询到案子的等待时间(分钟)
@@ -18,35 +18,35 @@ LAYOUT_ANALYSIS = False
信息抽取关键词配置
"""
# Extraction keywords: each constant is a list of label strings; several of
# them are concatenated with `+` to build the *_SCHEMA values.
# NOTE(review): every name is assigned twice below (double-quoted, then
# single-quoted) — this looks like the old and new sides of a diff hunk shown
# together. If run as-is the second assignment wins; only
# PERSONAL_FUNDED_AMOUNT differs in value (it gains '个人自费').
# Patient name
PATIENT_NAME = ["患者姓名"]
PATIENT_NAME = ['患者姓名']
# Admission date
ADMISSION_DATE = ["入院日期"]
ADMISSION_DATE = ['入院日期']
# Discharge date
DISCHARGE_DATE = ["出院日期"]
DISCHARGE_DATE = ['出院日期']
# Medical expenses incurred
MEDICAL_EXPENSES = ["费用总额"]
MEDICAL_EXPENSES = ['费用总额']
# Personal cash payment
PERSONAL_CASH_PAYMENT = ["个人现金支付"]
PERSONAL_CASH_PAYMENT = ['个人现金支付']
# Personal account payment
PERSONAL_ACCOUNT_PAYMENT = ["个人账户支付"]
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
# Personal self-funded amount (the second form adds a second keyword)
PERSONAL_FUNDED_AMOUNT = ["自费金额"]
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
# Medical insurance type
MEDICAL_INSURANCE_TYPE = ["医保类型"]
MEDICAL_INSURANCE_TYPE = ['医保类型']
# Hospital visited
HOSPITAL = ["医院"]
HOSPITAL = ['医院']
# Department visited
DEPARTMENT = ["科室"]
DEPARTMENT = ['科室']
# Attending doctor
DOCTOR = ["主治医生"]
DOCTOR = ['主治医生']
# Admission (inpatient) number
ADMISSION_ID = ["住院号"]
ADMISSION_ID = ['住院号']
# Medical insurance settlement statement number
SETTLEMENT_ID = ["医保结算单号码"]
SETTLEMENT_ID = ['医保结算单号码']
# Age
AGE = ["年龄"]
AGE = ['年龄']
# Total amount written in uppercase (capital) form
UPPERCASE_MEDICAL_EXPENSES = ["大写总额"]
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
SETTLEMENT_LIST_SCHEMA = \
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
@@ -58,47 +58,47 @@ DISCHARGE_RECORD_SCHEMA = \
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
"""
'''
别名配置
"""
'''
# Each key is replaced with the value(s) from its alias entry. For efficiency,
# only the first matching key is replaced.
# NOTE(review): the keys are presumably common misreadings of the place names
# in the values — confirm against the code that consumes these maps.
# NOTE(review): each entry appears twice (double-quoted, then single-quoted);
# in a dict literal the later duplicate key overrides the earlier one, and the
# values here are identical.
HOSPITAL_ALIAS = {
"沐阳": ["沭阳"],
"连水": ["涟水"],
"唯宁": ["睢宁"],
"九〇四": ["904"],
"漂水": ["溧水"],
'沐阳': ['沭阳'],
'连水': ['涟水'],
'唯宁': ['睢宁'], # see also: 雕宁 (noted by the author, not a key)
'九〇四': ['904'],
'漂水': ['溧水'],
}
# Department-name aliases, same key -> value replacement as above.
DEPARTMENT_ALIAS = {
"耳鼻喉": ["耳鼻咽喉"],
"急症": ["急诊"],
'耳鼻喉': ['耳鼻咽喉'],
'急症': ['急诊'],
}
"""
'''
搜索过滤配置
"""
'''
# Single characters are filtered by default
# NOTE(review): each list appears twice (double-quoted, then single-quoted);
# the second assignment wins if this is run as-is.
HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"]
HOSPITAL_FILTER = ['医院', '人民', '第一', '第二', '第三', '大学', '附属']
# NOTE(review): three of the four entries below are empty strings — possibly
# characters lost when this text was extracted; verify against the repository.
DEPARTMENT_FILTER = ["", "", "西", ""]
DEPARTMENT_FILTER = ['', '', '西', '']
"""
'''
分词配置
"""
# Tune jieba's word frequencies so that each tuple's pieces are segmented as
# separate words (second argument True applies the adjustment).
# NOTE(review): several arguments are empty strings — possibly characters lost
# when this text was extracted; verify against the repository.
jieba.suggest_freq(("肿瘤", "医院"), True)
jieba.suggest_freq(("", ""), True)
jieba.suggest_freq(("感染", ""), True)
jieba.suggest_freq(("", ""), True)
jieba.suggest_freq(("", ""), True)
'''
# Tune jieba's word frequencies so that each tuple's pieces are segmented as
# separate words (second argument True applies the adjustment).
# NOTE(review): several arguments are empty strings — possibly characters lost
# when this text was extracted; verify against the repository.
jieba.suggest_freq(('肿瘤', '医院'), True)
jieba.suggest_freq(('', ''), True)
jieba.suggest_freq(('感染', ''), True)
jieba.suggest_freq(('', ''), True)
jieba.suggest_freq(('', ''), True)
"""
'''
模型配置
"""
# Information-extraction pipelines (double-quoted form), one per document
# type. Each pairs a schema with the 'uie-x-base' model loaded from a local
# task_path, in fp16 precision, with layout analysis set by LAYOUT_ANALYSIS.
# Settlement list extractor
SETTLEMENT_IE = Taskflow("information_extraction", schema=SETTLEMENT_LIST_SCHEMA, model="uie-x-base",
task_path="model/settlement_list_model", layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
# Discharge record extractor
DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA, model="uie-x-base",
task_path="model/discharge_record_model", layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
# Cost list extractor; the only one that passes an explicit device_id (1)
COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", device_id=1,
task_path="model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
'''
# Information-extraction pipelines (single-quoted form), one per document
# type. Each pairs a schema with the 'uie-x-base' model loaded from a local
# task_path, in fp16 precision, with layout analysis set by LAYOUT_ANALYSIS.
# Settlement list extractor
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
# Discharge record extractor
DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
# Cost list extractor; the only one that passes an explicit device_id (1)
COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
# Shared OCR engine: angle classification off, logging off, GPU 1, and a
# detection box threshold of 0.3.
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3)