import json
from time import sleep

from paddlenlp import Taskflow

from config.mysql import MysqlSession
from photo_review.entity.bd_yljg import BdYljg
from photo_review.entity.bd_ylks import BdYlks
from photo_review.entity.zx_ie_cost import ZxIeCost
from photo_review.entity.zx_ie_discharge import ZxIeDischarge
from photo_review.entity.zx_ie_settlement import ZxIeSettlement
from photo_review.entity.zx_ocr import ZxOcr
from photo_review.entity.zx_phhd import ZxPhhd
from photo_review.entity.zx_phrec import ZxPhrec
from photo_review.util.data_util import handle_date, handle_decimal
from photo_review.util.ucloud import get_private_url


def information_extraction(schema, phrecs, task_path):
    """Run key-information extraction over every picture in ``phrecs``.

    The raw extraction result of each picture is stored as a ``ZxOcr`` row
    (JSON-encoded), and the first result dict of each picture is merged into
    the returned dict; a later picture overwrites an earlier one on the same
    key.

    :param schema: list of field names passed to the Taskflow as its schema.
    :param phrecs: iterable of records exposing ``pk_phhd``, ``pk_phrec`` and
        ``cfjaddress``.
    :param task_path: path of the fine-tuned model handed to the Taskflow.
    :return: dict mapping schema keys to the extracted value lists.
    """
    results = {}
    # Built lazily and reused for all pictures: schema and task_path do not
    # change inside the loop, and nothing is loaded when there are no pictures.
    ie = None
    for phrec in phrecs:
        pic_path = get_private_url(phrec.cfjaddress)
        if not pic_path:
            continue
        if ie is None:
            ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path)
        result = ie({"doc": pic_path})

        # Persist the result of every single picture right after extraction.
        session = MysqlSession()
        try:
            zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, cfjaddress=phrec.cfjaddress,
                           content=json.dumps(result, ensure_ascii=False))
            session.add(zx_ocr)
            session.commit()
        finally:
            # Release the session even when the insert or commit fails.
            session.close()

        # Guard against an empty result list instead of raising IndexError.
        if result:
            results.update(result[0])
    return results


def get_best_value_in_keys(source, keys):
    """Return the text with the highest probability among all ``keys``.

    :param source: dict mapping a key to a list of dicts carrying ``text``
        and ``probability`` entries.
    :param keys: keys of ``source`` to inspect.
    :return: the most probable non-empty text, or ``None`` if there is none.
    """
    # Best candidate found so far and its probability.
    result = None
    most_probability = 0
    for key in keys:
        for value in source.get(key) or []:
            text = value.get("text")
            probability = value.get("probability")
            # Entries without text or without a probability cannot compete.
            if not text or probability is None:
                continue
            if probability > most_probability:
                result = text
                # Remember the winning probability so that only strictly
                # better candidates replace the current result.
                most_probability = probability
    return result


def get_values_of_keys(source, keys):
    """Collect the first extracted text of every key in ``keys``.

    :param source: dict mapping a key to a list of dicts carrying ``text``.
    :param keys: keys of ``source`` to inspect.
    :return: list of the non-empty first texts, in the order of ``keys``.
    """
    result = []
    for key in keys:
        candidates = source.get(key)
        if not candidates:
            continue
        text = candidates[0].get("text")
        if text:
            result.append(text)
    return result


def save_or_update_ie(table, pk_phhd, data):
    """Insert or update the extraction row of ``table`` for ``pk_phhd``.

    Entries whose value is ``None`` or an empty string are dropped first, so
    on update they never overwrite what is already stored.

    :param table: mapped entity class, queried by its ``pk_phhd`` attribute.
    :param pk_phhd: key of the case the row belongs to.
    :param data: dict of attribute names to values.
    """
    data = {k: v for k, v in data.items() if v is not None and v != ""}
    session = MysqlSession()
    try:
        db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none()
        if db_data is not None:
            for k, v in data.items():
                setattr(db_data, k, v)
        else:
            # Only build a new entity when there is nothing to update.
            session.add(table(**data))
        session.commit()
    finally:
        # Release the session even when the query or commit fails.
        session.close()


def photo_review(pk_phhd):
    """Extract and store the key information of all pictures of one case.

    The pictures of the case are grouped by ``cRectype`` ("1" settlement
    list, "3" discharge record, "4" cost list); each group is run through its
    own extraction model and the result is saved to the matching table.

    :param pk_phhd: key of the case to process.
    """
    settlement_list = []
    discharge_record = []
    cost_list = []

    session = MysqlSession()
    try:
        phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \
            .filter(ZxPhrec.pk_phhd == pk_phhd) \
            .all()
    finally:
        session.close()

    for phrec in phrecs:
        if phrec.cRectype == "1":
            settlement_list.append(phrec)
        elif phrec.cRectype == "3":
            discharge_record.append(phrec)
        elif phrec.cRectype == "4":
            cost_list.append(phrec)

    # Alternative labels under which each field may appear on a document.
    name_key = ["姓名", "交款人"]
    admission_date_key = ["入院日期", "住院时间", "开始日期", "费用发生时间", "入院时间", "住院日期"]
    discharge_date_key = ["出院日期", "结束日期", "出院时间"]
    medical_expenses_key = ["费用总额", "总费用", "医疗费用总额", "总计", "合计", "金额合计", "总金额", "本次住院费用总金额",
                            "价税合计", "合计金额", "费用合计", "项目合计"]
    personal_cash_payment_key = ["个人现金支付", "个人支付金额", "个人现金支出", "现金支付", "实际现金", "个人负担总金额",
                                 "本次现金", "医院收取病人金额", "个人付现", "个人现金", "自费金额"]
    personal_account_payment_key = ["个人账户支付", "账户支付", "个人账户支出", "个账支付", "账户支出"]
    personal_funded_amount_key = ["自费", "全自费金额", "个人自费", "范围外费用", "超限价自费费用", "目录外自费", "自费总额",
                                  "自费费用"]
    medical_insurance_type_key = ["医保类型"]
    hospital_key = ["医院", "就诊医院", "医院名称", "医学中心"]
    department_key = ["科别", "病人科室", "住院科别", "科室", "住院科室", "科室名称"]
    doctor_key = ["主治医师", "住院医师", "医师", "主治及以上医师签名", "主治医生签名", "医生签名", "主治医师签名", "医师签名",
                  "上级医师", "主诊医师", "经治医师", "副主任中医师号"]

    # Basic medical insurance settlement list
    settlement_list_schema = \
        name_key + admission_date_key + discharge_date_key + medical_expenses_key + personal_cash_payment_key \
        + personal_account_payment_key + personal_funded_amount_key + medical_insurance_type_key
    # Discharge record
    discharge_record_schema = \
        hospital_key + department_key + name_key + admission_date_key + discharge_date_key + doctor_key
    # Cost list
    cost_list_schema = name_key + admission_date_key + discharge_date_key + medical_expenses_key

    # --- Settlement list ---------------------------------------------------
    settlement_list_ie_result = information_extraction(settlement_list_schema, settlement_list,
                                                       "config/model/settlement_list_model")
    settlement_data = {
        "pk_phhd": pk_phhd,
        "name": get_best_value_in_keys(settlement_list_ie_result, name_key),
        "admission_date_str": get_best_value_in_keys(settlement_list_ie_result, admission_date_key),
        "discharge_date_str": get_best_value_in_keys(settlement_list_ie_result, discharge_date_key),
        "medical_expenses_str": get_best_value_in_keys(settlement_list_ie_result, medical_expenses_key),
        "personal_cash_payment_str": get_best_value_in_keys(settlement_list_ie_result, personal_cash_payment_key),
        "personal_account_payment_str": get_best_value_in_keys(settlement_list_ie_result,
                                                               personal_account_payment_key),
        "personal_funded_amount_str": get_best_value_in_keys(settlement_list_ie_result, personal_funded_amount_key),
        "medical_insurance_type": get_best_value_in_keys(settlement_list_ie_result, medical_insurance_type_key),
    }
    settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
    settlement_data["discharge_date"] = handle_date(settlement_data["discharge_date_str"])
    settlement_data["medical_expenses"] = handle_decimal(settlement_data["medical_expenses_str"])
    settlement_data["personal_cash_payment"] = handle_decimal(settlement_data["personal_cash_payment_str"])
    settlement_data["personal_account_payment"] = handle_decimal(settlement_data["personal_account_payment_str"])
    settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
    save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)

    # --- Discharge record --------------------------------------------------
    discharge_record_ie_result = information_extraction(discharge_record_schema, discharge_record,
                                                        "config/model/discharge_record_model")
    discharge_data = {
        "pk_phhd": pk_phhd,
        "hospital": get_best_value_in_keys(discharge_record_ie_result, hospital_key),
        "department": get_best_value_in_keys(discharge_record_ie_result, department_key),
        "name": get_best_value_in_keys(discharge_record_ie_result, name_key),
        "admission_date_str": get_best_value_in_keys(discharge_record_ie_result, admission_date_key),
        "discharge_date_str": get_best_value_in_keys(discharge_record_ie_result, discharge_date_key),
        "doctor": get_best_value_in_keys(discharge_record_ie_result, doctor_key),
    }
    discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
    discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])

    # If an extracted hospital name matches a BdYljg row, store that row's
    # key and name in place of the raw extracted text.
    hospital_value = get_values_of_keys(discharge_record_ie_result, hospital_key)
    if hospital_value:
        session = MysqlSession()
        try:
            yljg = session.query(BdYljg.pk_yljg, BdYljg.name) \
                .filter(BdYljg.name.in_(hospital_value)).limit(1).one_or_none()
        finally:
            session.close()
        if yljg:
            discharge_data["pk_yljg"] = yljg.pk_yljg
            discharge_data["hospital"] = yljg.name

    # Same lookup for the department, against BdYlks.
    department_value = get_values_of_keys(discharge_record_ie_result, department_key)
    if department_value:
        session = MysqlSession()
        try:
            ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
                .filter(BdYlks.name.in_(department_value)).limit(1).one_or_none()
        finally:
            session.close()
        if ylks:
            discharge_data["pk_ylks"] = ylks.pk_ylks
            discharge_data["department"] = ylks.name
    save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)

    # --- Cost list ---------------------------------------------------------
    cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model")
    cost_data = {
        "pk_phhd": pk_phhd,
        "name": get_best_value_in_keys(cost_list_ie_result, name_key),
        "admission_date_str": get_best_value_in_keys(cost_list_ie_result, admission_date_key),
        "discharge_date_str": get_best_value_in_keys(cost_list_ie_result, discharge_date_key),
        "medical_expenses_str": get_best_value_in_keys(cost_list_ie_result, medical_expenses_key),
    }
    cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
    cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
    # The expense total is an amount, so it is parsed as a decimal like the
    # settlement amounts above, not as a date.
    cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
    save_or_update_ie(ZxIeCost, pk_phhd, cost_data)


def main():
    """Poll for new cases with ``cStatus == '2'`` and review them one by one.

    Runs forever; sleeps five minutes whenever no new case is found.
    """
    # Key of the reimbursement case processed last.
    last_pk_phhd = 0
    # Keep polling for new cases.
    while True:
        session = MysqlSession()
        try:
            # Order by key so that advancing last_pk_phhd never skips a
            # pending case with a smaller key.
            phhds = session.query(ZxPhhd.pk_phhd) \
                .filter(ZxPhhd.pk_phhd > last_pk_phhd) \
                .filter(ZxPhhd.cStatus == '2') \
                .order_by(ZxPhhd.pk_phhd) \
                .limit(1) \
                .all()
        finally:
            session.close()

        if phhds:
            for phhd in phhds:
                pk_phhd = phhd.pk_phhd
                photo_review(pk_phhd)
                last_pk_phhd = pk_phhd
        else:
            # No new case found; wait 5 minutes before querying again.
            sleep(5 * 60)