From d64a71b2bbafcf1f4e6c496986f0caf1148ad7a9 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Mon, 20 May 2024 17:22:24 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=EF=BC=9A=E5=B0=86=E7=BB=93?= =?UTF-8?q?=E6=9E=9C=E4=BF=9D=E5=AD=98=E5=88=B0zx=5Focr=EF=BC=9B=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E5=80=BC=E7=9A=84=E6=8A=BD=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- photo_review/photo_review.py | 83 ++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index 01f0a7d..af66b6f 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -1,3 +1,4 @@ +import json from time import sleep from paddlenlp import Taskflow @@ -8,6 +9,7 @@ from photo_review.entity.bd_ylks import BdYlks from photo_review.entity.zx_ie_cost import ZxIeCost from photo_review.entity.zx_ie_discharge import ZxIeDischarge from photo_review.entity.zx_ie_settlement import ZxIeSettlement +from photo_review.entity.zx_ocr import ZxOcr from photo_review.entity.zx_phhd import ZxPhhd from photo_review.entity.zx_phrec import ZxPhrec from photo_review.util.data_util import handle_date, handle_decimal @@ -15,26 +17,41 @@ from photo_review.util.ucloud import get_private_url # 关键信息提取 -def information_extraction(schema, pictures, task_path): +def information_extraction(schema, phrecs, task_path): results = {} - for picture in pictures: - pic_path = get_private_url(picture) + for phrec in phrecs: + pic_path = get_private_url(phrec.cfjaddress) if pic_path: ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path) result = ie({"doc": pic_path}) + + # 提取完保存每张图片的结果 + session = MysqlSession() + zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, cfjaddress=phrec.cfjaddress, + content=json.dumps(result, ensure_ascii=False)) + session.add(zx_ocr) + session.commit() + session.close() + results.update(result[0]) return results -# 从keys中获取第一个不为空的value -def get_value_in_keys(source, keys): +# 从keys中获取准确率最高的value +def get_best_value_in_keys(source, keys): + # 最终结果 + result = None + # 最大可能性 + most_probability = 0 for key in keys: - value = source.get(key) - if value: - value = value[0].get("text") - if value: - return value - return None + values = source.get(key) + if values: + for value in values: + text = value.get("text") + probability = value.get("probability") + if text and probability > most_probability: + result = text + return result # 从keys中获取所有value组成list @@ -69,17 +86,17 @@ def photo_review(pk_phhd): cost_list = [] session = MysqlSession() - phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \ + phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \ .filter(ZxPhrec.pk_phhd == pk_phhd) \ .all() session.close() for phrec in phrecs: if phrec.cRectype == "1": - settlement_list.append(phrec.cfjaddress) + settlement_list.append(phrec) elif phrec.cRectype == "3": - discharge_record.append(phrec.cfjaddress) + discharge_record.append(phrec) elif phrec.cRectype == "4": - cost_list.append(phrec.cfjaddress) + cost_list.append(phrec) name_key = ["姓名", "交款人"] admission_date_key = ["入院日期", "住院时间", "开始日期", "费用发生时间", "入院时间", "住院日期"] @@ -110,14 +127,14 @@ def photo_review(pk_phhd): "config/model/settlement_list_model") settlement_data = { "pk_phhd": pk_phhd, - "name": get_value_in_keys(settlement_list_ie_result, name_key), - "admission_date_str": get_value_in_keys(settlement_list_ie_result, admission_date_key), - "discharge_date_str": get_value_in_keys(settlement_list_ie_result, discharge_date_key), - "medical_expenses_str": get_value_in_keys(settlement_list_ie_result, medical_expenses_key), - "personal_cash_payment_str": get_value_in_keys(settlement_list_ie_result, personal_cash_payment_key), - "personal_account_payment_str": get_value_in_keys(settlement_list_ie_result, personal_account_payment_key), - "personal_funded_amount_str": get_value_in_keys(settlement_list_ie_result, personal_funded_amount_key), - "medical_insurance_type": get_value_in_keys(settlement_list_ie_result, medical_insurance_type_key) + "name": get_best_value_in_keys(settlement_list_ie_result, name_key), + "admission_date_str": get_best_value_in_keys(settlement_list_ie_result, admission_date_key), + "discharge_date_str": get_best_value_in_keys(settlement_list_ie_result, discharge_date_key), + "medical_expenses_str": get_best_value_in_keys(settlement_list_ie_result, medical_expenses_key), + "personal_cash_payment_str": get_best_value_in_keys(settlement_list_ie_result, personal_cash_payment_key), + "personal_account_payment_str": get_best_value_in_keys(settlement_list_ie_result, personal_account_payment_key), + "personal_funded_amount_str": get_best_value_in_keys(settlement_list_ie_result, personal_funded_amount_key), + "medical_insurance_type": get_best_value_in_keys(settlement_list_ie_result, medical_insurance_type_key) } settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"]) settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"]) @@ -132,12 +149,12 @@ def photo_review(pk_phhd): "config/model/discharge_record_model") discharge_data = { "pk_phhd": pk_phhd, - "hospital": get_value_in_keys(discharge_record_ie_result, hospital_key), - "department": get_value_in_keys(discharge_record_ie_result, department_key), - "name": get_value_in_keys(discharge_record_ie_result, name_key), - "admission_date_str": get_value_in_keys(discharge_record_ie_result, admission_date_key), - "discharge_date_str": get_value_in_keys(discharge_record_ie_result, discharge_date_key), - "doctor": get_value_in_keys(discharge_record_ie_result, doctor_key) + "hospital": get_best_value_in_keys(discharge_record_ie_result, hospital_key), + "department": get_best_value_in_keys(discharge_record_ie_result, department_key), + "name": get_best_value_in_keys(discharge_record_ie_result, name_key), + "admission_date_str": get_best_value_in_keys(discharge_record_ie_result, admission_date_key), + "discharge_date_str": get_best_value_in_keys(discharge_record_ie_result, discharge_date_key), + "doctor": get_best_value_in_keys(discharge_record_ie_result, doctor_key) } discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"]) discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"]) @@ -164,10 +181,10 @@ def photo_review(pk_phhd): cost_list_ie_result = information_extraction(cost_list_schema, cost_list, "config/model/cost_list_model") cost_data = { "pk_phhd": pk_phhd, - "name": get_value_in_keys(cost_list_ie_result, name_key), - "admission_date_str": get_value_in_keys(cost_list_ie_result, admission_date_key), - "discharge_date_str": get_value_in_keys(cost_list_ie_result, discharge_date_key), - "medical_expenses_str": get_value_in_keys(cost_list_ie_result, medical_expenses_key) + "name": get_best_value_in_keys(cost_list_ie_result, name_key), + "admission_date_str": get_best_value_in_keys(cost_list_ie_result, admission_date_key), + "discharge_date_str": get_best_value_in_keys(cost_list_ie_result, discharge_date_key), + "medical_expenses_str": get_best_value_in_keys(cost_list_ie_result, medical_expenses_key) } cost_data["admission_date"] = handle_date(cost_data["admission_date_str"]) cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])