From 732bfb828a81b98f3820e3899c61f24b22e660f5 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Tue, 2 Jul 2024 08:50:21 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BD=BF=E7=94=A8=E5=B9=B6=E5=8F=91=E8=B0=83?= =?UTF-8?q?=E7=94=A8api=E6=96=B9=E5=BC=8F=E8=8E=B7=E5=8F=96=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E6=8A=BD=E5=8F=96=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/keys.py | 2 +- photo_review/enumeration/task.py | 26 +++++++ photo_review/photo_review.py | 116 ++++++++++++++----------------- 3 files changed, 79 insertions(+), 65 deletions(-) create mode 100644 photo_review/enumeration/task.py diff --git a/config/keys.py b/config/keys.py index a06c338..8645717 100644 --- a/config/keys.py +++ b/config/keys.py @@ -30,4 +30,4 @@ SETTLEMENT_LIST_SCHEMA = \ DISCHARGE_RECORD_SCHEMA = \ HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR -COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES \ No newline at end of file +COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES diff --git a/photo_review/enumeration/task.py b/photo_review/enumeration/task.py new file mode 100644 index 0000000..b09e8d6 --- /dev/null +++ b/photo_review/enumeration/task.py @@ -0,0 +1,26 @@ +from enum import Enum + +from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA + + +class TaskEnum(Enum): + SETTLEMENT = "SETTLEMENT" + DISCHARGE = "DISCHARGE" + COST = "COST" + + def request_url(self): + base_url = "http://192.168.5.9:8000" + if self == TaskEnum.SETTLEMENT: + return base_url + "/nlp/ie/settlement" + elif self == TaskEnum.DISCHARGE: + return base_url + "/nlp/ie/discharge" + elif self == TaskEnum.COST: + return base_url + "/nlp/ie/cost" + + def schema(self): + if self == TaskEnum.SETTLEMENT: + return SETTLEMENT_LIST_SCHEMA + elif self == TaskEnum.DISCHARGE: + return DISCHARGE_RECORD_SCHEMA + elif self == TaskEnum.COST: + return COST_LIST_SCHEMA diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index 35f6d8a..5c31e18 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -1,4 +1,4 @@ -import json +import concurrent.futures import logging import math import os @@ -10,30 +10,27 @@ import urllib.request import cv2 import numpy as np import paddleclas -from paddlenlp import Taskflow -from paddleocr import PaddleOCR +import requests sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from time import sleep from sqlalchemy import update from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ - PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ - SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA + PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR from config.mysql import MysqlSession -from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, LAYOUT_ANALYSIS +from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES from photo_review.entity.bd_yljg import BdYljg from photo_review.entity.bd_ylks import BdYlks from photo_review.entity.zx_ie_cost import ZxIeCost from photo_review.entity.zx_ie_discharge import ZxIeDischarge from photo_review.entity.zx_ie_settlement import ZxIeSettlement -from photo_review.entity.zx_ocr import ZxOcr from photo_review.entity.zx_phhd import ZxPhhd from photo_review.entity.zx_phrec import ZxPhrec from photo_review.util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ handle_insurance_type, handle_original_data, handle_hospital, handle_department from photo_review.util.util import get_default_datetime -from ucloud import ucloud +from photo_review.enumeration.task import TaskEnum # 获取图片 @@ -178,6 +175,18 @@ def get_ocr_layout(ocr, img_path): return layout +def request_ie_result(task_enum, phrec, identity): + url = task_enum.request_url() + payload = {"image_name": phrec.cfjaddress, "schema": task_enum.schema(), "pk_phhd": phrec.pk_phhd, + "pk_phrec": phrec.pk_phrec, "identity": identity} + response = requests.post(url, json=payload) + + if response.status_code == 200: + return response.json()["data"] + else: + raise Exception(f"请求信息抽取结果失败,状态码:{response.status_code}") + + def ie_temp_image(ie, ocr, image): with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: cv2.imwrite(temp_file.name, image) @@ -201,39 +210,13 @@ def ie_temp_image(ie, ocr, image): # 关键信息提取 -def information_extraction(ie, phrecs): +def information_extraction(task_enum, phrecs): result = {} # 同一批图的标识 identity = int(time.time()) - ocr = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False) for phrec in phrecs: - pic_path = ucloud.get_private_url(phrec.cfjaddress) - if not pic_path: - continue - split_result = split_image(pic_path) - - for img in split_result: - angles = get_image_rotation_angles(img["img"]) - rotated_img = rotate_image(img["img"], int(angles[0])) - ie_results = [{"result": ie_temp_image(ie, ocr, rotated_img), "angle": angles[0]}] - if not ie_results[0] or len(ie_results[0]) < len(ie.kwargs.get("schema")): - rotated_img = rotate_image(img["img"], int(angles[1])) - ie_results.append({"result": ie_temp_image(ie, ocr, rotated_img), "angle": angles[1]}) - - now = get_default_datetime() - for ie_result in ie_results: - result_json = json.dumps(ie_result["result"], ensure_ascii=False) - if len(result_json) > 5000: - result_json = result_json[:5000] - session = MysqlSession() - zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, cfjaddress=phrec.cfjaddress, - content=result_json, rotation_angle=ie_result["angle"], x_offset=img["x_offset"], - y_offset=img["y_offset"], create_time=now, update_time=now) - session.add(zx_ocr) - session.commit() - session.close() - - result = merge_result(result, ie_result["result"]) + ie_result = request_ie_result(task_enum, phrec, identity) + result = merge_result(result, ie_result) return result @@ -290,27 +273,8 @@ def save_or_update_ie(table, pk_phhd, data): session.close() -def photo_review(pk_phhd): - settlement_list = [] - discharge_record = [] - cost_list = [] - - session = MysqlSession() - phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \ - .filter(ZxPhrec.pk_phhd == pk_phhd) \ - .all() - session.close() - for phrec in phrecs: - if phrec.cRectype == "1": - settlement_list.append(phrec) - elif phrec.cRectype == "3": - discharge_record.append(phrec) - elif phrec.cRectype == "4": - cost_list.append(phrec) - - settlement_list_ie_result = information_extraction( - Taskflow("information_extraction", schema=SETTLEMENT_LIST_SCHEMA, model="uie-x-base", - task_path="config/model/settlement_list_model", layout_analysis=LAYOUT_ANALYSIS), settlement_list) +def settlement_task(pk_phhd, settlement_list): + settlement_list_ie_result = information_extraction(TaskEnum.SETTLEMENT, settlement_list) settlement_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, PATIENT_NAME)), @@ -336,9 +300,9 @@ def photo_review(pk_phhd): settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"]) save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data) - discharge_record_ie_result = information_extraction( - Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA, model="uie-x-base", - task_path="config/model/discharge_record_model", layout_analysis=LAYOUT_ANALYSIS), discharge_record) + +def discharge_task(pk_phhd, discharge_record): + discharge_record_ie_result = information_extraction(TaskEnum.DISCHARGE, discharge_record) discharge_data = { "pk_phhd": pk_phhd, "hospital": handle_hospital(get_best_value_in_keys(discharge_record_ie_result, HOSPITAL)), @@ -375,9 +339,9 @@ def photo_review(pk_phhd): discharge_data["department"] = ylks.name save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data) - cost_list_ie_result = information_extraction( - Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base", - task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS), cost_list) + +def cost_task(pk_phhd, cost_list): + cost_list_ie_result = information_extraction(TaskEnum.COST, cost_list) cost_data = { "pk_phhd": pk_phhd, "name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)), @@ -391,6 +355,30 @@ def photo_review(pk_phhd): save_or_update_ie(ZxIeCost, pk_phhd, cost_data) +def photo_review(pk_phhd): + settlement_list = [] + discharge_record = [] + cost_list = [] + + session = MysqlSession() + phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress) \ + .filter(ZxPhrec.pk_phhd == pk_phhd) \ + .all() + session.close() + for phrec in phrecs: + if phrec.cRectype == "1": + settlement_list.append(phrec) + elif phrec.cRectype == "3": + discharge_record.append(phrec) + elif phrec.cRectype == "4": + cost_list.append(phrec) + + with concurrent.futures.ProcessPoolExecutor() as executor: + executor.submit(settlement_task, pk_phhd, settlement_list) + executor.submit(discharge_task, pk_phhd, discharge_record) + executor.submit(cost_task, pk_phhd, cost_list) + + def main(): # 持续检测新案子 while 1: