优化案子处理逻辑

2024-10-09 09:39:29 +08:00
parent a3fa1e502e
commit 795134f566
10 changed files with 257 additions and 304 deletions
--- a/photo_review.py
+++ b/photo_review.py
@@ -5,12 +5,13 @@ from time import sleep
 from sqlalchemy import update
 from my_email.error_email import send_error_email
 from db import MysqlSession
 from db.mysql import ZxPhhd
 from log import LOGGING_CONFIG
 from my_email.error_email import send_error_email
 from photo_review import auto_photo_review, SEND_ERROR_EMAIL
 # 照片审核自动识别脚本入口
 if __name__ == '__main__':
    program_name = '照片审核自动识别脚本'
    logging.config.dictConfig(LOGGING_CONFIG)
@@ -19,7 +20,7 @@ if __name__ == '__main__':
    parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
    args = parser.parse_args()
    if args.clean:
-        # 主要用于启动时，清除仍在涂抹中的案子
+        # 启动时清除仍在识别中的案子
        session = MysqlSession()
        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
        session.execute(update_flag)
@@ -33,7 +34,6 @@ if __name__ == '__main__':
        logging.info(f'【{program_name}】开始运行')
        auto_photo_review.main()
    except Exception as e:
-        error_logger = logging.getLogger('error')
+        logging.getLogger('error').error(traceback.format_exc())
        error_logger.error(traceback.format_exc())
        if SEND_ERROR_EMAIL:
            send_error_email(program_name, repr(e), traceback.format_exc())
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -1,7 +1,4 @@
 import json
 import logging
 import os
 import tempfile
 import time
 from collections import defaultdict
 from time import sleep
@@ -10,72 +7,24 @@ import cv2
 import fitz
 import jieba
 import numpy as np
 import requests
 import zxingcpp
 from rapidfuzz import process, fuzz
 from sqlalchemy import update
 from db import MysqlSession
-from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview
+from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview
 from log import HOSTNAME
 from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
    DEPARTMENT_FILTER
-from services.paddle_services import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, \
+from services.paddle_services import IE_KEY
    PERSONAL_CASH_PAYMENT, PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, \
    DEPARTMENT, DOCTOR, ADMISSION_ID, SETTLEMENT_ID, AGE, UPPERCASE_MEDICAL_EXPENSES
 from ucloud import ufile
 from util import image_util, common_util, html_util, model_util
 from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
    handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital
 # 合并信息抽取结果
 def merge_result(result1, result2):
    """
    Merge one information-extraction result into another, in place.

    For every key in ``result2`` the value is appended (via ``+``) to the
    value already stored under that key in ``result1``; a key missing from
    ``result1`` starts from an empty list.

    :param result1: dict that receives the merged values (it is mutated)
    :param result2: dict whose values are concatenated onto ``result1``
    :return: the same ``result1`` object, after merging
    """
    for key in result2:
        result1[key] = result1.get(key, []) + result2[key]
    return result1
 def ie_temp_image(ie, ocr, image):
    """
    Run information extraction on an in-memory image via a temporary file.

    The image is written to a temporary ``.jpg``, an OCR layout is obtained
    for that file, and ``ie`` is called with the file path and the layout.
    The temporary file is removed afterwards regardless of the outcome.

    :param ie: callable invoked with ``{"doc": path, "layout": layout}``;
        the first element of its return value is used
    :param ocr: OCR object handed to ``common_util.get_ocr_layout``
    :param image: image passed to ``cv2.imwrite``
    :return: the extraction result, or ``[]`` when OCR yields no layout or
        when any exception is raised (the exception is logged, not re-raised)
    """
    # delete=False: the file has to outlive the ``with`` block so that the
    # OCR/IE calls below can open it by name; it is removed in ``finally``.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        cv2.imwrite(temp_file.name, image)
    ie_result = []
    try:
        layout = common_util.get_ocr_layout(ocr, temp_file.name)
        if not layout:
            # OCR produced no layout, so there is nothing to extract
            ie_result = []
        else:
            ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
    except Exception as e:
        logging.error("信息抽取时出错", exc_info=e)
    finally:
        # Best-effort cleanup: a failed delete is only logged.
        try:
            os.remove(temp_file.name)
        except Exception as e:
            logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
    return ie_result
 # 关键信息提取
 def request_ie_result(task_enum, phrecs):
    """
    Request key-information extraction for a batch of records over HTTP.

    :param task_enum: object providing ``request_url()`` and ``schema()``
    :param phrecs: non-empty sequence of records exposing ``cfjaddress``,
        ``pk_phrec`` and ``pk_phhd``; ``phrecs[0]`` is indexed, so an empty
        sequence raises ``IndexError``
    :return: the ``"data"`` field of the JSON response body
    :raises Exception: when the response status code is not 200
    """
    url = task_enum.request_url()
    # Current Unix time in whole seconds, sent as the payload's "identity".
    identity = int(time.time())
    images = []
    for phrec in phrecs:
        images.append({"name": phrec.cfjaddress, "pk": phrec.pk_phrec})
    # pk_phhd is taken from the first record only.
    # NOTE(review): presumably all records share the same pk_phhd — confirm.
    payload = {"images": images, "schema": task_enum.schema(), "pk_phhd": phrecs[0].pk_phhd, "identity": identity}
    # NOTE(review): no timeout is passed to requests.post here.
    response = requests.post(url, json=payload)
    if response.status_code == 200:
        return response.json()["data"]
    else:
        raise Exception(f"请求信息抽取结果失败，状态码：{response.status_code}")
 # 尝试从二维码中获取高清图片
-def get_better_image_from_qrcode(image, image_id, dpi=150):
+def get_better_image_from_qrcode(img_path, image_id, dpi=150):
    def _parse_pdf_url(pdf_url_to_parse):
        pdf_file = None
        local_pdf_path = None
@@ -95,7 +44,10 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
            # 将渲染结果转换为OpenCV兼容的格式
            img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-            return img, page.get_text()
+            img_name, img_ext = image_util.parse_save_path(img_path)
            better_img_path = image_util.get_save_path(f'{img_name}.better.{img_ext}')
            cv2.imwrite(better_img_path, img)
            return better_img_path, page.get_text()
        except Exception as ex:
            logging.getLogger('error').error('解析pdf失败！', exc_info=ex)
            return None, None
@@ -107,7 +59,8 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
    jsczt_base_url = 'http://einvoice.jsczt.cn'
    try:
-        results = zxingcpp.read_barcodes(image)
+        img = cv2.imread(img_path)
        results = zxingcpp.read_barcodes(img, text_mode=zxingcpp.TextMode.HRI)
    except Exception as e:
        logging.getLogger('error').info('二维码识别失败', exc_info=e)
        results = []
@@ -145,106 +98,52 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
 # 关键信息提取
-def information_extraction(ie, phrecs, identity):
+def information_extraction(phrec, identity):
-    result = {}
+    """
-    for phrec in phrecs:
+    处理单张图片
    :param phrec:
    :return:
    """
    img_path = image_util.get_img_path(phrec.cfjaddress)
    if not img_path:
        img_url = ufile.get_private_url(phrec.cfjaddress)
        if not img_url:
            continue
        img_path = image_util.save_to_local(img_url)
        image = cv2.imread(img_path)
        # 尝试从二维码中获取高清图片
        better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
        if phrec.cRectype != '1':
            better_image = None  # 非结算单暂时不进行替换
        zx_ie_results = []
        if better_image is not None:
            img_angle = '0'
            image = better_image
            if text:
                info_extract = ie(text)[0]
            else:
                info_extract = ie_temp_image(ie, OCR, image)
            if not info_extract:
                continue
-            ie_result = {'result': info_extract, 'angle': img_angle}
+    # 尝试从二维码中获取高清图片
-            now = common_util.get_default_datetime()
+    better_img_path, text = get_better_image_from_qrcode(img_path, phrec.cfjaddress)
-            result_json = json.dumps(ie_result['result'], ensure_ascii=False)
+    if phrec.cRectype != '1':
-            if len(result_json) > 5000:
+        better_img_path = None  # 非结算单暂时不进行替换
-                result_json = result_json[:5000]
+    if better_img_path is not None:
-            zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
+        if text:
-                                            cfjaddress=phrec.cfjaddress, content=result_json,
+            info_extract = model_util.ie_settlement_text(text)[0]
                                            rotation_angle=int(ie_result['angle']),
                                            x_offset=0, y_offset=0, create_time=now,
                                            creator=HOSTNAME, update_time=now, updater=HOSTNAME))
            result = merge_result(result, ie_result['result'])
        else:
-            target_images = model_util.request_book_areas(img_path)  # 识别文档区域并裁剪
+            info_extract = model_util.ie_settlement(better_img_path,
-            angle_count = defaultdict(int, {'0': 0})  # 分割后图片的最优角度统计
+                                                    common_util.ocr_result_to_layout(model_util.ocr(better_img_path)))
            for target_image in target_images:
                dewarped_image = model_util.dewarp(target_image)  # 去扭曲
                angles = model_util.clas_orientation(dewarped_image)
-                split_results = image_util.split(dewarped_image)
+        return '基本医保结算单', info_extract
-                for split_result in split_results:
+    else:
-                    if split_result['img'] is None or split_result['img'].size == 0:
+        target_image = model_util.det_book(img_path)  # 识别文档区域并裁剪
-                        continue
+        dewarped_image = model_util.dewarp(target_image)  # 去扭曲
-                    rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
+        angles = model_util.clas_orientation(dewarped_image)
-                    ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
+        rotated_img = image_util.rotate(dewarped_image, int(angles[0]))
-                    if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
+        split_results = image_util.split(rotated_img)
-                        rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
+        ocr_result = []
-                        ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
+        for split_result in split_results:
-                    now = common_util.get_default_datetime()
+            if split_result['img'] is None:
-                    best_angle = ['0', 0]
+                continue
-                    for ie_result in ie_results:
+            ocr_result += model_util.ocr(rotated_img)
-                        if not ie_result['result']:
+        ocr_text = common_util.ocr_result_to_text(ocr_result)
-                            continue
+        rec_type = model_util.clas_text(ocr_text) if ocr_text else None
        if rec_type == '基本医保结算单':
            info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        elif rec_type == '出院记录':
            info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        elif rec_type == '费用清单':
            info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result))
        else:
            info_extract = None
-                        result_json = json.dumps(ie_result['result'], ensure_ascii=False)
+        return rec_type, info_extract
                        if len(result_json) > 5000:
                            result_json = result_json[:5000]
                        zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
                                                        cfjaddress=phrec.cfjaddress, content=result_json,
                                                        rotation_angle=int(ie_result['angle']),
                                                        x_offset=split_result['x_offset'],
                                                        y_offset=split_result['y_offset'], create_time=now,
                                                        creator=HOSTNAME, update_time=now, updater=HOSTNAME))
                        result = merge_result(result, ie_result['result'])
                        if len(ie_result['result']) > best_angle[1]:
                            best_angle = [ie_result['angle'], len(ie_result['result'])]
                    angle_count[best_angle[0]] += 1
            img_angle = max(angle_count, key=angle_count.get)
        if img_angle != '0' or better_image is not None:
            image = image_util.rotate(image, int(img_angle))
            with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
                cv2.imwrite(temp_file.name, image)
            try:
                ufile.upload_file(phrec.cfjaddress, temp_file.name)
                if img_angle != '0':
                    logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功，已旋转{img_angle}度。')
                    # 修正旋转角度
                    for zx_ie_result in zx_ie_results:
                        zx_ie_result.rotation_angle -= int(img_angle)
                else:
                    logging.info(f'高清图片[{phrec.cfjaddress}]替换成功！')
            except Exception as e:
                logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
            finally:
                common_util.delete_temp_file(temp_file.name)
        session = MysqlSession()
        session.add_all(zx_ie_results)
        session.commit()
        session.close()
    return result
 # 从keys中获取准确率最高的value
@@ -359,23 +258,24 @@ def search_department(department):
    return best_match
-def settlement_task(pk_phhd, settlement_list, identity):
+def settlement_task(pk_phhd, settlement_list_ie_result):
    settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
    settlement_data = {
        "pk_phhd": pk_phhd,
-        "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, PATIENT_NAME)),
+        "name": handle_name(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['name'])),
-        "admission_date_str": handle_original_data(get_best_value_in_keys(settlement_list_ie_result, ADMISSION_DATE)),
+        "admission_date_str": handle_original_data(
-        "discharge_date_str": handle_original_data(get_best_value_in_keys(settlement_list_ie_result, DISCHARGE_DATE)),
+            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['admission_date'])),
        "discharge_date_str": handle_original_data(
            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['discharge_date'])),
        "personal_cash_payment_str": handle_original_data(
-            get_best_value_in_keys(settlement_list_ie_result, PERSONAL_CASH_PAYMENT)),
+            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_cash_payment'])),
        "personal_account_payment_str": handle_original_data(
-            get_best_value_in_keys(settlement_list_ie_result, PERSONAL_ACCOUNT_PAYMENT)),
+            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_account_payment'])),
        "personal_funded_amount_str": handle_original_data(
-            get_best_value_in_keys(settlement_list_ie_result, PERSONAL_FUNDED_AMOUNT)),
+            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['personal_funded_amount'])),
        "medical_insurance_type_str": handle_original_data(
-            get_best_value_in_keys(settlement_list_ie_result, MEDICAL_INSURANCE_TYPE)),
+            get_best_value_in_keys(settlement_list_ie_result, IE_KEY['medical_insurance_type'])),
-        "admission_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, ADMISSION_ID)),
+        "admission_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['admission_id'])),
-        "settlement_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, SETTLEMENT_ID)),
+        "settlement_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, IE_KEY['settlement_id'])),
    }
    settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
    settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
@@ -385,28 +285,30 @@ def settlement_task(pk_phhd, settlement_list, identity):
    settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
    settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])
-    parse_money_result = parse_money(get_best_value_in_keys(settlement_list_ie_result, UPPERCASE_MEDICAL_EXPENSES),
+    parse_money_result = parse_money(
-                                     get_best_value_in_keys(settlement_list_ie_result, MEDICAL_EXPENSES))
+        get_best_value_in_keys(settlement_list_ie_result, IE_KEY['upper_case_medical_expenses']),
        get_best_value_in_keys(settlement_list_ie_result, IE_KEY['medical_expenses']))
    settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
    settlement_data["medical_expenses"] = parse_money_result[1]
    save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)
    return settlement_data
-def discharge_task(pk_phhd, discharge_record, identity):
+def discharge_task(pk_phhd, discharge_record_ie_result):
-    discharge_record_ie_result = information_extraction(DISCHARGE_IE, discharge_record, identity)
+    hospitals = get_values_of_keys(discharge_record_ie_result, IE_KEY['hospital'])
-    hospitals = get_values_of_keys(discharge_record_ie_result, HOSPITAL)
+    departments = get_values_of_keys(discharge_record_ie_result, IE_KEY['department'])
    departments = get_values_of_keys(discharge_record_ie_result, DEPARTMENT)
    discharge_data = {
        "pk_phhd": pk_phhd,
        "hospital": handle_hospital(",".join(hospitals)),
        "department": handle_department(",".join(departments)),
-        "name": handle_name(get_best_value_in_keys(discharge_record_ie_result, PATIENT_NAME)),
+        "name": handle_name(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['name'])),
-        "admission_date_str": handle_original_data(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_DATE)),
+        "admission_date_str": handle_original_data(
-        "discharge_date_str": handle_original_data(get_best_value_in_keys(discharge_record_ie_result, DISCHARGE_DATE)),
+            get_best_value_in_keys(discharge_record_ie_result, IE_KEY['admission_date'])),
-        "doctor": handle_name(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
+        "discharge_date_str": handle_original_data(
-        "admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
+            get_best_value_in_keys(discharge_record_ie_result, IE_KEY['discharge_date'])),
-        "age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
+        "doctor": handle_name(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['doctor'])),
        "admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['admission_id'])),
        "age": handle_age(get_best_value_in_keys(discharge_record_ie_result, IE_KEY['age'])),
    }
    discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
    discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
@@ -466,14 +368,16 @@ def discharge_task(pk_phhd, discharge_record, identity):
    return discharge_data
-def cost_task(pk_phhd, cost_list, identity):
+def cost_task(pk_phhd, cost_list_ie_result):
    cost_list_ie_result = information_extraction(COST_IE, cost_list, identity)
    cost_data = {
        "pk_phhd": pk_phhd,
-        "name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
+        "name": handle_name(get_best_value_in_keys(cost_list_ie_result, IE_KEY['name'])),
-        "admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
+        "admission_date_str": handle_original_data(
-        "discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
+            get_best_value_in_keys(cost_list_ie_result, IE_KEY['admission_date'])),
-        "medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES))
+        "discharge_date_str": handle_original_data(
            get_best_value_in_keys(cost_list_ie_result, IE_KEY['discharge_date'])),
        "medical_expenses_str": handle_original_data(
            get_best_value_in_keys(cost_list_ie_result, IE_KEY['medical_expenses']))
    }
    cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
    cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
@@ -483,28 +387,39 @@ def cost_task(pk_phhd, cost_list, identity):
 def photo_review(pk_phhd, name):
-    settlement_list = []
+    """
-    discharge_record = []
+    处理单个报销案子
-    cost_list = []
+    :param pk_phhd: 报销单主键
    :param name: 报销人姓名
    """
    settlement_result = defaultdict(list)
    discharge_result = defaultdict(list)
    cost_result = defaultdict(list)
    session = MysqlSession()
-    phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
+    phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
        ZxPhrec.pk_phhd == pk_phhd
    ).all()
    session.close()
    for phrec in phrecs:
        if phrec.cRectype == "1":
            settlement_list.append(phrec)
        elif phrec.cRectype == "3":
            discharge_record.append(phrec)
        elif phrec.cRectype == "4":
            cost_list.append(phrec)
    # 同一批图的标识
    identity = int(time.time())
-    settlement_data = settlement_task(pk_phhd, settlement_list, identity)
+    for phrec in phrecs:
-    discharge_data = discharge_task(pk_phhd, discharge_record, identity)
+        rec_type, ie_result = information_extraction(phrec, identity)
-    cost_data = cost_task(pk_phhd, cost_list, identity)
+        if rec_type == '基本医保结算单':
            rec_result = settlement_result
        elif rec_type == '出院记录':
            rec_result = discharge_result
        elif rec_type == '费用清单':
            rec_result = cost_result
        else:
            rec_result = None
        if rec_result:
            for key, value in ie_result.items():
                rec_result[key].append(value)
    settlement_data = settlement_task(pk_phhd, settlement_result)
    discharge_data = discharge_task(pk_phhd, discharge_result)
    cost_data = cost_task(pk_phhd, cost_result)
    review_result = {
        'pk_phhd': pk_phhd,
@@ -573,6 +488,9 @@ def photo_review(pk_phhd, name):
 def main():
    """
    照片审核批量控制
    """
    while 1:
        session = MysqlSession()
        phhds = (session.query(ZxPhhd.pk_phhd, ZxPhhd.cXm)
--- a/services/paddle_services/init.py
+++ b/services/paddle_services/init.py
@@ -1,34 +1,20 @@
 """
 信息抽取关键词配置
 """
-
+IE_KEY = {
-# 患者姓名
+    'name': '患者姓名',
-PATIENT_NAME = ['患者姓名']
+    'admission_date': '入院日期',
-# 入院日期
+    'discharge_date': '出院日期',
-ADMISSION_DATE = ['入院日期']
+    'medical_expenses': '费用总额',
-# 出院日期
+    'personal_cash_payment': '个人现金支付',
-DISCHARGE_DATE = ['出院日期']
+    'personal_account_payment': '个人账户支付',
-# 发生医疗费
+    'personal_funded_amount': '自费金额',
-MEDICAL_EXPENSES = ['费用总额']
+    'medical_insurance_type': '医保类型',
-# 个人现金支付
+    'hospital': '医院',
-PERSONAL_CASH_PAYMENT = ['个人现金支付']
+    'department': '科室',
-# 个人账户支付
+    'doctor': '主治医生',
-PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
+    'admission_id': '住院号',
-# 个人自费金额
+    'settlement_id': '医保结算单号码',
-PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
+    'age': '年龄',
-# 医保类别
+    'upper_case_medical_expenses': '大写总额'
-MEDICAL_INSURANCE_TYPE = ['医保类型']
+}
 # 就诊医院
 HOSPITAL = ['医院']
 # 就诊科室
 DEPARTMENT = ['科室']
 # 主治医生
 DOCTOR = ['主治医生']
 # 住院号
 ADMISSION_ID = ['住院号']
 # 医保结算单号码
 SETTLEMENT_ID = ['医保结算单号码']
 # 年龄
 AGE = ['年龄']
 # 大写总额
 UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
--- a/services/paddle_services/clas_text.py
+++ b/services/paddle_services/clas_text.py
@@ -19,7 +19,8 @@ def main():
    cls_result = CLAS(text)
    cls_result = cls_result[0].get('predictions')[0]
    if cls_result['score'] < 0.8:
-        raise Exception(f'识别结果置信度过低！text: {text}')
+        logging.info(f"识别结果置信度{cls_result['score']}过低！text: {text}")
        return None
    return cls_result['label']
--- a/services/paddle_services/ie_cost.py
+++ b/services/paddle_services/ie_cost.py
@@ -4,12 +4,14 @@ import logging.config
 from flask import Flask, request
 from paddlenlp import Taskflow
-from __init__ import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES
+from __init__ import IE_KEY
 from log import LOGGING_CONFIG
 from utils import process_request
 app = Flask(__name__)
-COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
+COST_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
    'name', 'admission_date', 'discharge_date', 'medical_expenses'
 ])
 COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
                task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
--- a/services/paddle_services/ie_discharge.py
+++ b/services/paddle_services/ie_discharge.py
@@ -4,14 +4,14 @@ import logging.config
 from flask import Flask, request
 from paddlenlp import Taskflow
-from __init__ import HOSPITAL, DEPARTMENT, PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, DOCTOR, ADMISSION_ID, AGE
+from __init__ import IE_KEY
 from log import LOGGING_CONFIG
 from utils import process_request
 app = Flask(__name__)
-DISCHARGE_RECORD_SCHEMA = (
+DISCHARGE_RECORD_SCHEMA = tuple(IE_KEY[key] for key in [
-        HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
+    'hospital', 'department', 'name', 'admission_date', 'discharge_date', 'doctor', 'admission_id', 'age'
-)
+])
 DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
                     task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')
--- a/services/paddle_services/ie_settlement.py
+++ b/services/paddle_services/ie_settlement.py
@@ -4,18 +4,16 @@ import logging.config
 from flask import Flask, request
 from paddlenlp import Taskflow
-from __init__ import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
+from __init__ import IE_KEY
    PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, ADMISSION_ID, SETTLEMENT_ID, \
    UPPERCASE_MEDICAL_EXPENSES
 from log import LOGGING_CONFIG
 from utils import process_request
 app = Flask(__name__)
-SETTLEMENT_LIST_SCHEMA = (
+SETTLEMENT_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
-        PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
+    'name', 'admission_date', 'discharge_date', 'medical_expenses', 'personal_cash_payment',
-        + PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
+    'personal_account_payment', 'personal_funded_amount', 'medical_insurance_type', 'admission_id', 'settlement_id',
-        + UPPERCASE_MEDICAL_EXPENSES
+    'uppercase_medical_expenses'
-)
+])
 SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
                         task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')
--- a/util/common_util.py
+++ b/util/common_util.py
@@ -12,6 +12,44 @@ def get_default_datetime():
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 def ocr_result_to_layout(ocr_result):
    """
    Convert OCR segments into a list of ``(box, text)`` layout entries.

    Each segment is read as ``segment[0]`` (four corner points, each
    ``[x, y]``) and ``segment[1][0]`` (the recognised text). The corner
    points are reduced to an axis-aligned ``[x1, y1, x2, y2]`` box; segments
    whose box has a negative width or height are skipped.

    :param ocr_result: sequence of OCR segments; a falsy value yields ``[]``
        NOTE(review): looks like the PaddleOCR result format — confirm.
    :return: list of ``(box, text)`` tuples
    """
    def _get_box(old_box):
        # Collapse the 4-point quad into [x1, y1, x2, y2].
        # Assumes the points are ordered top-left, top-right, bottom-right,
        # bottom-left — TODO confirm against the OCR service output.
        new_box = [
            min(old_box[0][0], old_box[3][0]),  # x1
            min(old_box[0][1], old_box[1][1]),  # y1
            max(old_box[1][0], old_box[2][0]),  # x2
            max(old_box[2][1], old_box[3][1]),  # y2
        ]
        return new_box
    def _normal_box(box_data):
        # Reject boxes whose height or width is negative.
        # Zero-sized boxes pass this check (the comparison is `< 0`).
        if box_data[3] - box_data[1] < 0 or box_data[2] - box_data[0] < 0:
            return False
        return True
    layout = []
    if not ocr_result:
        return layout
    for segment in ocr_result:
        box = segment[0]
        box = _get_box(box)
        if not _normal_box(box):
            continue
        text = segment[1][0]
        layout.append((box, text))
    return layout
 def ocr_result_to_text(ocr_results):
    """
    Concatenate the text of OCR segments into one string of at most 2048 characters.

    The text of each segment (``segment[1][0]``) is appended with no
    separator. Iteration stops as soon as 2048 characters have been
    collected, and the result is truncated to exactly that limit.

    :param ocr_results: iterable of OCR segments
    :return: the concatenated text, never longer than 2048 characters
    """
    text = ''
    for ocr_result in ocr_results:
        text += ocr_result[1][0]
        # Stop early: anything beyond the limit is cut off below anyway.
        if len(text) >= 2048:
            break
    return text[:2048]
 def get_ocr_layout(ocr, img_path):
    """
    获取ocr识别的结果，转为合适的layout形式
--- a/util/image_util.py
+++ b/util/image_util.py
@@ -1,7 +1,6 @@
 import logging
 import math
 import os
 import urllib.request
 import cv2
 import numpy
@@ -12,80 +11,59 @@ from tenacity import retry, stop_after_attempt, wait_random
 from log import PROJECT_ROOT
# Retried by tenacity: at most 3 attempts, a random 1-3 wait between them,
# and the last exception is re-raised; the `after` hook logs a warning.
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取图片失败！'))
 def read(image_path):
    """
    Read an image from the network or from local disk.

    :param image_path: an http(s) URL or a local file path; anything that
        starts with ``'http'`` is fetched over the network
    :return: the image as a NumPy array
        NOTE(review): cv2.imdecode / cv2.imread return None instead of
        raising when the data cannot be decoded, so that case is not
        retried — confirm callers handle None.
    """
    if image_path.startswith('http'):
        # Send the HTTP request and fetch the image data
        resp = urllib.request.urlopen(image_path, timeout=60)
        # Read the response body as bytes
        image_data = resp.read()
        # Wrap the bytes in a NumPy uint8 array
        image_np = numpy.frombuffer(image_data, numpy.uint8)
        # Decode the array into an OpenCV image
        image = cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    else:
        image = cv2.imread(image_path)
    return image
 def capture(image, rectangle):
    """
    截取图片
-    :param image: 图片NumPy数组
+    :param image: ndarray
    :param rectangle: 要截取的矩形
-    :return: 截取之后的图片NumPy
+    :return: 截取之后的ndarray图片
    """
    x1, y1, x2, y2 = rectangle
    height, width = image.shape[:2]
-    if x1 < 0:
+    # 确保坐标值在图片范围内
-        x1 = 0
+    x1 = max(0, x1)
-    if y1 < 0:
+    y1 = max(0, y1)
-        y1 = 0
+    x2 = min(width, x2)
-    if x2 > width:
+    y2 = min(height, y2)
        x2 = width
    if y2 > height:
        y2 = height
    return image[int(y1):int(y2), int(x1):int(x2)]
-def split(image, ratio=1.414, overlap=0.05, x_compensation=3):
+def split(img_path, ratio=1.414, overlap=0.05, x_compensation=3):
    """
    分割图片
-    :param image:图片，可以是NumPy数组或文件路径
+    :param img_path:图片路径
    :param ratio: 分割后的比例
    :param overlap: 图片之间的覆盖比例
    :param x_compensation: 横向补偿倍率
    :return: 分割后的图片组(NumPy数组形式)
    """
    split_result = []
-    if isinstance(image, str):
+    image = cv2.imread(img_path)
        image = read(image)
    height, width = image.shape[:2]
    hw_ratio = height / width
    wh_ratio = width / height
    img_name, img_ext = parse_save_path(img_path)
    if hw_ratio > ratio:  # 纵向过长
        new_img_height = width * ratio
        step = width * (ratio - overlap)  # 偏移步长
        for i in range(math.ceil(height / step)):
            offset = round(step * i)
            cropped_img = capture(image, [0, offset, width, offset + new_img_height])
-            split_result.append({'img': cropped_img, 'x_offset': 0, 'y_offset': offset})
+            split_path = get_save_path(f'{img_name}.split_{i}.{img_ext}')
            cv2.imwrite(split_path, cropped_img)
            split_result.append({'img': split_path, 'x_offset': 0, 'y_offset': offset})
    elif wh_ratio > ratio:  # 横向过长
        new_img_width = height * ratio
        step = height * (ratio - overlap * x_compensation)  # 一般文字是横向的，所以横向截取时增大重叠部分
        for i in range(math.ceil(width / step)):
            offset = round(step * i)
            cropped_img = capture(image, [offset, 0, offset + new_img_width, width])
-            split_result.append({'img': cropped_img, 'x_offset': offset, 'y_offset': 0})
+            split_path = get_save_path(f'{img_name}.split_{i}.{img_ext}')
            cv2.imwrite(split_path, cropped_img)
            split_result.append({'img': split_path, 'x_offset': offset, 'y_offset': 0})
    else:
-        split_result.append({'img': image, 'x_offset': 0, 'y_offset': 0})
+        split_result.append({'img': img_path, 'x_offset': 0, 'y_offset': 0})
    return split_result
@@ -108,15 +86,16 @@ def parse_rotation_angles(image):
    return angles
-def rotate(image, angle):
+def rotate(img_path, angle):
    """
    旋转图片
-    :param image: 图片NumPy数组
+    :param img_path: 图片NumPy数组
    :param angle: 逆时针旋转角度
    :return: 旋转后的图片NumPy数组
    """
    if angle == 0:
-        return image
+        return img_path
    image = cv2.imread(img_path)
    height, width = image.shape[:2]
    if angle == 180:
        new_width = width
@@ -132,7 +111,11 @@ def rotate(image, angle):
    matrix[1, 2] += (new_height - height) / 2
    # Arguments: source image, affine transform matrix, output size (width, height)
    rotated = cv2.warpAffine(image, matrix, (new_width, new_height))
-    return rotated
+
    img_name, img_ext = parse_save_path(img_path)
    rotated_path = get_save_path(f'{img_name}.rotate_{angle}.{img_ext}')
    cv2.imwrite(rotated_path, rotated)
    return rotated_path
 def invert_rotate_point(point, center, angle):
@@ -260,26 +243,38 @@ def parse_img_url(url):
    :return: image file name including its extension (basename of the url with the query string removed)
    """
    url = url.split('?')[0]
-    return os.path.basename(url).rsplit('.', 1)
+    return os.path.basename(url)
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('保存图片失败！'))
-def save_to_local(img_url, save_path=None):
+def save_to_local(img_url):
    """
    保存图片到本地
    :param img_url: 图片url
    :param save_path: 本地保存地址，精确到文件名
    :return: 本地保存地址
    """
    response = requests.get(img_url)
    response.raise_for_status()  # 检查响应状态码是否正常
-    if save_path is None:
+    save_path = get_save_path(parse_img_url(img_url))
        img_name, img_ext = parse_img_url(img_url)
        save_path = os.path.join(PROJECT_ROOT, 'tmp_img', img_name + '.' + img_ext)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path
 def get_img_path(img_full_name):
    """
    Look up an already-saved image by file name.

    :param img_full_name: image file name, passed unchanged to get_save_path
    :return: the path produced by get_save_path when something exists at that
             path, otherwise None
    """
    save_path = get_save_path(img_full_name)
    if os.path.exists(save_path):
        return save_path
    return None
 def get_save_path(img_full_name):
    """
    Build the path of an image inside the project's ``tmp_img`` directory.

    Only joins path components; nothing is created or checked on disk.

    :param img_full_name: file name to place under ``PROJECT_ROOT/tmp_img``
    :return: the joined path
    """
    return os.path.join(PROJECT_ROOT, 'tmp_img', img_full_name)
 def parse_save_path(img_path):
    """
    Split an image path into its base name and extension.

    :param img_path: path (or bare file name) of an image
    :return: (name, extension) split at the last dot of the file name; the
             extension has no leading dot
    """
    img_full_name = os.path.basename(img_path)
    # NOTE(review): a file name without any dot makes rsplit return a single
    # item, so the unpacking below raises ValueError -- confirm callers always
    # pass names that carry an extension.
    img_name, img_ext = img_full_name.rsplit('.', 1)
    return img_name, img_ext
--- a/util/model_util.py
+++ b/util/model_util.py
@@ -1,5 +1,6 @@
 import json
 import logging
 import os.path
 import requests
 from tenacity import retry, stop_after_attempt, wait_random
@@ -16,9 +17,10 @@ def ocr(img_path):
    url = 'http://ocr:5001'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
-        return response.json()
+        ocr_result = response.json()
-    else:
+        if ocr_result:
-        return None
+            return ocr_result[0]
    return None
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
@@ -40,7 +42,7 @@ def ie_settlement(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('从文本抽取基本医保结算单失败！'))
-def ie_settlement(text):
+def ie_settlement_text(text):
    """
    请求基本医保结算单信息抽取接口
    :param text: 待抽取文本
@@ -73,7 +75,7 @@ def ie_discharge(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('从文本抽取出院记录失败！'))
-def ie_discharge(text):
+def ie_discharge_text(text):
    """
    请求出院记录信息抽取接口
    :param text: 待抽取文本
@@ -106,7 +108,7 @@ def ie_cost(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('从文本抽取费用清单失败！'))
-def ie_cost(text):
+def ie_cost_text(text):
    """
    请求费用清单信息抽取接口
    :param text: 待抽取文本
@@ -147,9 +149,22 @@ def det_book(img_path):
    url = 'http://det_book:5006'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
-        return response.json()
+        book_path_list = response.json()
        if len(book_path_list) == 0:
            return img_path
        elif len(book_path_list) == 1:
            return book_path_list[0]
        else:
            max_book = img_path
            max_size = 0
            for book_path in book_path_list:
                book_size = os.path.getsize(book_path)
                if book_size > max_size:
                    max_book = book_path
                    max_size = book_size
            return max_book
    else:
-        return [img_path]
+        return img_path
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,