Compare commits

124 Commits

Author SHA1 Message Date
3044f1fc08 Fix HEIC image saving issue 2024-10-21 09:32:39 +08:00
da18d890f7 Remove redundant imports 2024-10-21 09:02:30 +08:00
ae52d0594e Move temporary-image cleanup into a separate script 2024-10-21 08:45:39 +08:00
d51e56b8f2 Dongtai Hospital interface not yet ready 2024-10-21 08:37:25 +08:00
83339b5e58 Extend the wait time for the API services 2024-10-18 16:28:52 +08:00
727743d20e Add invalid QR-code URLs; improve page-number null checks 2024-10-18 16:21:39 +08:00
814730a0f0 QR-code recognition: add Nantong TCM Hospital 2024-10-18 16:14:00 +08:00
c9894d257e Lower the process limit 2024-10-18 14:45:08 +08:00
68043e5773 Raise the process limit 2024-10-18 14:30:08 +08:00
fe58bb3bfa Null-check information-extraction results 2024-10-18 14:08:13 +08:00
ce44a81603 Adjust GPU allocation 2024-10-18 14:05:53 +08:00
4b90bf6dfa Test enabling the new version everywhere 2024-10-18 13:55:47 +08:00
248e49bf4b Three-document classification underperforms; disable it for now and classify by database records 2024-10-18 13:39:01 +08:00
401954dca0 Improve the three-document classification method 2024-10-18 13:04:42 +08:00
6529dc3d98 Improve text-classification precision 2024-10-18 11:02:58 +08:00
3f93bd476a Fix page-number sorting 2024-10-18 10:49:01 +08:00
d85b3fff8f Improve page-number parsing 2024-10-18 10:24:52 +08:00
61a7802674 Adjust null checks in text classification 2024-10-18 10:19:33 +08:00
9556da47e9 Filter out abnormal page-number values 2024-10-18 10:05:38 +08:00
3710450221 Add null checks to text classification 2024-10-18 09:42:51 +08:00
27a4395ca0 Adjust keywords for detecting missing discharge-record pages 2024-10-18 09:37:43 +08:00
f116798c30 Sort missing-page notices 2024-10-18 09:06:49 +08:00
8c47beb00c Adjust text-classification precision; fp16 may only accept 1024 characters 2024-10-18 08:56:12 +08:00
74920869e7 Fix data types in page-number operations 2024-10-17 17:29:39 +08:00
9d0db073d6 Extend the wait time for the API services 2024-10-17 17:12:19 +08:00
8e7745f1f6 Run the document-detection model on GPU 2024-10-17 17:08:29 +08:00
cc53243647 Fix null-pointer error when OCR recognizes no content 2024-10-17 16:59:10 +08:00
39da0d8a00 Test automatic review and automatic masking 2024-10-17 16:43:51 +08:00
a2e1f10261 Cost lists are hard to locate precisely; skip replacement for now 2024-10-17 15:38:14 +08:00
e1bd9f3786 Update zx_ie_cost 2024-10-17 15:19:28 +08:00
46f295d422 Fix empty image-segmentation results 2024-10-17 15:18:06 +08:00
1a0caf30d0 Limit the text length for text classification 2024-10-17 15:13:41 +08:00
25df420be8 Improve data types 2024-10-17 15:03:29 +08:00
b5dffaf5bd Fix page-number analysis 2024-10-17 14:55:19 +08:00
0e4cfd10b6 Joint test of automatic review and automatic masking 2024-10-17 14:48:58 +08:00
f98969d957 Fix boolean operations 2024-10-17 14:34:31 +08:00
0c9bed8661 Fix merge handling of discharge records 2024-10-17 14:29:12 +08:00
d0b4a77817 Test automatic review separately 2024-10-17 14:22:44 +08:00
00e5ca7c30 Test automatic review separately 2024-10-17 14:17:47 +08:00
5dee4ed568 Add image-build configuration 2024-10-17 14:04:43 +08:00
06869e691f Test automatic masking separately 2024-10-17 13:41:52 +08:00
8e06fdafa0 Clean up temporary images left behind by program errors 2024-10-17 13:38:29 +08:00
84d106c7de Improve image-deletion logic in automatic masking 2024-10-17 13:12:07 +08:00
9c41fab95c Improve discharge-record handling when PDFs are present 2024-10-17 12:58:23 +08:00
0060c4ad59 Fix minor errors 2024-10-17 10:38:29 +08:00
d374e0743a Improve image classification and image-orientation detection 2024-10-16 17:01:56 +08:00
947b4f20f3 Improve orientation-selection conditions 2024-10-15 15:04:24 +08:00
445d57e8c6 Improve orientation-selection conditions 2024-10-15 14:21:23 +08:00
b09f16fe23 Adjust where images are deleted 2024-10-15 13:45:11 +08:00
c28fc62d3f Fix global-variable access 2024-10-15 13:27:55 +08:00
b332aa00dd Improve amount handling 2024-10-15 11:29:06 +08:00
5af6256376 Improve image storage; delete intermediate images promptly 2024-10-15 10:17:01 +08:00
15ea3ff96f Update zx_ie_result 2024-10-14 13:03:14 +08:00
19237d3a3c Fix page-number parsing 2024-10-12 15:46:50 +08:00
0b0882d456 Add API logging 2024-10-12 15:26:13 +08:00
304f6897f0 Revert "Use a smaller base image"
This reverts commit a9f172fdb0.
2024-10-12 13:52:38 +08:00
a9f172fdb0 Use a smaller base image 2024-10-12 13:50:31 +08:00
ac4e4ff8f8 Fix service dependencies 2024-10-12 13:44:45 +08:00
f7fbe709bf Enable automatic masking 2024-10-12 13:37:46 +08:00
396550058f Add beautifulsoup4 dependency 2024-10-12 13:31:26 +08:00
b9ac638b38 Remove dead method 2024-10-12 13:29:14 +08:00
894cab4f0b Add rapidfuzz dependency 2024-10-12 13:17:11 +08:00
bb6d9c3b47 Add pymupdf dependency 2024-10-12 13:13:06 +08:00
f8280e87ee Fix mirror-source replacement 2024-10-12 12:56:56 +08:00
608a647621 Improve image layering 2024-10-12 11:32:09 +08:00
7b9d9ca589 Configure a domestic apt mirror 2024-10-12 11:28:31 +08:00
d9b24e906d Fix missing libGL.so.1 library 2024-10-12 11:16:32 +08:00
97c7b2cfce Add missing dependencies 2024-10-12 11:13:00 +08:00
004dd12004 Switch the base image 2024-10-12 11:09:18 +08:00
cc9d020008 The base image runs Debian; fix the download command 2024-10-12 10:48:06 +08:00
7335553080 Remove privilege escalation 2024-10-12 10:44:16 +08:00
ebb10b2816 Fix missing libGL.so.1 library 2024-10-12 10:42:40 +08:00
98fb9fa861 Add missing dependencies 2024-10-12 10:36:47 +08:00
c75415164e Add .dockerignore configuration 2024-10-12 10:27:01 +08:00
03d8652b8f Add missing dependencies 2024-10-12 10:23:51 +08:00
e3be5cf4b2 Fix service dependencies 2024-10-12 10:18:08 +08:00
c92b549480 Enable automatic recognition 2024-10-12 10:14:31 +08:00
d36740d729 Fix decorator error 2024-10-12 10:03:39 +08:00
a1dea6f29c Unify the image flow in photo anonymization 2024-10-11 15:17:26 +08:00
0fc0c80d6f Fix the missing-page condition for discharge records 2024-10-11 10:26:54 +08:00
f3930cc7bd Improve automatic-review checks 2024-10-11 10:03:20 +08:00
a11cefb999 Fix some English spellings; fix image passing; fix page-number parsing 2024-10-10 15:36:46 +08:00
5c0fc0f819 Fix list value access 2024-10-10 14:45:39 +08:00
77010f0598 Adjust keywords for missing discharge-record pages 2024-10-10 14:08:39 +08:00
e4b58e30c0 Add missing-page checks 2024-10-10 11:24:16 +08:00
15fe5d4f0d Store information-extraction results in a table; check for missing items from the extraction results 2024-10-10 09:24:09 +08:00
fc69aa5b9d Fix known errors 2024-10-09 14:50:02 +08:00
795134f566 Improve case-handling logic 2024-10-09 09:39:29 +08:00
a3fa1e502e Adjust image names 2024-09-29 13:55:37 +08:00
7a4cb5263a Add a text information-extraction API 2024-09-27 15:31:11 +08:00
46be9a26be Add an initial automatic-review feature 2024-09-27 14:53:16 +08:00
f1149854ce Unify the model APIs; add a text-classification API 2024-09-27 13:50:55 +08:00
117b29a737 Fix package imports 2024-09-27 08:52:47 +08:00
3219f28934 Treat services as a package 2024-09-26 17:22:51 +08:00
2e1c0a57c7 Fix method call 2024-09-26 17:20:38 +08:00
2dcd2d2a34 Add directory description 2024-09-26 17:13:37 +08:00
153eb70f84 Test whether the API services start successfully 2024-09-26 17:05:07 +08:00
b5aba0418b Fix model paths 2024-09-26 16:56:48 +08:00
603b027ca6 Fix the new image-build file 2024-09-26 16:47:59 +08:00
d4c54b04f5 Build a new image 2024-09-26 16:43:34 +08:00
fc3e7b4ed4 Add API logging 2024-09-26 16:41:30 +08:00
a62c2af816 Fix the image-correction API IP 2024-09-26 16:23:48 +08:00
0618754da2 Test the dewarping model 2024-09-26 16:04:50 +08:00
c5a03ad16f Unify quote style; improve the project layout 2024-09-26 15:16:57 +08:00
ff9d612e67 Adjust container volume mounts 2024-09-26 13:44:05 +08:00
86d28096d4 Change the base image 2024-09-26 11:25:05 +08:00
87180cd282 Upgrade opencc 2024-09-26 10:55:18 +08:00
f13ffd1fe9 Adjust the base image 2024-09-26 10:51:06 +08:00
09f62b36a9 Fix command syntax 2024-09-26 09:43:05 +08:00
186cab0317 Debug docker-compose startup 2024-09-25 16:29:56 +08:00
101b2126f4 Remove the hard-coded gunicorn launch during testing 2024-09-25 16:24:56 +08:00
d4a695e9ea Add gunicorn dependency 2024-09-25 16:14:00 +08:00
72794f699e Fix the docker image build 2024-09-25 16:07:12 +08:00
3189caf7aa Add a test feature 2024-09-25 15:19:45 +08:00
b8c1202957 Restructure the project; all models are now called via APIs 2024-09-25 14:46:37 +08:00
7647df7d74 Move doc_dewarp 2024-09-24 17:10:56 +08:00
3438cf6e0e Move paddle_detection 2024-09-24 17:02:56 +08:00
90a6d5ec75 Turn dewarping and image-orientation classification into APIs 2024-09-24 08:36:34 +08:00
9c21152823 Fix URLs and image saving 2024-09-23 15:32:16 +08:00
c091a82a91 Improve image transfer for the APIs 2024-09-23 14:45:03 +08:00
a2a82df21c Improve the Flask APIs 2024-09-20 14:47:43 +08:00
f0c03e763b Improve naming; classes and modules should not share names 2024-09-20 14:32:31 +08:00
7b6e78373c Remove duplicate check_ie_result 2024-09-20 12:40:08 +08:00
65b7126348 Remove unused configuration 2024-09-20 11:32:37 +08:00
2111 changed files with 2011 additions and 1051 deletions

@@ -238,8 +238,11 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-# Models are bind-mounted into the container
-/model
+# Bind-mounted into the container
+/log
+/services/paddle_services/log
+/services/paddle_services/model
+/tmp_img
 # docker
 Dockerfile
 docker-compose*.yml

.gitignore

@@ -142,7 +142,11 @@ cython_debug/
 .idea
 ### Model
-model
+services/paddle_services/model
 ### Log Backups
 *.log.*-*-*
+### Tmp Files
+/tmp_img
+/test_img

@@ -1,5 +1,5 @@
-# Use the official paddle image as the base
-FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6
+# Use the official python image as the base
+FROM python:3.10.15-bookworm
 # Set the working directory
 WORKDIR /app
@@ -13,11 +13,10 @@ ENV PYTHONUNBUFFERED=1 \
 # Install dependencies
 COPY requirements.txt /app/requirements.txt
-COPY packages /app/packages
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \
-    && pip install --no-cache-dir -r requirements.txt \
-    && pip uninstall -y onnxruntime onnxruntime-gpu \
-    && pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
+    && sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
+    && apt-get update && apt-get install libgl1 -y \
+    && pip install --no-cache-dir -r requirements.txt
 # Copy the current directory contents into /app in the container
 COPY . /app

@@ -6,7 +6,7 @@
 1. Clone the project from the remote Git repository.
-2. Copy the deep-learning models into the ./model directory; see the [Model Update](#模型更新) section for details.
+2. Copy the deep-learning models into the ./services/paddle_services/model directory; see the [Model Update](#模型更新) section for details.
 3. Install docker and docker-compose.
@@ -125,4 +125,6 @@ bash update.sh
     1. Add the document-detection feature
     2. Add the dewarping feature
 21. Version 1.14.0
     1. Add QR-code recognition to replace high-resolution images
+22. Version 2.0.0
+    1. Restructure the project; all models are now called via APIs

api_test.py (new file)

@@ -0,0 +1,4 @@
+import time
+
+if __name__ == '__main__':
+    time.sleep(3600)

@@ -1,15 +0,0 @@
-# Auto-generate the sqlalchemy Model matching a database table
-import subprocess
-
-from db import DB_URL
-
-if __name__ == '__main__':
-    table = input("Enter the table name: ")
-    out_file = f"db/{table}.py"
-    command = f"sqlacodegen {DB_URL} --outfile={out_file} --tables={table}"
-    try:
-        subprocess.run(command, shell=True, check=True)
-        print(f"{table}.py generated; please review it and copy it into the appropriate file")
-    except Exception as e:
-        print(f"Error while generating {table}.py: {e}")


@@ -1 +0,0 @@

@@ -1,102 +0,0 @@
-import datetime
-import json
-import os
-from decimal import Decimal
-from io import BytesIO
-from itertools import groupby
-
-import requests
-from PIL import ImageDraw, Image, ImageFont
-
-from db import MysqlSession
-from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxIeResult, ZxPhrec
-from ucloud import ufile
-from util import image_util
-
-
-def check_ie_result(pk_phhd):
-    os.makedirs(f"./check_result/{pk_phhd}", exist_ok=True)
-    json_result = {"pk_phhd": pk_phhd}
-    session = MysqlSession()
-    phhd = session.query(ZxPhhd.cXm).filter(ZxPhhd.pk_phhd == pk_phhd).one()
-    json_result["cXm"] = phhd.cXm
-    settlement = (session.query(ZxIeSettlement.pk_ie_settlement, ZxIeSettlement.name, ZxIeSettlement.admission_date,
-                                ZxIeSettlement.discharge_date, ZxIeSettlement.medical_expenses,
-                                ZxIeSettlement.personal_cash_payment, ZxIeSettlement.personal_account_payment,
-                                ZxIeSettlement.personal_funded_amount, ZxIeSettlement.medical_insurance_type,
-                                ZxIeSettlement.admission_id, ZxIeSettlement.settlement_id)
-                  .filter(ZxIeSettlement.pk_phhd == pk_phhd).one())
-    settlement_result = settlement._asdict()
-    json_result["settlement"] = settlement_result
-    discharge = (session.query(ZxIeDischarge.pk_ie_discharge, ZxIeDischarge.hospital, ZxIeDischarge.pk_yljg,
-                               ZxIeDischarge.department, ZxIeDischarge.pk_ylks, ZxIeDischarge.name, ZxIeDischarge.age,
-                               ZxIeDischarge.admission_date, ZxIeDischarge.discharge_date, ZxIeDischarge.doctor,
-                               ZxIeDischarge.admission_id)
-                 .filter(ZxIeDischarge.pk_phhd == pk_phhd).one())
-    discharge_result = discharge._asdict()
-    json_result["discharge"] = discharge_result
-    cost = session.query(ZxIeCost.pk_ie_cost, ZxIeCost.name, ZxIeCost.admission_date, ZxIeCost.discharge_date,
-                         ZxIeCost.medical_expenses).filter(ZxIeCost.pk_phhd == pk_phhd).one()
-    cost_result = cost._asdict()
-    json_result["cost"] = cost_result
-    phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
-        ZxPhrec.pk_phhd == pk_phhd).all()
-    for phrec in phrecs:
-        img_name = phrec.cfjaddress
-        img_path = ufile.get_private_url(img_name)
-        response = requests.get(img_path)
-        image = Image.open(BytesIO(response.content)).convert("RGB")
-        font_size = image.width * image.height / 200000
-        font = ImageFont.truetype("./font/simfang.ttf", size=font_size)
-        ocr = session.query(ZxIeResult.id, ZxIeResult.content, ZxIeResult.rotation_angle, ZxIeResult.x_offset,
-                            ZxIeResult.y_offset).filter(ZxIeResult.pk_phrec == phrec.pk_phrec).all()
-        if not ocr:
-            os.makedirs(f"./check_result/{pk_phhd}/0", exist_ok=True)
-            image.save(f"./check_result/{pk_phhd}/0/{img_name}")
-        for _, group_results in groupby(ocr, key=lambda x: x.id):
-            draw = ImageDraw.Draw(image)
-            for ocr_item in group_results:
-                result = json.loads(ocr_item.content)
-                rotation_angle = ocr_item.rotation_angle
-                x_offset = ocr_item.x_offset
-                y_offset = ocr_item.y_offset
-                for key in result:
-                    for value in result[key]:
-                        box = value["bbox"][0]
-                        if rotation_angle:
-                            box = image_util.invert_rotate_rectangle(box, (image.width / 2, image.height / 2),
-                                                                     rotation_angle)
-                        if x_offset:
-                            box[0] += x_offset
-                            box[2] += x_offset
-                        if y_offset:
-                            box[1] += y_offset
-                            box[3] += y_offset
-                        draw.rectangle(box, outline="red", width=2)  # draw the rectangle
-                        draw.text((box[0], box[1] - font_size), key, fill="blue", font=font)  # draw the key above the rectangle
-                        draw.text((box[0], box[3]), value["text"], fill="blue", font=font)  # draw the text below the rectangle
-            os.makedirs(f"./check_result/{pk_phhd}/{ocr_item.id}", exist_ok=True)
-            image.save(f"./check_result/{pk_phhd}/{ocr_item.id}/{img_name}")
-    session.close()
-
-    # custom JSON handler
-    def default(obj):
-        if isinstance(obj, Decimal):
-            return float(obj)
-        if isinstance(obj, datetime.date):
-            return obj.strftime("%Y-%m-%d")
-
-    with open(f"./check_result/{pk_phhd}/result.json", "w", encoding="utf-8") as json_file:
-        json.dump(json_result, json_file, indent=4, ensure_ascii=False, default=default)
-
-
-if __name__ == '__main__':
-    check_ie_result(0)

@@ -63,6 +63,8 @@ class ZxIeCost(Base):
     discharge_date = Column(Date, comment='discharge date')
     medical_expenses_str = Column(String(255), comment='total expenses as a string')
     medical_expenses = Column(DECIMAL(18, 2), comment='total expenses')
+    page_nums = Column(String(255), comment='page numbers')
+    page_count = Column(TINYINT(4), comment='page count')
     create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='creation time')
     creator = Column(String(255), comment='creator')
     update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
@@ -120,7 +122,7 @@ class ZxIeResult(Base):
     pk_ocr = Column(INTEGER(11), primary_key=True, comment='image OCR primary key')
     pk_phhd = Column(INTEGER(11), nullable=False, comment='reimbursement form primary key')
     pk_phrec = Column(INTEGER(11), nullable=False, comment='image primary key')
-    id = Column(INTEGER(11), nullable=False, comment='recognition batch')
+    id = Column(CHAR(32), nullable=False, comment='recognition batch')
     cfjaddress = Column(String(200), nullable=False, comment='cloud-storage file name')
     content = Column(String(5000), comment='OCR recognition content')
     rotation_angle = Column(INTEGER(11), comment='rotation angle')
@@ -412,17 +414,19 @@ class ZxIeReview(Base):
     pk_ie_review = Column(INTEGER(11), primary_key=True, comment='automatic review primary key')
     pk_phhd = Column(INTEGER(11), nullable=False, comment='reimbursement case primary key')
     success = Column(BIT(1))
+    integrity = Column(BIT(1))
     has_settlement = Column(BIT(1))
     has_discharge = Column(BIT(1))
     has_cost = Column(BIT(1))
     full_page = Column(BIT(1))
     page_description = Column(String(255), comment='description of the missing pages')
+    consistency = Column(BIT(1), comment='consistency of the three documents: 0 inconsistent, 1 consistent')
     name_match = Column(CHAR(1), server_default=text("'0'"),
                        comment='whether names in the three documents match: 0 mismatch, 1 match, 2 settlement mismatch, 3 discharge-record mismatch, 4 cost-list mismatch, 5 mismatch with the reimbursement applicant')
     admission_date_match = Column(CHAR(1), server_default=text("'0'"),
                                  comment='whether admission dates in the three documents match: 0 mismatch, 1 match, 2 settlement mismatch, 3 discharge-record mismatch, 4 cost-list mismatch')
     discharge_date_match = Column(CHAR(1), server_default=text("'0'"),
                                  comment='whether discharge dates in the three documents match: 0 mismatch, 1 match, 2 settlement mismatch, 3 discharge-record mismatch, 4 cost-list mismatch')
     create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='creation time')
     creator = Column(String(255), comment='creator')
     update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),

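The `id` column in `ZxIeResult` above changes from `INTEGER(11)` to `CHAR(32)` for the recognition batch, and a later diff in this compare imports `uuid` and a `set_batch_id` helper. A minimal sketch of how such a 32-character batch id could be produced — the function name here is hypothetical, not necessarily what `set_batch_id` does:

```python
import uuid

def new_batch_id() -> str:
    # uuid4().hex is exactly 32 lowercase hex characters -- it fits a CHAR(32) column
    return uuid.uuid4().hex

batch_id = new_batch_id()
print(len(batch_id))  # 32
```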
@@ -1,32 +0,0 @@
-import base64
-
-import cv2
-import numpy as np
-from flask import Flask, request, jsonify
-
-from paddle_detection import detector
-
-app = Flask(__name__)
-
-
-@app.route("/det/detect_books", methods=['POST'])
-def detect_books():
-    try:
-        file = request.files['image']
-        image_data = file.read()
-        nparr = np.frombuffer(image_data, np.uint8)
-        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-        result = detector.get_book_areas(image)
-        encoded_images = []
-        for i in result:
-            _, encoded_image = cv2.imencode('.jpg', i)
-            byte_stream = encoded_image.tobytes()
-            img_str = base64.b64encode(byte_stream).decode('utf-8')
-            encoded_images.append(img_str)
-        return jsonify(encoded_images), 200
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-
-
-if __name__ == '__main__':
-    app.run("0.0.0.0")

@@ -1,4 +0,0 @@
-from onnxruntime import InferenceSession
-
-DOC_TR = InferenceSession("model/dewarp_model/doc_tr_pp.onnx",
-                          providers=["CUDAExecutionProvider"], provider_options=[{"device_id": 0}])

@@ -1,46 +1,32 @@
-x-env:
-  &template
-  image: fcb_photo_review:1.14.6
+x-base:
+  &base_template
   restart: always

-x-review:
-  &review_template
-  <<: *template
+x-project:
+  &project_template
+  <<: *base_template
+  image: fcb_photo_review:2.0.0
   volumes:
     - ./log:/app/log
-    - ./model:/app/model
-  deploy:
-    resources:
-      reservations:
-        devices:
-          - device_ids: [ '0', '1' ]
-            capabilities: [ 'gpu' ]
-            driver: 'nvidia'
+    - ./tmp_img:/app/tmp_img

-x-mask:
-  &mask_template
-  <<: *template
+x-paddle:
+  &paddle_template
+  <<: *base_template
+  image: fcb_paddle:0.0.1
   volumes:
-    - ./log:/app/log
-  deploy:
-    resources:
-      reservations:
-        devices:
-          - device_ids: [ '1' ]
-            capabilities: [ 'gpu' ]
-            driver: 'nvidia'
+    - ./services/paddle_services/log:/app/log
+    - ./services/paddle_services/model:/app/model
+    - ./tmp_img:/app/tmp_img

 services:
-  det_api:
-    <<: *template
+  ocr:
+    <<: *paddle_template
     build:
-      context: .
-    container_name: det_api
-    hostname: det_api
-    volumes:
-      - ./log:/app/log
-      - ./model:/app/model
-#    command: [ 'det_api.py' ]
+      context: ./services/paddle_services
+    container_name: ocr
+    hostname: ocr
+    command: [ '-w', '4', 'ocr:app', '--bind', '0.0.0.0:5001' ]
     deploy:
       resources:
         reservations:
@@ -49,16 +35,116 @@ services:
             capabilities: [ 'gpu' ]
             driver: 'nvidia'

+  ie_settlement:
+    <<: *paddle_template
+    container_name: ie_settlement
+    hostname: ie_settlement
+    command: [ '-w', '5', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '0' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+  ie_discharge:
+    <<: *paddle_template
+    container_name: ie_discharge
+    hostname: ie_discharge
+    command: [ '-w', '5', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '1' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+  ie_cost:
+    <<: *paddle_template
+    container_name: ie_cost
+    hostname: ie_cost
+    command: [ '-w', '5', 'ie_cost:app', '--bind', '0.0.0.0:5004' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '1' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+  clas_orientation:
+    <<: *paddle_template
+    container_name: clas_orientation
+    hostname: clas_orientation
+    command: [ '-w', '3', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '0' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+  det_book:
+    <<: *paddle_template
+    container_name: det_book
+    hostname: det_book
+    command: [ '-w', '4', 'det_book:app', '--bind', '0.0.0.0:5006' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '1' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+  dewarp:
+    <<: *paddle_template
+    container_name: dewarp
+    hostname: dewarp
+    command: [ '-w', '4', 'dewarp:app', '--bind', '0.0.0.0:5007' ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ '0' ]
+              capabilities: [ 'gpu' ]
+              driver: 'nvidia'
+
+#  clas_text:
+#    <<: *paddle_template
+#    container_name: clas_text
+#    hostname: clas_text
+#    command: [ '-w', '3', 'clas_text:app', '--bind', '0.0.0.0:5008' ]
+#    deploy:
+#      resources:
+#        reservations:
+#          devices:
+#            - device_ids: [ '1' ]
+#              capabilities: [ 'gpu' ]
+#              driver: 'nvidia'
+
   photo_review_1:
-    <<: *review_template
+    <<: *project_template
+    build:
+      context: .
     container_name: photo_review_1
     hostname: photo_review_1
     depends_on:
-      - det_api
+      - ocr
+      - ie_settlement
+      - ie_discharge
+      - ie_cost
+      - clas_orientation
+      - det_book
+      - dewarp
+#      - clas_text
     command: [ 'photo_review.py', '--clean', 'True' ]

   photo_review_2:
-    <<: *review_template
+    <<: *project_template
     container_name: photo_review_2
     hostname: photo_review_2
     depends_on:
@@ -66,57 +152,41 @@ services:
     command: [ 'photo_review.py' ]

   photo_review_3:
-    <<: *review_template
+    <<: *project_template
     container_name: photo_review_3
     hostname: photo_review_3
     depends_on:
-      - photo_review_2
+      - photo_review_1
     command: [ 'photo_review.py' ]

   photo_review_4:
-    <<: *review_template
+    <<: *project_template
     container_name: photo_review_4
     hostname: photo_review_4
     depends_on:
-      - photo_review_3
+      - photo_review_1
     command: [ 'photo_review.py' ]

   photo_review_5:
-    <<: *review_template
+    <<: *project_template
     container_name: photo_review_5
     hostname: photo_review_5
     depends_on:
-      - photo_review_4
+      - photo_review_1
     command: [ 'photo_review.py' ]

   photo_mask_1:
-    <<: *mask_template
+    <<: *project_template
     container_name: photo_mask_1
     hostname: photo_mask_1
     depends_on:
-      - photo_review_5
+      - photo_review_1
     command: [ 'photo_mask.py', '--clean', 'True' ]

   photo_mask_2:
-    <<: *mask_template
+    <<: *project_template
     container_name: photo_mask_2
     hostname: photo_mask_2
     depends_on:
       - photo_mask_1
     command: [ 'photo_mask.py' ]
-#
-#  photo_review_6:
-#    <<: *review_template
-#    container_name: photo_review_6
-#    hostname: photo_review_6
-#    depends_on:
-#      - photo_mask_2
-#    command: [ 'photo_review.py' ]
-#
-#  photo_review_7:
-#    <<: *review_template
-#    container_name: photo_review_7
-#    hostname: photo_review_7
-#    depends_on:
-#      - photo_review_6
-#    command: [ 'photo_review.py' ]

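The compose file above relies on YAML anchors (`&base_template`) and merge keys (`<<: *paddle_template`) to share settings between services. The semantics are plain dictionary merging: a service inherits every key from the template and can add or override its own. A rough Python sketch of how the `ocr` service expands (values abbreviated, and this is an illustration of the merge semantics, not how compose is implemented):

```python
# Shared settings, as defined under the x-base / x-paddle extension fields
base_template = {"restart": "always"}
paddle_template = {
    **base_template,                      # <<: *base_template
    "image": "fcb_paddle:0.0.1",
    "volumes": ["./services/paddle_services/log:/app/log",
                "./services/paddle_services/model:/app/model",
                "./tmp_img:/app/tmp_img"],
}

# A service merges the template, then adds or overrides its own keys
ocr = {
    **paddle_template,                    # <<: *paddle_template
    "container_name": "ocr",
    "command": ["-w", "4", "ocr:app", "--bind", "0.0.0.0:5001"],
}

print(ocr["restart"], ocr["image"])  # always fcb_paddle:0.0.1
```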
@@ -1,13 +1,15 @@
 import os
 import socket

+# project root directory
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 # get the hostname, to distinguish containers
 HOSTNAME = socket.gethostname()
 # create the log paths if they do not exist
 LOG_PATHS = [
-    f"log/{HOSTNAME}/ucloud",
-    f"log/{HOSTNAME}/error",
-    f"log/{HOSTNAME}/qr",
+    os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'ucloud'),
+    os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'error'),
+    os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'qr'),
 ]
 for path in LOG_PATHS:
     if not os.path.exists(path):

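The change above anchors the log paths at `PROJECT_ROOT` instead of relying on the current working directory, so they resolve the same no matter where the process is launched from. A small sketch of the same pattern, using a temporary directory in place of the real project root and `os.makedirs(..., exist_ok=True)` in place of the exists-then-create check:

```python
import os
import tempfile

# Stand-in for PROJECT_ROOT, which the real module derives from __file__
PROJECT_ROOT = tempfile.mkdtemp()
HOSTNAME = "photo_review_1"  # example container hostname

LOG_PATHS = [os.path.join(PROJECT_ROOT, "log", HOSTNAME, sub)
             for sub in ("ucloud", "error", "qr")]
for path in LOG_PATHS:
    os.makedirs(path, exist_ok=True)  # idempotent: safe to call on every start

print(all(os.path.isdir(p) for p in LOG_PATHS))  # True
```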
@@ -8,15 +8,13 @@ MAX_WAIT_TIME = 3
 # email configuration for program-error alerts
 ERROR_EMAIL_CONFIG = {
     # SMTP server address
-    "smtp_server": "smtp.163.com",
+    'smtp_server': 'smtp.163.com',
     # port for connecting to SMTP
-    "port": 994,
+    'port': 994,
     # sender address; make sure the SMTP mail service is enabled for it
-    "sender": "EchoLiu618@163.com",
+    'sender': 'EchoLiu618@163.com',
     # authorization code -- a dedicated password for third-party mail clients, not the mailbox password
-    "authorization_code": "OKPQLIIVLVGRZYVH",
+    'authorization_code': 'OKPQLIIVLVGRZYVH',
     # recipient addresses
-    "receivers": ["1515783401@qq.com"],
-    # retry count
-    "retry_times": 3,
+    'receivers': ['1515783401@qq.com'],
 }

@@ -5,18 +5,18 @@ from email.mime.text import MIMEText

 from tenacity import retry, stop_after_attempt, wait_random

-from auto_email import ERROR_EMAIL_CONFIG, TRY_TIMES, MIN_WAIT_TIME, MAX_WAIT_TIME
 from log import HOSTNAME
+from my_email import ERROR_EMAIL_CONFIG, TRY_TIMES, MIN_WAIT_TIME, MAX_WAIT_TIME


 @retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), reraise=True,
-       after=lambda x: logging.warning("Failed to send the email!"))
+       after=lambda x: logging.warning('Failed to send the email!'))
 def send_email(email_config, massage):
-    smtp_server = email_config["smtp_server"]
-    port = email_config["port"]
-    sender = email_config["sender"]
-    authorization_code = email_config["authorization_code"]
-    receivers = email_config["receivers"]
+    smtp_server = email_config['smtp_server']
+    port = email_config['port']
+    sender = email_config['sender']
+    authorization_code = email_config['authorization_code']
+    receivers = email_config['receivers']
     mail = smtplib.SMTP_SSL(smtp_server, port)  # connect to the SMTP service
     mail.login(sender, authorization_code)  # log in to the SMTP service
     mail.sendmail(sender, receivers, massage.as_string())  # send the email
@@ -34,13 +34,13 @@ def send_error_email(program_name, error_name, error_detail):
     """
     # SMTP server configuration
-    sender = ERROR_EMAIL_CONFIG["sender"]
-    receivers = ERROR_EMAIL_CONFIG["receivers"]
+    sender = ERROR_EMAIL_CONFIG['sender']
+    receivers = ERROR_EMAIL_CONFIG['receivers']
     # get the time of the error
-    error_time = datetime.datetime.strftime(datetime.datetime.today(), "%Y-%m-%d %H:%M:%S:%f")
+    error_time = datetime.datetime.strftime(datetime.datetime.today(), '%Y-%m-%d %H:%M:%S:%f')
     # email content
-    subject = f"[Program Error Alert] {program_name}({HOSTNAME}) {error_time}"  # email subject
+    subject = f'[Program Error Alert] {program_name}({HOSTNAME}) {error_time}'  # email subject
     content = f'''<div class="emailcontent" style="width:100%;max-width:720px;text-align:left;margin:0 auto;padding-top:80px;padding-bottom:20px">
         <div class="emailtitle">
             <h1 style="color:#fff;background:#51a0e3;line-height:70px;font-size:24px;font-weight:400;padding-left:40px;margin:0">Program error notification</h1>

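`send_email` above is wrapped in tenacity's `@retry` with `stop_after_attempt(TRY_TIMES)`, `wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME)` and `reraise=True`. A stdlib-only sketch of that behaviour — the parameter names mirror tenacity's, but this is an illustration, not the library itself:

```python
import functools
import logging
import random
import time

def retry(stop_attempts=3, wait_min=1, wait_max=3):
    """Minimal stand-in for tenacity's retry/stop_after_attempt/wait_random."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(1, stop_attempts + 1):
                try:
                    return fn(*args, **kwargs)
                except Exception:
                    logging.warning("attempt %d failed", attempt)  # like the `after` hook
                    if attempt == stop_attempts:
                        raise  # reraise=True: surface the last exception
                    time.sleep(random.uniform(wait_min, wait_max))
        return wrapper
    return decorator

calls = []

@retry(stop_attempts=3, wait_min=0, wait_max=0)
def flaky():
    calls.append(1)
    if len(calls) < 3:
        raise RuntimeError("transient")
    return "ok"

print(flaky(), len(calls))  # ok 3
```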
@@ -1 +0,0 @@
-README_cn.md

@@ -1,4 +0,0 @@
-from onnxruntime import InferenceSession
-
-PADDLE_DET = InferenceSession("model/object_det_model/ppyoloe_plus_crn_l_80e_coco_w_nms.onnx",
-                              providers=["CPUExecutionProvider"], provider_options=[{"device_id": 0}])

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_cn.md

@@ -1 +0,0 @@
-README_en.md

@@ -1,76 +0,0 @@
-import base64
-import logging
-import tempfile
-from collections import defaultdict
-
-import cv2
-import numpy as np
-import requests
-from tenacity import retry, stop_after_attempt, wait_random
-
-from paddle_detection import PADDLE_DET
-from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
-from paddle_detection.deploy.third_engine.onnx.preprocess import Compose
-from util import image_util, util
-
-
-def predict_image(infer_config, predictor, img_path):
-    # load preprocess transforms
-    transforms = Compose(infer_config.preprocess_infos)
-    # predict image
-    inputs = transforms(img_path)
-    inputs["image"] = np.array(inputs["image"]).astype('float32')
-    inputs_name = [var.name for var in predictor.get_inputs()]
-    inputs = {k: inputs[k][None,] for k in inputs_name}
-    outputs = predictor.run(output_names=None, input_feed=inputs)
-    bboxes = np.array(outputs[0])
-    result = defaultdict(list)
-    for bbox in bboxes:
-        if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
-            result[bbox[0]].append({"score": bbox[1], "box": bbox[2:]})
-    return result
-
-
-def detect_image(img_path):
-    infer_cfg = "model/object_det_model/infer_cfg.yml"
-    # load infer config
-    infer_config = PredictConfig(infer_cfg)
-    return predict_image(infer_config, PADDLE_DET, img_path)
-
-
-def get_book_areas(image):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-        cv2.imwrite(temp_file.name, image)
-    detect_result = detect_image(temp_file.name)
-    util.delete_temp_file(temp_file.name)
-    book_areas = detect_result[73]
-    result = []
-    for book_area in book_areas:
-        result.append(image_util.capture(image, book_area["box"]))
-    return result
-
-
-@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
-       after=lambda x: logging.warning("Failed to get the document areas!"))
-def request_book_areas(image):
-    url = "http://det_api:5000/det/detect_books"
-    _, encoded_image = cv2.imencode('.jpg', image)
-    byte_stream = encoded_image.tobytes()
-    files = {"image": ("image.jpg", byte_stream)}
-    response = requests.post(url, files=files)
-    if response.status_code == 200:
-        img_str_list = response.json()
-        result = []
-        for img_str in img_str_list:
-            img_data = base64.b64decode(img_str)
-            np_array = np.frombuffer(img_data, np.uint8)
-            img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
-            height, width = img.shape[:2]
-            if max(height, width) / min(height, width) <= 6.5:
-                result.append(img)  # filter out abnormal results
-        return result
-    else:
-        return []

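`request_book_areas` above receives the detected regions as a JSON list of base64 strings and decodes them back into images. The round trip is the part worth noting: JSON cannot carry raw bytes, so the server base64-encodes each JPEG and the client reverses it before `cv2.imdecode`. A dependency-free sketch of that round trip, with a placeholder byte string standing in for real JPEG data:

```python
import base64

# Stand-in for cv2.imencode(...).tobytes(): any binary payload
byte_stream = b"\xff\xd8\xff\xe0fake-jpeg-bytes"

# Server side: bytes -> base64 text that is safe to put in a JSON list
img_str = base64.b64encode(byte_stream).decode("utf-8")

# Client side: base64 text -> the original bytes, ready for cv2.imdecode
img_data = base64.b64decode(img_str)

print(img_data == byte_stream)  # True
```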
View File

@@ -5,35 +5,36 @@ from time import sleep
 from sqlalchemy import update
 
-from auto_email.error_email import send_error_email
 from db import MysqlSession
 from db.mysql import ZxPhhd
 from log import LOGGING_CONFIG
+from my_email.error_email import send_error_email
 from photo_mask import auto_photo_mask, SEND_ERROR_EMAIL
 
 if __name__ == '__main__':
-    program_name = "照片审核自动涂抹脚本"
+    program_name = '照片审核自动涂抹脚本'
     logging.config.dictConfig(LOGGING_CONFIG)
+    logging.info('等待接口服务启动...')
+    sleep(60)
     parser = argparse.ArgumentParser()
-    parser.add_argument("--clean", default=False, type=bool, help="是否将涂抹中的案子改为待涂抹状态")
+    parser.add_argument('--clean', default=False, type=bool, help='是否将涂抹中的案子改为待涂抹状态')
     args = parser.parse_args()
     if args.clean:
         # mainly used at startup to release cases still stuck in the masking state
         session = MysqlSession()
-        update_flag = (update(ZxPhhd).where(ZxPhhd.paint_flag == "2").values(paint_flag="1"))
+        update_flag = (update(ZxPhhd).where(ZxPhhd.paint_flag == '2').values(paint_flag='1'))
         session.execute(update_flag)
         session.commit()
         session.close()
-        logging.info("已释放残余的涂抹案子!")
-    else:
-        sleep(5)
+        logging.info('已释放残余的涂抹案子!')
     try:
-        logging.info(f"【{program_name}】开始运行")
+        logging.info(f'【{program_name}】开始运行')
         auto_photo_mask.main()
     except Exception as e:
-        error_logger = logging.getLogger("error")
+        error_logger = logging.getLogger('error')
         error_logger.error(traceback.format_exc())
         if SEND_ERROR_EMAIL:
             send_error_email(program_name, repr(e), traceback.format_exc())
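One caveat that survives both versions of this entry script: `type=bool` in argparse does not parse booleans, because `bool('False')` is `True`, so `--clean False` on the command line still triggers the cleanup branch. It works here only because the flag is passed solely when cleanup is wanted. A sketch of the behavior and a stricter alternative (`BooleanOptionalAction`, available since Python 3.9):

```python
import argparse

parser = argparse.ArgumentParser()
# type=bool converts the raw string: any non-empty value becomes True.
parser.add_argument('--clean-bool', default=False, type=bool)
# A real boolean flag pair: --clean / --no-clean.
parser.add_argument('--clean', action=argparse.BooleanOptionalAction, default=False)

print(parser.parse_args(['--clean-bool', 'False']).clean_bool)  # True (!)
print(parser.parse_args(['--no-clean']).clean)                  # False
print(parser.parse_args(['--clean']).clean)                     # True
```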


@@ -1,5 +1,3 @@
-from paddleocr import PaddleOCR
-
 """
 Project configuration
 """
@@ -40,13 +38,3 @@ SIMILAR_CHAR = {
     "": [""],
     "": [""],
 }
-# If you do not want spaces in the recognition output you can set use_space_char=False. Be sure to test this setting first: in version 2.7.3 it is buggy and can make recognition fail.
-OCR = PaddleOCR(
-    gpu_id=0,
-    show_log=False,
-    det_db_thresh=0.1,
-    det_db_box_thresh=0.3,
-    det_limit_side_len=1248,
-    drop_score=0.3
-)


@@ -1,7 +1,9 @@
 import logging.config
+import os
 import re
-import tempfile
+import shutil
 import time
+import uuid
 from time import sleep
 
 import cv2
@@ -10,9 +12,10 @@ from sqlalchemy import update, and_
 from db import MysqlSession
 from db.mysql import ZxPhrec, ZxPhhd
 from log import HOSTNAME
-from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
+from photo_mask import PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
+from photo_review import set_batch_id
 from ucloud import BUCKET, ufile
-from util import image_util, util
+from util import image_util, common_util, model_util
 
 
 def find_boxes(content, layout, offset=0, length=None, improve=False, image_path=None, extra_content=None):
@@ -55,14 +58,15 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
     if improve:
         # recognize again to improve precision
         image = cv2.imread(image_path)
+        img_name, img_ext = common_util.parse_save_path(image_path)
         # capture a slightly larger area
-        capture_box = util.zoom_rectangle(box, 0.2)
+        capture_box = common_util.zoom_rectangle(box, 0.2)
         captured_image = image_util.capture(image, capture_box)
-        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
-            captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image)
-            cv2.imwrite(temp_file.name, captured_image)
+        captured_image_path = common_util.get_processed_img_path(f'{img_name}.capture.{img_ext}')
+        cv2.imwrite(captured_image_path, captured_image)
+        captured_a4_img_path, offset_x, offset_y = image_util.expand_to_a4_size(captured_image_path)
         try:
-            layouts = util.get_ocr_layout(OCR, temp_file.name)
+            layouts = common_util.ocr_result_to_layout(model_util.ocr(captured_a4_img_path))
         except TypeError:
             # a TypeError most likely means no text was recognized
             layouts = []
@@ -86,22 +90,17 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
                         temp_box[3] + capture_box[1] - offset_y,
                     ])
                     break
-        util.delete_temp_file(temp_file.name)
     if not boxes:
         boxes.append(box)
     return boxes
 
 
-def get_mask_layout(image, name, id_card_num):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-        cv2.imwrite(temp_file.name, image)
+def get_mask_layout(img_path, name, id_card_num):
     result = []
     try:
         try:
-            layouts = util.get_ocr_layout(OCR, temp_file.name)
-            # layouts = OCR.parse({"doc": temp_file.name})["layout"]
+            layouts = common_util.ocr_result_to_layout(model_util.ocr(img_path))
         except TypeError:
             # a TypeError most likely means no text was recognized
             layouts = []
@@ -135,12 +134,12 @@ def get_mask_layout(image, name, id_card_num):
                     find_id_card_num_by_key = True
                 matches = re.findall(r, layout[1])
                 for match in matches:
-                    result += find_boxes(match, layout, improve=True, image_path=temp_file.name, extra_content=r)
+                    result += find_boxes(match, layout, improve=True, image_path=img_path, extra_content=r)
                     find_name_by_key = False
                     break
             if id_card_num in layout[1]:
-                result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name)
+                result += find_boxes(id_card_num, layout, improve=True, image_path=img_path)
                 find_id_card_num_by_key = False
 
     def _find_boxes_by_keys(keys):
@@ -163,8 +162,6 @@ def get_mask_layout(image, name, id_card_num):
     except Exception as e:
         logging.error("涂抹时出错!", exc_info=e)
         return result
-    finally:
-        util.delete_temp_file(temp_file.name)
 
 
 def handle_image_for_mask(split_result):
@@ -174,11 +171,15 @@ def handle_image_for_mask(split_result):
     return expand_img, split_result["x_offset"], split_result["y_offset"]
 
 
-def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
-    def _mask(i, n, icn, c):
+def mask_photo(img_path, name, id_card_num, color=(255, 255, 255)):
+    def _mask(ip, n, icn, c):
+        i = cv2.imread(ip)
+        img_name, img_ext = common_util.parse_save_path(ip)
         do_mask = False
-        split_results = image_util.split(i)
+        split_results = image_util.split(ip)
         for split_result in split_results:
+            if not split_result['img']:
+                continue
             to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)
             results = get_mask_layout(to_mask_img, n, icn)
@@ -193,27 +194,27 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
                     result[3] + y_offset,
                 )
             cv2.rectangle(i, (int(result[0]), int(result[1])), (int(result[2]), int(result[3])), c, -1, 0)
-        return do_mask, i
+        masked_path = common_util.get_processed_img_path(f'{img_name}.mask.{img_ext}')
+        cv2.imwrite(masked_path, i)
+        return do_mask, masked_path
 
-    # open the image
-    image = image_util.read(img_url)
-    original_image = image
-    is_masked, image = _mask(image, name, id_card_num, color)
+    original_image = img_path
+    is_masked, img_path = _mask(img_path, name, id_card_num, color)
     if not is_masked:
         # if nothing was masked, the image orientation may be wrong
-        angles = image_util.parse_rotation_angles(image)
+        angles = model_util.clas_orientation(img_path)
         angle = angles[0]
         if angle != "0":
-            image = image_util.rotate(image, int(angle))
-            is_masked, image = _mask(image, name, id_card_num, color)
+            img_path = image_util.rotate(img_path, int(angle))
+            is_masked, img_path = _mask(img_path, name, id_card_num, color)
             if not is_masked:
                 # if rotating did not help either, restore the original orientation
-                image = original_image
+                img_path = original_image
            else:
                 # if rotating did help, log it
                 logging.info(f"图片旋转了{angle}°")
-    return is_masked, image
+    return is_masked, img_path
 
 
 def photo_mask(pk_phhd, name, id_card_num):
@@ -223,32 +224,37 @@ def photo_mask(pk_phhd, name, id_card_num):
         ZxPhrec.cRectype.in_(["3", "4"])
     )).all()
     session.close()
+    # identifier shared by images of the same batch
+    set_batch_id(uuid.uuid4().hex)
+    processed_img_dir = common_util.get_processed_img_path('')
+    os.makedirs(processed_img_dir, exist_ok=True)
     for phrec in phrecs:
         img_url = ufile.get_private_url(phrec.cfjaddress)
         if not img_url:
             continue
-
-        is_masked, image = mask_photo(img_url, name, id_card_num)
+        original_img_path = common_util.save_to_local(img_url)
+        img_path = common_util.get_processed_img_path(phrec.cfjaddress)
+        shutil.copy2(original_img_path, img_path)
+        is_masked, image = mask_photo(img_path, name, id_card_num)
         # if masking happened, back up and update
         if is_masked:
-            ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                cv2.imwrite(temp_file.name, image)
             try:
-                ufile.upload_file(phrec.cfjaddress, temp_file.name)
+                ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
+                ufile.upload_file(phrec.cfjaddress, image)
                 session = MysqlSession()
                 update_flag = (update(ZxPhrec).where(ZxPhrec.pk_phrec == phrec.pk_phrec).values(
                     paint_user=HOSTNAME,
-                    paint_date=util.get_default_datetime()))
+                    paint_date=common_util.get_default_datetime()))
                 session.execute(update_flag)
                 session.commit()
                 session.close()
             except Exception as e:
                 logging.error("上传图片出错", exc_info=e)
-            finally:
-                util.delete_temp_file(temp_file.name)
+    # delete leftover images
+    if os.path.exists(processed_img_dir) and os.path.isdir(processed_img_dir):
+        shutil.rmtree(processed_img_dir)
 
 
 def main():
@@ -278,7 +284,7 @@ def main():
         update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
             paint_flag="8",
             paint_user=HOSTNAME,
-            paint_date=util.get_default_datetime(),
+            paint_date=common_util.get_default_datetime(),
             fZcfwfy=time.time() - start_time))
         session.execute(update_flag)
         session.commit()
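The masking loop above shifts each detected box by the split offset and fills it with `cv2.rectangle(..., thickness=-1)`. The same fill can be expressed as a numpy slice assignment, which keeps this sketch runnable without OpenCV (the function name is illustrative):

```python
import numpy as np


def mask_boxes(img, boxes, offset=(0, 0), color=(255, 255, 255)):
    """Fill each (x1, y1, x2, y2) box, shifted by the split offset."""
    ox, oy = offset
    for x1, y1, x2, y2 in boxes:
        img[int(y1 + oy):int(y2 + oy), int(x1 + ox):int(x2 + ox)] = color
    return img


canvas = np.zeros((10, 10, 3), dtype=np.uint8)
mask_boxes(canvas, [(2, 2, 5, 5)], offset=(1, 1))
print(canvas[3, 3].tolist())  # [255, 255, 255]
print(canvas[0, 0].tolist())  # [0, 0, 0]
```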


@@ -8,7 +8,7 @@ from db import MysqlSession
 from db.mysql import ZxIeOcrerror, ZxPhrec
 from photo_mask.auto_photo_mask import mask_photo
 from ucloud import ufile
-from util import image_util, util
+from util import image_util, common_util
 
 
 def check_error(error_ocr):
@@ -91,7 +91,7 @@ if __name__ == '__main__':
             session = MysqlSession()
             update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
-                checktime=util.get_default_datetime(), cfjaddress2=error_descript))
+                checktime=common_util.get_default_datetime(), cfjaddress2=error_descript))
             session.execute(update_error)
             session.commit()
             session.close()


@@ -7,7 +7,7 @@ from sqlalchemy import update, and_
 from db import MysqlSession
 from db.mysql import ZxIeOcrerror
 from photo_mask.photo_mask_error_check import auto_check_error
-from util import util
+from util import common_util
 
 if __name__ == '__main__':
     today = date.today()
@@ -29,7 +29,7 @@ if __name__ == '__main__':
         if error_descript == "未知错误":
             check_time = None
         else:
-            check_time = util.get_default_datetime()
+            check_time = common_util.get_default_datetime()
         session = MysqlSession()
         update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
@@ -41,5 +41,5 @@ if __name__ == '__main__':
     print(result)
     with open("photo_mask_error_report.txt", 'w', encoding='utf-8') as file:
         file.write(json.dumps(result, indent=4, ensure_ascii=False))
-        file.write(util.get_default_datetime())
+        file.write(common_util.get_default_datetime())
     print("结果已保存。")
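A side note on the report writer above: appending the timestamp after the JSON body (`file.write(json.dumps(...))` followed by a second `file.write(...)`) leaves a file that `json.load` can no longer parse. Embedding the timestamp inside the document keeps the report machine-readable; a sketch under that assumption (key names are illustrative):

```python
import json
import tempfile
from datetime import datetime


def write_report(path, result):
    """Write one parseable JSON document instead of JSON plus a trailing timestamp."""
    report = {
        'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'result': result,
    }
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=4, ensure_ascii=False)
    return report


path = tempfile.mkdtemp() + '/photo_mask_error_report.json'
write_report(path, {'未知错误': 3})
with open(path, encoding='utf-8') as f:
    print(json.load(f)['result'])  # {'未知错误': 3}
```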


@@ -5,36 +5,36 @@ from time import sleep
 from sqlalchemy import update
 
-from auto_email.error_email import send_error_email
 from db import MysqlSession
 from db.mysql import ZxPhhd
 from log import LOGGING_CONFIG
+from my_email.error_email import send_error_email
 from photo_review import auto_photo_review, SEND_ERROR_EMAIL
 
-# the project must be started from here, otherwise relative paths in the code may cause errors
+# entry point of the photo-review auto-recognition script
 if __name__ == '__main__':
     program_name = '照片审核自动识别脚本'
     logging.config.dictConfig(LOGGING_CONFIG)
+    logging.info('等待接口服务启动...')
+    sleep(60)
     parser = argparse.ArgumentParser()
-    parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
+    parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
     args = parser.parse_args()
     if args.clean:
-        # mainly used at startup to release cases still stuck in masking
+        # at startup, release cases still stuck in recognition
         session = MysqlSession()
-        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
+        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
         session.execute(update_flag)
         session.commit()
         session.close()
-        logging.info("已释放残余的识别案子!")
-    else:
-        sleep(5)
+        logging.info('已释放残余的识别案子!')
     try:
-        logging.info(f"【{program_name}】开始运行")
+        logging.info(f'【{program_name}】开始运行')
         auto_photo_review.main()
     except Exception as e:
-        error_logger = logging.getLogger('error')
-        error_logger.error(traceback.format_exc())
+        logging.getLogger('error').error(traceback.format_exc())
         if SEND_ERROR_EMAIL:
             send_error_email(program_name, repr(e), traceback.format_exc())
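The `--clean` branch in both entry scripts is a crash-recovery idiom: any case a dead worker left in state '2' (processing) is reset to '1' (pending) before the main loop starts. A self-contained sketch with sqlite standing in for the MySQL table, the schema reduced to the two relevant columns:

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE zx_phhd (pk INTEGER PRIMARY KEY, exsuccess_flag TEXT)')
conn.executemany('INSERT INTO zx_phhd VALUES (?, ?)',
                 [(1, '2'), (2, '8'), (3, '2')])
# Release cases stuck in "recognizing" back to "pending".
conn.execute("UPDATE zx_phhd SET exsuccess_flag = '1' WHERE exsuccess_flag = '2'")
flags = [f for (f,) in conn.execute(
    'SELECT exsuccess_flag FROM zx_phhd ORDER BY pk')]
print(flags)  # ['1', '8', '1']
```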


@@ -1,6 +1,4 @@
 import jieba
-from paddlenlp import Taskflow
-from paddleocr import PaddleOCR
 
 '''
 Project configuration
 '''
@@ -11,52 +9,8 @@ PHHD_BATCH_SIZE = 10
 SLEEP_MINUTES = 5
 # whether to send error-report emails
 SEND_ERROR_EMAIL = True
-# whether to enable layout analysis
-LAYOUT_ANALYSIS = False
-
-"""
-Information-extraction keyword configuration
-"""
-# patient name
-PATIENT_NAME = ['患者姓名']
-# admission date
-ADMISSION_DATE = ['入院日期']
-# discharge date
-DISCHARGE_DATE = ['出院日期']
-# incurred medical expenses
-MEDICAL_EXPENSES = ['费用总额']
-# personal cash payment
-PERSONAL_CASH_PAYMENT = ['个人现金支付']
-# personal account payment
-PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
-# personal self-funded amount
-PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
-# medical insurance type
-MEDICAL_INSURANCE_TYPE = ['医保类型']
-# hospital
-HOSPITAL = ['医院']
-# department
-DEPARTMENT = ['科室']
-# attending doctor
-DOCTOR = ['主治医生']
-# admission id
-ADMISSION_ID = ['住院号']
-# settlement id
-SETTLEMENT_ID = ['医保结算单号码']
-# age
-AGE = ['年龄']
-# total amount in uppercase characters
-UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
-
-SETTLEMENT_LIST_SCHEMA = \
-    (PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
-     + PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
-     + UPPERCASE_MEDICAL_EXPENSES)
-DISCHARGE_RECORD_SCHEMA = \
-    HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
-COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
+# processing batch id (declared here only)
+BATCH_ID = ''
 
 '''
 Alias configuration
@@ -92,13 +46,32 @@ jieba.suggest_freq(('胆', '道'), True)
 jieba.suggest_freq(('', ''), True)
 
 '''
-Model configuration
+Keyword configuration for detecting missing discharge-record pages
 '''
-SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
-                         task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
-DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
-                        task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
-COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
-                   task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
-OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3)
+DISCHARGE_KEY = {
+    '入院诊断': ['入院诊断'],
+    '入院情况': ['入院情况', '入院时情况', '入院时主要症状'],
+    '入院日期': ['入院日期', '入院时间'],
+    '诊疗经过': ['诊疗经过', '住院经过', '治疗经过'],
+    '出院诊断': ['出院诊断'],
+    '出院情况': ['出院情况', '出院时情况'],
+    '出院日期': ['出院日期', '出院时间'],
+    '出院医嘱': ['出院医嘱', '出院医瞩']
+}
+
+
+def get_batch_id():
+    """
+    Get the processing batch id.
+    :return: the processing batch id
+    """
+    return BATCH_ID
+
+
+def set_batch_id(batch_id):
+    """
+    Set the processing batch id.
+    :param batch_id: the new batch id
+    """
+    global BATCH_ID
+    BATCH_ID = batch_id
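A sketch of how a synonym table like `DISCHARGE_KEY` can flag missing discharge-record sections: a section counts as present as soon as any of its synonyms occurs in the OCR text (the helper name and the trimmed-down table are illustrative, not the project's actual checker):

```python
DISCHARGE_KEY = {
    '入院诊断': ['入院诊断'],
    '出院诊断': ['出院诊断'],
    '出院日期': ['出院日期', '出院时间'],
}


def missing_sections(ocr_text, key_table=DISCHARGE_KEY):
    """Return the sections whose synonyms all fail to appear in the text."""
    return [section for section, synonyms in key_table.items()
            if not any(s in ocr_text for s in synonyms)]


print(missing_sections('入院诊断:肺炎 出院时间:2024-10-01'))  # ['出院诊断']
```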


@@ -1,113 +1,76 @@
import copy
import json import json
import logging import logging
import os import os
import tempfile import re
import shutil
import time import time
import uuid
from collections import defaultdict from collections import defaultdict
from time import sleep from time import sleep
import cv2 import cv2
import fitz
import jieba import jieba
import numpy as np import numpy as np
import requests
import zxingcpp import zxingcpp
from rapidfuzz import process, fuzz from rapidfuzz import process, fuzz
from sqlalchemy import update from sqlalchemy import update
from db import MysqlSession from db import MysqlSession
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
from log import HOSTNAME from log import HOSTNAME
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ DEPARTMENT_FILTER, DISCHARGE_KEY, set_batch_id, get_batch_id
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ from services.paddle_services import IE_KEY
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER from ucloud import ufile, BUCKET
from ucloud import ufile from util import image_util, common_util, html_util, model_util
from util import image_util, util, html_util from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \ parse_page_num, handle_tiny_int
parse_hospital
# 合并信息抽取结果 def parse_qrcode(img_path, image_id):
def merge_result(result1, result2): """
for key in result2: 解析二维码,尝试从中获取高清图片
result1[key] = result1.get(key, []) + result2[key] :param img_path: 待解析图片
return result1 :param image_id: 图片id
:return: 解析结果
"""
def ie_temp_image(ie, ocr, image):
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
cv2.imwrite(temp_file.name, image)
ie_result = []
try:
layout = util.get_ocr_layout(ocr, temp_file.name)
if not layout:
# 无识别结果
ie_result = []
else:
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
except Exception as e:
logging.error("信息抽取时出错", exc_info=e)
finally:
try:
os.remove(temp_file.name)
except Exception as e:
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
return ie_result
# 关键信息提取
def request_ie_result(task_enum, phrecs):
url = task_enum.request_url()
identity = int(time.time())
images = []
for phrec in phrecs:
images.append({"name": phrec.cfjaddress, "pk": phrec.pk_phrec})
payload = {"images": images, "schema": task_enum.schema(), "pk_phhd": phrecs[0].pk_phhd, "identity": identity}
response = requests.post(url, json=payload)
if response.status_code == 200:
return response.json()["data"]
else:
raise Exception(f"请求信息抽取结果失败,状态码:{response.status_code}")
# 尝试从二维码中获取高清图片
def get_better_image_from_qrcode(image, image_id, dpi=150):
def _parse_pdf_url(pdf_url_to_parse): def _parse_pdf_url(pdf_url_to_parse):
pdf_file = None
local_pdf_path = None local_pdf_path = None
img_name, img_ext = common_util.parse_save_path(img_path)
try: try:
local_pdf_path = html_util.download_pdf(pdf_url_to_parse) local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
# 打开PDF文件 pdf_imgs = image_util.pdf_to_imgs(local_pdf_path)
pdf_file = fitz.open(local_pdf_path) # 结算单部分
# 选择第一页 better_settlement_path = common_util.get_processed_img_path(f'{img_name}.better_settlement.jpg')
page = pdf_file[0] cv2.imwrite(better_settlement_path, pdf_imgs[0][0])
# 定义缩放系数DPI # 费用清单部分
default_dpi = 72 better_cost_path = common_util.get_processed_img_path(f'{img_name}.better_cost.jpg')
zoom = dpi / default_dpi total_height = sum([p[0].shape[0] for p in pdf_imgs[1:]])
# 设置矩阵变换参数 common_width = pdf_imgs[1][0].shape[1]
mat = fitz.Matrix(zoom, zoom) better_cost_img = np.zeros((total_height, common_width, 3), dtype=np.uint8)
# 渲染页面 current_y = 0
pix = page.get_pixmap(matrix=mat) for pdf in pdf_imgs[1:]:
# 将渲染结果转换为OpenCV兼容的格式 height = pdf[0].shape[0]
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1)) better_cost_img[current_y:current_y + height, :, :] = pdf[0]
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) current_y += height
return img, page.get_text() # cost_text += pdf[1] # 费用清单文本暂时没用到
cv2.imwrite(better_cost_path, better_cost_img)
return better_settlement_path, pdf_imgs[0][1], better_cost_path
except Exception as ex: except Exception as ex:
logging.getLogger('error').error('解析pdf失败', exc_info=ex) logging.getLogger('error').error('解析pdf失败', exc_info=ex)
return None, None return None, None, None
finally: finally:
if pdf_file:
pdf_file.close()
if local_pdf_path: if local_pdf_path:
util.delete_temp_file(local_pdf_path) common_util.delete_temp_file(local_pdf_path)
jsczt_base_url = 'http://einvoice.jsczt.cn' jsczt_base_url = 'http://einvoice.jsczt.cn'
try: try:
results = zxingcpp.read_barcodes(image) img = cv2.imread(img_path)
results = zxingcpp.read_barcodes(img, text_mode=zxingcpp.TextMode.HRI)
except Exception as e: except Exception as e:
logging.getLogger('error').info('二维码识别失败', exc_info=e) logging.getLogger('error').info('二维码识别失败', exc_info=e)
results = [] results = []
@@ -132,138 +95,122 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
if not pdf_url: if not pdf_url:
continue continue
return _parse_pdf_url(pdf_url) return _parse_pdf_url(pdf_url)
elif url.startswith('http://weixin.qq.com'): elif (url.startswith('http://weixin.qq.com')
or url == 'https://ybj.jszwfw.gov.cn/hsa-app-panel/index.html'):
# 无效地址 # 无效地址
continue continue
elif url.startswith('http://dzpj.ntzyy.com'):
# 南通市中医院
return _parse_pdf_url(url)
# elif url.startswith('https://apph5.ztejsapp.cn/nj/view/elecInvoiceForOther/QRCode2Invoice'):
# pdf_url = html_util.get_dtsrmyy_pdf_url(url)
# if not pdf_url:
# continue
# return _parse_pdf_url(pdf_url)
else: else:
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}') logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
except Exception as e: except Exception as e:
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e) logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
continue continue
return None, None return None, None, None
# 关键信息提取 # 关键信息提取
def information_extraction(ie, phrecs, identity): def information_extraction(phrec, pk_phhd):
result = {} """
for phrec in phrecs: 处理单张图片
img_path = ufile.get_private_url(phrec.cfjaddress) :param phrec:图片信息
if not img_path: :param pk_phhd:案子主键
continue :return:记录类型,信息抽取结果
"""
image = image_util.read(img_path) img_path = common_util.get_processed_img_path(phrec.cfjaddress)
if not os.path.exists(img_path):
# 尝试从二维码中获取高清图片 original_img_path = common_util.get_img_path(phrec.cfjaddress)
better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress) if not original_img_path:
if phrec.cRectype != '1': img_url = ufile.get_private_url(phrec.cfjaddress)
better_image = None # 非结算单暂时不进行替换 if not img_url:
zx_ie_results = [] return None, None, None
if better_image is not None: original_img_path = common_util.save_to_local(img_url)
img_angle = '0' shutil.copy2(original_img_path, img_path)
image = better_image if image_util.is_photo(img_path):
if text: book_img_path = model_util.det_book(img_path) # 识别文档区域并裁剪
info_extract = ie(text)[0] dewarped_img_path = model_util.dewarp(book_img_path) # 去扭曲
else: else: # todo:也可能是图片,后续添加细分逻辑
info_extract = ie_temp_image(ie, OCR, image) dewarped_img_path = img_path
ie_result = {'result': info_extract, 'angle': '0'} angles = model_util.clas_orientation(dewarped_img_path)
ocr_text = ''
now = util.get_default_datetime() info_extract = []
if not ie_result['result']: rec_type = None
for angle in angles:
ocr_result = []
rotated_img = image_util.rotate(dewarped_img_path, int(angle))
split_results = image_util.split(rotated_img)
for split_result in split_results:
if split_result['img'] is None:
continue continue
a4_img = image_util.expand_to_a4_size(split_result['img'])
tmp_ocr_result = model_util.ocr(a4_img)
if tmp_ocr_result:
ocr_result += tmp_ocr_result
tmp_ocr_text = common_util.ocr_result_to_text(ocr_result)
result_json = json.dumps(ie_result['result'], ensure_ascii=False) # if any(key in tmp_ocr_text for key in ['出院记录', '出院小结', '死亡记录']):
if len(result_json) > 5000: # tmp_rec_type = '出院记录'
result_json = result_json[:5000] # elif any(key in tmp_ocr_text for key in ['费用汇总清单', '费用清单', '费用明细', '结账清单', '费用小项统计']):
zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, # tmp_rec_type = '费用清单'
cfjaddress=phrec.cfjaddress, content=result_json, # elif any(key in tmp_ocr_text for key in ['住院收费票据', '结算单', '财政部监制', '结算凭证']):
rotation_angle=int(ie_result['angle']), # tmp_rec_type = '基本医保结算单'
x_offset=0, y_offset=0, create_time=now, # else:
creator=HOSTNAME, update_time=now, updater=HOSTNAME)) # tmp_rec_type = model_util.clas_text(tmp_ocr_text) if tmp_ocr_text else None
# if not tmp_rec_type:
result = merge_result(result, ie_result['result']) rec_dict = {
'1': '基本医保结算单',
'3': '出院记录',
'4': '费用清单',
}
tmp_rec_type = rec_dict.get(phrec.cRectype)
if tmp_rec_type == '基本医保结算单':
tmp_info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result))
elif tmp_rec_type == '出院记录':
tmp_info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result))
elif tmp_rec_type == '费用清单':
tmp_info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result))
else: else:
target_images = [] tmp_info_extract = []
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
if not target_images:
target_images.append(image) # 识别失败
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
for target_image in target_images:
# dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲
dewarped_image = target_image
angles = image_util.parse_rotation_angles(dewarped_image)
split_results = image_util.split(dewarped_image) if tmp_info_extract and len(tmp_info_extract) > len(info_extract):
for split_result in split_results: info_extract = tmp_info_extract
if split_result['img'] is None or split_result['img'].size == 0: ocr_text = tmp_ocr_text
continue rec_type = tmp_rec_type
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
now = util.get_default_datetime()
best_angle = ['0', 0]
for ie_result in ie_results:
if not ie_result['result']:
continue
result_json = json.dumps(ie_result['result'], ensure_ascii=False) if info_extract:
if len(result_json) > 5000: result_json = json.dumps(info_extract, ensure_ascii=False)
result_json = result_json[:5000] if len(result_json) > 5000:
zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity, result_json = result_json[:5000]
cfjaddress=phrec.cfjaddress, content=result_json,
rotation_angle=int(ie_result['angle']),
x_offset=split_result['x_offset'],
y_offset=split_result['y_offset'], create_time=now,
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
result = merge_result(result, ie_result['result'])
if len(ie_result['result']) > best_angle[1]:
best_angle = [ie_result['angle'], len(ie_result['result'])]
angle_count[best_angle[0]] += 1
img_angle = max(angle_count, key=angle_count.get)
if img_angle != '0' or better_image is not None:
image = image_util.rotate(image, int(img_angle))
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
cv2.imwrite(temp_file.name, image)
try:
ufile.upload_file(phrec.cfjaddress, temp_file.name)
if img_angle != '0':
logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。')
# 修正旋转角度
for zx_ie_result in zx_ie_results:
zx_ie_result.rotation_angle -= int(img_angle)
else:
logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!')
except Exception as e:
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
finally:
util.delete_temp_file(temp_file.name)
now = common_util.get_default_datetime()
session = MysqlSession() session = MysqlSession()
session.add_all(zx_ie_results) session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=get_batch_id(),
cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
session.commit() session.commit()
session.close() session.close()
return rec_type, info_extract, ocr_text
return result
# 从keys中获取准确率最高的value # 从keys中获取准确率最高的value
def get_best_value_in_keys(source, keys): def get_best_value_of_key(source, key):
# 最终结果 # 最终结果
result = None result = None
# 最大可能性 # 最大可能性
best_probability = 0 best_probability = 0
for key in keys: values = source.get(key)
values = source.get(key) if values:
if values: for value in values:
for value in values: for v in value:
text = value.get("text") text = v.get("text")
probability = value.get("probability") probability = v.get("probability")
if text and probability > best_probability: if text and probability > best_probability:
result = text result = text
best_probability = probability best_probability = probability
@@ -271,11 +218,11 @@ def get_best_value_in_keys(source, keys):
# 从keys中获取所有value组成list # 从keys中获取所有value组成list
def get_values_of_keys(source, keys): def get_values_of_key(source, key):
result = [] result = []
for key in keys: values = source.get(key)
value = source.get(key) if values:
if value: for value in values:
for v in value: for v in value:
v = v.get("text") v = v.get("text")
if v: if v:
@@ -289,7 +236,7 @@ def save_or_update_ie(table, pk_phhd, data):
obj = table(**data) obj = table(**data)
session = MysqlSession() session = MysqlSession()
db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none() db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none()
now = util.get_default_datetime() now = common_util.get_default_datetime()
if db_data: if db_data:
# 更新 # 更新
db_data.update_time = now db_data.update_time = now
@@ -364,23 +311,24 @@ def search_department(department):
    return best_match


def settlement_task(pk_phhd, settlement_list_ie_result):
    settlement_data = {
        "pk_phhd": pk_phhd,
        "name": handle_name(get_best_value_of_key(settlement_list_ie_result, IE_KEY['name'])),
        "admission_date_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_date'])),
        "discharge_date_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['discharge_date'])),
        "personal_cash_payment_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_cash_payment'])),
        "personal_account_payment_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_account_payment'])),
        "personal_funded_amount_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_funded_amount'])),
        "medical_insurance_type_str": handle_original_data(
            get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_insurance_type'])),
        "admission_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_id'])),
        "settlement_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['settlement_id'])),
    }
    settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
@@ -390,27 +338,30 @@ def settlement_task(pk_phhd, settlement_list, identity):
    settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
    settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])
    parse_money_result = parse_money(
        get_best_value_of_key(settlement_list_ie_result, IE_KEY['uppercase_medical_expenses']),
        get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_expenses']))
    settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
    settlement_data["medical_expenses"] = parse_money_result[1]
    save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)
    return settlement_data
def discharge_task(pk_phhd, discharge_record_ie_result):
    hospitals = get_values_of_key(discharge_record_ie_result, IE_KEY['hospital'])
    departments = get_values_of_key(discharge_record_ie_result, IE_KEY['department'])
    discharge_data = {
        "pk_phhd": pk_phhd,
        "hospital": handle_hospital(",".join(hospitals)),
        "department": handle_department(",".join(departments)),
        "name": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['name'])),
        "admission_date_str": handle_original_data(
            get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_date'])),
        "discharge_date_str": handle_original_data(
            get_best_value_of_key(discharge_record_ie_result, IE_KEY['discharge_date'])),
        "doctor": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['doctor'])),
        "admission_id": handle_id(get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_id'])),
        "age": handle_age(get_best_value_of_key(discharge_record_ie_result, IE_KEY['age'])),
    }
    discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
    discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
@@ -467,52 +418,270 @@ def discharge_task(pk_phhd, discharge_record, identity):
    if best_match:
        discharge_data["pk_ylks"] = best_match[2]
    save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
    return discharge_data


def cost_task(pk_phhd, cost_list_ie_result):
    cost_data = {
        "pk_phhd": pk_phhd,
        "name": handle_name(get_best_value_of_key(cost_list_ie_result, IE_KEY['name'])),
        "admission_date_str": handle_original_data(
            get_best_value_of_key(cost_list_ie_result, IE_KEY['admission_date'])),
        "discharge_date_str": handle_original_data(
            get_best_value_of_key(cost_list_ie_result, IE_KEY['discharge_date'])),
        "medical_expenses_str": handle_original_data(
            get_best_value_of_key(cost_list_ie_result, IE_KEY['medical_expenses']))
    }
    cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
    cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
    cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
    if cost_list_ie_result.get(IE_KEY['page']):
        page_nums, page_count = parse_page_num(cost_list_ie_result[IE_KEY['page']])
        if page_nums:
            page_nums_str = [str(num) for num in page_nums]
            cost_data['page_nums'] = handle_original_data(','.join(page_nums_str))
            cost_data['page_count'] = handle_tiny_int(page_count)
    save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
    return cost_data
def parse_pdf_text(settlement_text):
    pattern = (r'(?:交款人:(.*?)\n|住院时间:(.*?)至(.*?)\n|\(小写\)(.*?)\n|个人现金支付:(.*?)\n|个人账户支付:(.*?)\n'
               r'|个人自费:(.*?)\n|医保类型:(.*?)\n|住院科别:(.*?)\n|住院号:(.*?)\n|票据号码:(.*?)\n|)')
    # find all matching fields in the pdf text
    matches = re.findall(pattern, settlement_text)
    results = {}
    keys = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费', '医保类型',
            '科室', '住院号', '医保结算单号码']
    for match in matches:
        for key, value in zip(keys, match):
            if value:
                results[key] = [[{'text': value, 'probability': 1}]]
    settlement_key = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费',
                      '医保类型', '住院号', '医保结算单号码']
    discharge_key = ['科室', '患者姓名', '入院日期', '出院日期', '住院号']
    cost_key = ['患者姓名', '入院日期', '出院日期', '费用总额']
    settlement_result = {key: copy.copy(results[key]) for key in settlement_key if key in results}
    discharge_result = {key: copy.copy(results[key]) for key in discharge_key if key in results}
    cost_result = {key: copy.copy(results[key]) for key in cost_key if key in results}
    return settlement_result, discharge_result, cost_result
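The technique above pairs one capture group per labeled field with a parallel key list. A trimmed-down, runnable demonstration against synthetic settlement text (only three of the real fields are kept; the sample text is made up):

```python
import re

# Each alternative captures the value after one field label; the trailing empty
# alternative lets findall scan the whole text. Groups line up with `keys`.
pattern = r'(?:交款人:(.*?)\n|住院时间:(.*?)至(.*?)\n|住院号:(.*?)\n|)'
keys = ['患者姓名', '入院日期', '出院日期', '住院号']
text = '交款人:张三\n住院时间:2024-01-01至2024-01-10\n住院号:123456\n'

results = {}
for match in re.findall(pattern, text):
    for key, value in zip(keys, match):
        if value:  # empty groups belong to alternatives that did not match here
            results[key] = [[{'text': value, 'probability': 1}]]
```

Each extracted value is wrapped in the same `[[{'text': …, 'probability': 1}]]` shape the IE results use, so the downstream tasks can consume pdf-derived and model-derived data uniformly.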
def photo_review(pk_phhd, name):
    """
    Process a single reimbursement case
    :param pk_phhd: primary key of the reimbursement form
    :param name: name of the claimant
    """
    settlement_result = defaultdict(list)
    discharge_result = defaultdict(list)
    cost_result = defaultdict(list)
    session = MysqlSession()
    phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
        ZxPhrec.pk_phhd == pk_phhd
    ).order_by(ZxPhrec.cRectype, ZxPhrec.rowno).all()
    session.close()
    # identifier shared by all images of this batch
    set_batch_id(uuid.uuid4().hex)
    processed_img_dir = common_util.get_processed_img_path('')
    os.makedirs(processed_img_dir, exist_ok=True)
    has_pdf = False  # whether a pdf was obtained; with a pdf the information can be extracted faster
    better_settlement_path = None
    better_cost_path = None
    settlement_text = ''
    qrcode_img_id = None
    for phrec in phrecs:
        original_img_path = common_util.get_img_path(phrec.cfjaddress)
        if not original_img_path:
            img_url = ufile.get_private_url(phrec.cfjaddress)
            if not img_url:
                continue
            original_img_path = common_util.save_to_local(img_url)
        img_path = common_util.get_processed_img_path(phrec.cfjaddress)
        shutil.copy2(original_img_path, img_path)
        # try to obtain a high-resolution copy via the QR code
        better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress)
        if better_settlement_path:
            has_pdf = True
            qrcode_img_id = phrec.cfjaddress
            break
    discharge_text = ''
    if has_pdf:
        settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text)
        discharge_ie_result = defaultdict(list)
        is_cost_updated = False
        for phrec in phrecs:
            if phrec.cRectype == '1':
                if phrec.cfjaddress == qrcode_img_id:
                    try:
                        ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
                        ufile.upload_file(phrec.cfjaddress, better_settlement_path)
                    except Exception as e:
                        logging.error("Failed to replace the settlement image with the pdf version", exc_info=e)
            elif phrec.cRectype == '3':
                rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
                if rec_type == '出院记录':
                    discharge_text += ocr_text
                    for key, value in ie_result.items():
                        discharge_ie_result[key].append(value)
            # the cost list cannot be located reliably yet, so it is not replaced for now
            # elif phrec.cRectype == '4':
            #     if not is_cost_updated:
            #         try:
            #             ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
            #             ufile.upload_file(phrec.cfjaddress, better_cost_path)
            #         except Exception as e:
            #             logging.error("Failed to replace the cost-list image with the pdf version", exc_info=e)
            #         finally:
            #             is_cost_updated = True
        # merge the discharge-record results with the pdf results
        for key, value in discharge_ie_result.items():
            ie_value = get_best_value_of_key(discharge_ie_result, key)
            pdf_value = discharge_result.get(key)[0][0]['text'] if discharge_result.get(key) else ''
            similarity_ratio = fuzz.ratio(ie_value, pdf_value)
            if similarity_ratio < 60:
                discharge_result[key] = [[{'text': ie_value, 'probability': 1}]]
    else:
        for phrec in phrecs:
            rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
            if rec_type == '基本医保结算单':
                rec_result = settlement_result
            elif rec_type == '出院记录':
                rec_result = discharge_result
                discharge_text += ocr_text
            elif rec_type == '费用清单':
                rec_result = cost_result
            else:
                rec_result = None
            if rec_result is not None:
                for key, value in ie_result.items():
                    rec_result[key].append(value)
    # remove the temporary images
    if os.path.exists(processed_img_dir) and os.path.isdir(processed_img_dir):
        shutil.rmtree(processed_img_dir)
    settlement_data = settlement_task(pk_phhd, settlement_result)
    discharge_data = discharge_task(pk_phhd, discharge_result)
    cost_data = cost_task(pk_phhd, cost_result)
    # check whether any of the three required documents is missing
    review_result = {
        'pk_phhd': pk_phhd,
        'has_settlement': bool(settlement_result),
        'has_discharge': bool(discharge_result),
        'has_cost': bool(cost_result),
    }
    if (review_result['has_settlement'] and settlement_data.get('personal_account_payment')
            and settlement_data.get('personal_cash_payment') and settlement_data.get('medical_expenses')):
        review_result['has_settlement'] &= (
            float(settlement_data['personal_account_payment']) + float(settlement_data['personal_cash_payment'])
            < float(settlement_data['medical_expenses'])
        )
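The plausibility rule used here is simple enough to state in isolation: a settlement sheet is only trusted when the two personal payment components sum to less than the total expenses. A self-contained sketch with made-up amounts:

```python
# Synthetic settlement amounts (stored as strings, as in the extracted data).
settlement_data = {'personal_account_payment': '100.00',
                   'personal_cash_payment': '250.50',
                   'medical_expenses': '1200.00'}

# Trusted only if account payment + cash payment < total medical expenses.
plausible = (float(settlement_data['personal_account_payment'])
             + float(settlement_data['personal_cash_payment'])
             < float(settlement_data['medical_expenses']))
```

In the real flow this boolean is AND-ed into `has_settlement`, so an implausible sheet is treated as if the settlement document were missing.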
    if has_pdf:
        review_result['has_discharge'] &= bool(discharge_text)
    # check the three documents for missing pages
    page_description = []
    if review_result['has_discharge']:
        for discharge_item in DISCHARGE_KEY:
            if not any(key in discharge_text for key in DISCHARGE_KEY[discharge_item]):
                page_description.append("《出院记录》缺页")
                break
    if review_result['has_cost']:
        cost_missing_page = {}
        if cost_data.get('page_nums') and cost_data.get('page_count'):
            page_nums = cost_data['page_nums'].split(',')
            required_set = set(range(1, cost_data['page_count'] + 1))
            page_set = set(int(num) for num in page_nums)
            cost_missing_page = required_set - page_set
        if cost_missing_page:
            cost_missing_page = sorted(cost_missing_page)
            cost_missing_page = [str(num) for num in cost_missing_page]
            page_description.append(f"《住院费用清单》,缺第{','.join(cost_missing_page)}")
    if page_description:
        review_result['full_page'] = False
        review_result['page_description'] = ';'.join(page_description)
    else:
        review_result['full_page'] = True
    review_result['integrity'] = (review_result['has_settlement'] and review_result['has_discharge']
                                  and review_result['has_cost'] and review_result['full_page'])
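The missing-page computation for the cost list reduces to a set difference between the pages seen by OCR and the full `1..page_count` range. A minimal sketch with sample values:

```python
# Pages parsed from the cost list by OCR (made-up sample), and the page count
# printed on the document itself.
page_nums = [1, 2, 4]
page_count = 5

required_set = set(range(1, page_count + 1))  # pages that should exist
missing = sorted(required_set - set(page_nums))
description = ','.join(str(n) for n in missing)  # joined into the review message
```

Sorting before joining keeps the rendered description stable regardless of the order the pages were recognized in.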
    # consistency checks across the three documents
    # name consistency
    name_list = [settlement_data['name'], discharge_data['name'], cost_data['name']]
    if sum(not bool(n) for n in name_list) > 1:  # with two or more blanks, treat everything as inconsistent
        review_result['name_match'] = '0'
    else:
        unique_name = set(name_list)
        if len(unique_name) == 1:
            review_result['name_match'] = '1' if name == unique_name.pop() else '5'
        elif len(unique_name) == 2:
            if settlement_data['name'] != discharge_data['name'] and settlement_data['name'] != cost_data['name']:
                review_result['name_match'] = '2'
            elif discharge_data['name'] != settlement_data['name'] and discharge_data['name'] != cost_data['name']:
                review_result['name_match'] = '3'
            else:
                review_result['name_match'] = '4'
        else:
            review_result['name_match'] = '0'
    # hospitalization-date consistency
    if (settlement_data['admission_date'] and discharge_data['admission_date']
            and settlement_data['discharge_date'] and discharge_data['discharge_date']
            and settlement_data['admission_date'] == discharge_data['admission_date']
            and settlement_data['discharge_date'] == discharge_data['discharge_date']):
        review_result['admission_date_match'] = '1'
    else:
        review_result['admission_date_match'] = '0'
    # discharge-date consistency
    discharge_date_list = [settlement_data['discharge_date'], discharge_data['discharge_date'],
                           cost_data['discharge_date']]
    if sum(not bool(d) for d in discharge_date_list) > 1:
        review_result['discharge_date_match'] = '0'
    else:
        unique_discharge_date = set(discharge_date_list)
        if len(unique_discharge_date) == 1:
            review_result['discharge_date_match'] = '1'
        elif len(unique_discharge_date) == 2:
            if (settlement_data['discharge_date'] != discharge_data['discharge_date']
                    and settlement_data['discharge_date'] != cost_data['discharge_date']):
                review_result['discharge_date_match'] = '2'
            elif (discharge_data['discharge_date'] != settlement_data['discharge_date']
                    and discharge_data['discharge_date'] != cost_data['discharge_date']):
                review_result['discharge_date_match'] = '3'
            else:
                review_result['discharge_date_match'] = '4'
        else:
            review_result['discharge_date_match'] = '0'
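Both the name check and the discharge-date check use the same odd-one-out coding ('0' no basis or all differ, '1' all agree, '2'/'3'/'4' one document disagrees). A standalone sketch of that coding (the helper name is hypothetical):

```python
def match_code(a, b, c):
    """Three-way consistency code for (settlement, discharge record, cost list)."""
    values = [a, b, c]
    if sum(not bool(v) for v in values) > 1:  # two or more blanks: nothing to compare
        return '0'
    unique = set(values)
    if len(unique) == 1:
        return '1'          # all three agree
    if len(unique) == 2:
        if a != b and a != c:
            return '2'      # the settlement value is the odd one out
        if b != a and b != c:
            return '3'      # the discharge-record value is the odd one out
        return '4'          # the cost-list value is the odd one out
    return '0'              # all three differ
```

Because only three values are compared, `len(unique) == 2` always means exactly one document disagrees, so the three pairwise tests are exhaustive.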
    review_result['consistency'] = (
        review_result['name_match'] == '1' and review_result['admission_date_match'] == '1'
        and review_result['discharge_date_match'] == '1')
    review_result['success'] = review_result['integrity'] and review_result['consistency']
    save_or_update_ie(ZxIeReview, pk_phhd, review_result)
def main():
    """
    Batch control for photo review
    """
    while True:
        session = MysqlSession()
        phhds = (session.query(ZxPhhd.pk_phhd, ZxPhhd.cXm)
                 .join(ZxPhrec, ZxPhhd.pk_phhd == ZxPhrec.pk_phhd, isouter=True)
                 .filter(ZxPhhd.exsuccess_flag == "1")
                 .filter(ZxPhrec.pk_phrec.isnot(None))
@@ -529,14 +698,14 @@ def main():
        pk_phhd = phhd.pk_phhd
        logging.info(f"Start recognition: {pk_phhd}")
        start_time = time.time()
        photo_review(pk_phhd, phhd.cXm)
        # update the flag once recognition finishes
        session = MysqlSession()
        update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
            exsuccess_flag="8",
            ref_id1=HOSTNAME,
            checktime=common_util.get_default_datetime(),
            fFSYLFY=time.time() - start_time))
        session.execute(update_flag)
        session.commit()


@@ -6,7 +6,7 @@ from sqlalchemy.sql.functions import count
from db import MysqlSession
from db.mysql import ZxPhhd, ViewErrorReview
from util import common_util


def handle_reason(reason):
@@ -95,5 +95,5 @@ if __name__ == '__main__':
    print(result)
    with open("photo_review_error_report.txt", 'w', encoding='utf-8') as file:
        file.write(json.dumps(result, indent=4, ensure_ascii=False))
        file.write(common_util.get_default_datetime())
    print("Results saved.")


@@ -1,16 +1,16 @@
beautifulsoup4==4.12.3  # web-page parsing
jieba==0.42.1  # Chinese word segmentation
numpy==1.26.4
OpenCC==1.1.9  # Simplified/Traditional Chinese conversion
opencv-python==4.6.0.66
opencv-python-headless==4.10.0.84
pillow==10.4.0
PyMuPDF==1.24.9  # pdf processing
pymysql==1.1.1
rapidfuzz==3.9.4  # text similarity
requests==2.32.3
sqlacodegen==2.3.0.post1  # entity-class generation
sqlalchemy==1.4.52  # ORM framework
tenacity==8.5.0  # retry
ufile==3.2.9  # cloud storage
zxing-cpp==2.2.0  # QR code recognition


@@ -0,0 +1,245 @@
### PyCharm template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# mounted into the container via volume bindings
/log
/model
# docker
Dockerfile


@@ -0,0 +1,28 @@
# use the official paddle image as the base
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6

# set the working directory
WORKDIR /app

# set environment variables
ENV PYTHONUNBUFFERED=1 \
    # time zone
    TZ=Asia/Shanghai \
    # pip mirror to speed up installation
    PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple

# install dependencies
COPY requirements.txt /app/requirements.txt
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# copy the current directory into /app inside the container
COPY . /app

# expose the port
# EXPOSE 8081

# run the api service; the concrete arguments are given on the command line or in docker-compose.yml
ENTRYPOINT ["gunicorn"]


@@ -0,0 +1,21 @@
"""
Information-extraction key configuration
"""
IE_KEY = {
    'name': '患者姓名',
    'admission_date': '入院日期',
    'discharge_date': '出院日期',
    'medical_expenses': '费用总额',
    'personal_cash_payment': '个人现金支付',
    'personal_account_payment': '个人账户支付',
    'personal_funded_amount': '自费金额',
    'medical_insurance_type': '医保类型',
    'hospital': '医院',
    'department': '科室',
    'doctor': '主治医生',
    'admission_id': '住院号',
    'settlement_id': '医保结算单号码',
    'age': '年龄',
    'uppercase_medical_expenses': '大写总额',
    'page': '页码',
}


@@ -0,0 +1,30 @@
import logging.config

from flask import Flask, request
from paddleclas import PaddleClas

from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)
CLAS = PaddleClas(model_name='text_image_orientation')


@app.route(rule='/', methods=['POST'])
@process_request
def main():
    """
    Detect the rotation angle of an image; rotating it counter-clockwise by that angle makes it upright.
    Possible values: ['0', '90', '180', '270']
    :return: the two most likely angles
    """
    img_path = request.form.get('img_path')
    clas_result = CLAS.predict(input_data=img_path)
    clas_result = next(clas_result)[0]
    if clas_result['scores'][0] < 0.5:
        return ['0', '90']
    return clas_result['label_names']


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5005)


@@ -0,0 +1,31 @@
import logging.config

from flask import Flask, request
from paddlenlp import Taskflow

from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)
schema = ['基本医保结算单', '出院记录', '费用清单']
CLAS = Taskflow('zero_shot_text_classification', model='utc-xbase', schema=schema,
                task_path='model/text_classification', precision='fp32')


@app.route('/', methods=['POST'])
@process_request
def main():
    text = request.form.get('text')
    cls_result = CLAS(text)
    cls_result = cls_result[0].get('predictions')
    if cls_result:
        cls_result = cls_result[0]
        if cls_result['score'] and float(cls_result['score']) < 0.8:
            logging.info(f"Classification confidence {cls_result['score']} too low, text: {text}")
            return None
        return cls_result['label']


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5008)
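The confidence gate in this endpoint can be isolated into a tiny pure function, which makes the threshold behavior easy to check without running the service (the helper name and sample predictions are hypothetical):

```python
def pick_label(predictions, threshold=0.8):
    """Return the top label, or None when there is no prediction or its score is below threshold."""
    if not predictions:
        return None
    best = predictions[0]
    if best['score'] and float(best['score']) < threshold:
        return None  # too uncertain: let the caller fall back to other classification signals
    return best['label']
```

Returning `None` rather than a low-confidence label matches the service above, where an uncertain classification is treated as "no result".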


@@ -0,0 +1,31 @@
import logging.config
import os.path

import cv2
from flask import Flask, request

from log import LOGGING_CONFIG
from paddle_detection import detector
from utils import process_request, parse_img_path

app = Flask(__name__)


@app.route('/', methods=['POST'])
@process_request
def main():
    img_path = request.form.get('img_path')
    result = detector.get_book_areas(img_path)
    dirname, img_name, img_ext = parse_img_path(img_path)
    books_path = []
    for i in range(len(result)):
        save_path = os.path.join(dirname, f'{img_name}.book_{i}.{img_ext}')
        cv2.imwrite(save_path, result[i])
        books_path.append(save_path)
    return books_path


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5006)


@@ -0,0 +1,28 @@
import logging.config
import os

import cv2
from flask import Flask, request

from doc_dewarp import dewarper
from log import LOGGING_CONFIG
from utils import process_request, parse_img_path

app = Flask(__name__)


@app.route('/', methods=['POST'])
@process_request
def main():
    img_path = request.form.get('img_path')
    img = cv2.imread(img_path)
    dewarped_img = dewarper.dewarp_image(img)
    dirname, img_name, img_ext = parse_img_path(img_path)
    save_path = os.path.join(dirname, f'{img_name}.dewarped.{img_ext}')
    cv2.imwrite(save_path, dewarped_img)
    return save_path


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5007)


@@ -0,0 +1,7 @@
import os

from onnxruntime import InferenceSession

MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                          'model', 'dewarp_model', 'doc_tr_pp.onnx')
DOC_TR = InferenceSession(MODEL_PATH, providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])


@@ -11,10 +11,10 @@ def dewarp_image(image):
    y = to_tensor(image)
    img = np.transpose(img, (2, 0, 1))
    bm = DOC_TR.run(None, {'image': img[None,]})[0]
    bm = paddle.to_tensor(bm)
    bm = paddle.nn.functional.interpolate(
        bm, y.shape[2:], mode='bilinear', align_corners=False
    )
    bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
    out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)

(binary image file changed; size 76 KiB before and after)

@@ -0,0 +1,36 @@
import json
import logging.config

from flask import Flask, request
from paddlenlp import Taskflow

from __init__ import IE_KEY
from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)
COST_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
    'name', 'admission_date', 'discharge_date', 'medical_expenses', 'page'
])
COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
                task_path='model/cost_list_model', layout_analysis=False, precision='fp16')


@app.route('/', methods=['POST'], endpoint='cost')
@process_request
def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return COST({'doc': img_path, 'layout': json.loads(layout)})


@app.route('/text', methods=['POST'])
@process_request
def text():
    t = request.form.get('text')
    return COST(t)


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5004)


@@ -0,0 +1,36 @@
import json
import logging.config

from flask import Flask, request
from paddlenlp import Taskflow

from __init__ import IE_KEY
from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)
DISCHARGE_RECORD_SCHEMA = tuple(IE_KEY[key] for key in [
    'hospital', 'department', 'name', 'admission_date', 'discharge_date', 'doctor', 'admission_id', 'age'
])
DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
                     task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')


@app.route('/', methods=['POST'], endpoint='discharge')
@process_request
def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})


@app.route('/text', methods=['POST'])
@process_request
def text():
    t = request.form.get('text')
    return DISCHARGE(t)


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5003)


@@ -0,0 +1,38 @@
import json
import logging.config

from flask import Flask, request
from paddlenlp import Taskflow

from __init__ import IE_KEY
from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)

SETTLEMENT_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
    'name', 'admission_date', 'discharge_date', 'medical_expenses', 'personal_cash_payment',
    'personal_account_payment', 'personal_funded_amount', 'medical_insurance_type', 'admission_id', 'settlement_id',
    'uppercase_medical_expenses'
])
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
                         task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')


@app.route('/', methods=['POST'], endpoint='settlement')
@process_request
def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})


@app.route('/text', methods=['POST'])
@process_request
def text():
    t = request.form.get('text')
    return SETTLEMENT_IE(t)


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5002)


@@ -0,0 +1,70 @@
import os
import socket

# Hostname, used to tell log files from different containers apart
HOSTNAME = socket.gethostname()

# Create the log directories if they do not exist yet
LOG_PATHS = [
    f'log/{HOSTNAME}/error',
]
for path in LOG_PATHS:
    if not os.path.exists(path):
        os.makedirs(path)

# Logging configuration dictionary
LOGGING_CONFIG = {
    'version': 1,  # required; version of the configuration schema
    'disable_existing_loggers': False,  # keep logger instances created before this config loads
    # formatters define the layout of log messages
    'formatters': {
        'standard': {
            'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S',
        },
    },
    # handlers define where log records are written
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',  # console handler
            'level': 'DEBUG',
            'formatter': 'standard',
            'stream': 'ext://sys.stdout',  # stdout; default encoding follows the system, usually UTF-8
        },
        'file': {
            'class': 'logging.handlers.TimedRotatingFileHandler',  # file handler with log rotation
            'level': 'INFO',
            'formatter': 'standard',
            'filename': f'log/{HOSTNAME}/fcb_photo_review.log',  # log file path
            'when': 'midnight',
            'interval': 1,
            'backupCount': 14,  # number of rotated backups to keep
            'encoding': 'utf-8',  # write files as UTF-8 so Chinese text is preserved
        },
        'error': {
            'class': 'logging.handlers.TimedRotatingFileHandler',
            'level': 'INFO',
            'formatter': 'standard',
            'filename': f'log/{HOSTNAME}/error/fcb_photo_review_error.log',
            'when': 'midnight',
            'interval': 1,
            'backupCount': 14,
            'encoding': 'utf-8',
        },
    },
    # loggers wire logger names to handlers
    'loggers': {
        '': {  # root logger
            'handlers': ['console', 'file'],  # attached handlers
            'level': 'DEBUG',  # root logger level
            'propagate': False,  # do not propagate records to ancestor loggers
        },
        'error': {
            'handlers': ['console', 'file', 'error'],
            'level': 'DEBUG',
            'propagate': False,
        },
    },
}
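The practical effect of this config is a two-tier split: loggers obtained by module name go to the console plus the main rotating file, while `logging.getLogger('error')` additionally feeds the dedicated error file. A console-only copy of the same structure shows the wiring without needing the `log/` directory tree:

```python
import logging
import logging.config

# Trimmed copy of LOGGING_CONFIG above: same logger split, console handler only,
# so it runs without creating the log/{HOSTNAME} directories.
DEMO_CONFIG = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {'standard': {'format': '%(levelname)s %(name)s: %(message)s'}},
    'handlers': {'console': {'class': 'logging.StreamHandler',
                             'level': 'DEBUG', 'formatter': 'standard'}},
    'loggers': {
        '': {'handlers': ['console'], 'level': 'DEBUG', 'propagate': False},
        'error': {'handlers': ['console'], 'level': 'DEBUG', 'propagate': False},
    },
}

logging.config.dictConfig(DEMO_CONFIG)
logging.getLogger(__name__).info('handled by the root logger')
logging.getLogger('error').error('handled by the dedicated error logger')
```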


@@ -0,0 +1 @@
Directory for the fine-tuned hospitalization cost list information-extraction model


@@ -0,0 +1 @@
Directory for the image dewarping (distortion correction) model


@@ -0,0 +1 @@
Directory for the fine-tuned discharge record information-extraction model


@@ -0,0 +1 @@
Directory for the document detection model


@@ -0,0 +1 @@
Directory for the fine-tuned basic medical insurance settlement statement information-extraction model


@@ -0,0 +1 @@
Directory for the text classification model


@@ -0,0 +1,24 @@
import logging.config

from flask import Flask, request
from paddleocr import PaddleOCR

from log import LOGGING_CONFIG
from utils import process_request

app = Flask(__name__)

# Set use_space_char=False if spaces should not be recognized. Be sure to test
# this setting first: in version 2.7.3 it has a bug that makes recognition fail.
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_thresh=0.1, det_db_box_thresh=0.3, det_limit_side_len=1248,
                drop_score=0.3)


@app.route('/', methods=['POST'])
@process_request
def main():
    img_path = request.form.get('img_path')
    return OCR.ocr(img_path, cls=False)


if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run('0.0.0.0', 5001)
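`PaddleOCR.ocr` returns, per input image, a list of `[box, (text, score)]` entries, where `box` holds the four corner points of a detected text line. A small parsing sketch over a fabricated sample result (the texts and coordinates are invented for illustration):

```python
# Fabricated example of the result shape for one image: each entry is
# [box, (text, confidence)], box being four [x, y] corner points.
sample = [[
    [[[12.0, 8.0], [180.0, 8.0], [180.0, 40.0], [12.0, 40.0]], ('出院记录', 0.98)],
    [[[12.0, 52.0], [300.0, 52.0], [300.0, 84.0], [12.0, 84.0]], ('姓名:张三', 0.91)],
]]

def extract_lines(ocr_result, min_score=0.3):
    """Keep only the recognized text whose confidence clears the threshold
    (mirroring the drop_score=0.3 used when constructing the OCR engine)."""
    return [text for box, (text, score) in ocr_result[0] if score >= min_score]
```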


@@ -0,0 +1 @@
README_cn.md


@@ -0,0 +1,7 @@
import os

from onnxruntime import InferenceSession

MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'model', 'object_det_model')
# PP-YOLOE+ (CRN-L, 80 epochs, COCO) exported with NMS fused into the ONNX graph
PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'),
                              providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])
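An exported PP-YOLOE+ graph takes a fixed-size float tensor, so images need resizing and normalization before `session.run`. A rough numpy-only preprocessing sketch; the 640x640 input size, the normalization, and the scale-factor layout are assumptions to verify against the exported model's actual input signature:

```python
import numpy as np

def preprocess(img, size=640):
    """Resize an HWC uint8 image to a square detector input, scale to [0, 1],
    and reorder to NCHW float32. Nearest-neighbour resize keeps the sketch
    dependency-free; a real pipeline would use cv2.resize."""
    h, w = img.shape[:2]
    ys = np.arange(size) * h // size          # source row for each output row
    xs = np.arange(size) * w // size          # source column for each output column
    resized = img[ys][:, xs]
    x = resized.astype(np.float32) / 255.0
    x = np.transpose(x, (2, 0, 1))[None]      # HWC -> NCHW, add batch dimension
    # many PP-YOLOE exports also take a scale_factor input to map boxes back
    scale = np.array([[size / h, size / w]], dtype=np.float32)
    return x, scale
```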

Some files were not shown because too many files have changed in this diff.