优化接口图片传输方式

2024-09-23 14:45:03 +08:00
parent a2a82df21c
commit c091a82a91
8 changed files with 89 additions and 62 deletions
--- a/det_api.py
+++ b/det_api.py
@@ -1,8 +1,7 @@
-import base64
+import os.path

 import cv2
-import numpy as np
-from flask import Flask, request, jsonify, Blueprint
+from flask import Flask, request, Blueprint

 from paddle_detection import detector
 from util.common_util import process_request
@@ -12,25 +11,23 @@ det_bp = Blueprint('det_bp', __name__)
 app.register_blueprint(det_bp, url_prefix='/det')


-@det_bp.route("/books", methods=['POST'])
+@det_bp.route('/books', methods=['POST'])
@process_request
 def books():
-    try:
-        file = request.files['image']
-        image_data = file.read()
-        nparr = np.frombuffer(image_data, np.uint8)
-        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    img_path = request.form['img_path']
+    image = cv2.imread(img_path)
    result = detector.get_book_areas(image)
-        encoded_images = []
-        for i in result:
-            _, encoded_image = cv2.imencode('.jpg', i)
-            byte_stream = encoded_image.tobytes()
-            img_str = base64.b64encode(byte_stream).decode('utf-8')
-            encoded_images.append(img_str)
-        return jsonify(encoded_images), 200
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+
+    dirname = os.path.dirname(img_path)
+    img_name, ext = os.path.basename(img_path).rsplit('.', 1)
+    books_path = []
+    for i in range(len(result)):
+        save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext)
+        with open(save_path, 'wb') as file:
+            file.write(result[i])
+        books_path.append(save_path)
+    return books_path


 if __name__ == '__main__':
-    app.run("0.0.0.0", 5000)
+    app.run('0.0.0.0', 5000)
--- a/log/__init__.py
+++ b/log/__init__.py
@@ -1,6 +1,8 @@
 import os
 import socket

+# 项目根目录
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 # 获取主机名，方便区分容器
 HOSTNAME = socket.gethostname()
 # 检测日志文件的路径是否存在，不存在则创建
--- a/paddle_detection/detector.py
+++ b/paddle_detection/detector.py
@@ -1,12 +1,8 @@
-import base64
-import logging
 import tempfile
 from collections import defaultdict

 import cv2
 import numpy as np
-import requests
-from tenacity import retry, stop_after_attempt, wait_random

 from paddle_detection import PADDLE_DET
 from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -51,26 +47,3 @@ def get_book_areas(image):
    for book_area in book_areas:
        result.append(image_util.capture(image, book_area["box"]))
    return result
-
-
-@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
-       after=lambda x: logging.warning("获取文档区域失败！"))
-def request_book_areas(image):
-    url = "http://det_api:5000/det/books"
-    _, encoded_image = cv2.imencode('.jpg', image)
-    byte_stream = encoded_image.tobytes()
-    files = {"image": ("image.jpg", byte_stream)}
-    response = requests.post(url, files=files)
-    if response.status_code == 200:
-        img_str_list = response.json()
-        result = []
-        for img_str in img_str_list:
-            img_data = base64.b64decode(img_str)
-            np_array = np.frombuffer(img_data, np.uint8)
-            img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
-            height, width = img.shape[:2]
-            if max(height, width) / min(height, width) <= 6.5:
-                result.append(img)  # 过滤异常结果
-        return result
-    else:
-        return []
--- a/photo_review.py
+++ b/photo_review.py
@@ -11,27 +11,26 @@ from db.mysql import ZxPhhd
 from log import LOGGING_CONFIG
 from photo_review import auto_photo_review, SEND_ERROR_EMAIL

-# 项目必须从此处启动，否则代码中的相对路径可能导致错误的发生
 if __name__ == '__main__':
    program_name = '照片审核自动识别脚本'
    logging.config.dictConfig(LOGGING_CONFIG)

    parser = argparse.ArgumentParser()
-    parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
+    parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
    args = parser.parse_args()
    if args.clean:
        # 主要用于启动时，清除仍在涂抹中的案子
        session = MysqlSession()
-        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
+        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
        session.execute(update_flag)
        session.commit()
        session.close()
-        logging.info("已释放残余的识别案子！")
+        logging.info('已释放残余的识别案子！')
    else:
        sleep(5)

    try:
-        logging.info(f"【{program_name}】开始运行")
+        logging.info(f'【{program_name}】开始运行')
        auto_photo_review.main()
    except Exception as e:
        error_logger = logging.getLogger('error')
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -23,7 +23,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
    ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
    UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
 from ucloud import ufile
-from util import image_util, common_util, html_util
+from util import image_util, common_util, html_util, model_util
 from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
    handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
    parse_hospital
@@ -148,12 +148,12 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
 def information_extraction(ie, phrecs, identity):
    result = {}
    for phrec in phrecs:
-        img_path = ufile.get_private_url(phrec.cfjaddress)
-        if not img_path:
+        img_url = ufile.get_private_url(phrec.cfjaddress)
+        if not img_url:
            continue

-        image = image_util.read(img_path)
-
+        img_path = image_util.save_to_local(img_url)
+        image = cv2.imread(img_path)
        # 尝试从二维码中获取高清图片
        better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
        if phrec.cRectype != '1':
@@ -166,12 +166,11 @@ def information_extraction(ie, phrecs, identity):
                info_extract = ie(text)[0]
            else:
                info_extract = ie_temp_image(ie, OCR, image)
-            ie_result = {'result': info_extract, 'angle': '0'}
-
-            now = common_util.get_default_datetime()
-            if not ie_result['result']:
+            if not info_extract:
                continue

+            ie_result = {'result': info_extract, 'angle': img_angle}
+            now = common_util.get_default_datetime()
            result_json = json.dumps(ie_result['result'], ensure_ascii=False)
            if len(result_json) > 5000:
                result_json = result_json[:5000]
@@ -184,7 +183,7 @@ def information_extraction(ie, phrecs, identity):
            result = merge_result(result, ie_result['result'])
        else:
            target_images = []
-            # target_images += detector.request_book_areas(image)  # 识别文档区域并裁剪
+            target_images += model_util.request_book_areas(img_path)  # 识别文档区域并裁剪
            if not target_images:
                target_images.append(image)  # 识别失败
            angle_count = defaultdict(int, {'0': 0})  # 分割后图片的最优角度统计
--- a/tmp_img/README.md
+++ b/tmp_img/README.md
@@ -0,0 +1 @@
+本文件夹用于保存临时图片，方便各个服务间调用。
--- a/util/image_util.py
+++ b/util/image_util.py
@@ -1,12 +1,16 @@
 import logging
 import math
+import os
 import urllib.request

 import cv2
 import numpy
+import requests
 from paddleclas import PaddleClas
 from tenacity import retry, stop_after_attempt, wait_random

+from log import PROJECT_ROOT
+

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取图片失败！"))
@@ -247,3 +251,35 @@ def combined(img1, img2):
    combined_img[:height1, :width1] = img1
    combined_img[:height2, width1:width1 + width2] = img2
    return combined_img
+
+
+def parse_img_url(url):
+    """
+    解析图片url
+    :param url: 图片url
+    :return: 图片名称和图片后缀
+    """
+    url = url.split('?')[0]
+    return os.path.basename(url).rsplit('.', 1)
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('保存图片失败！'))
+def save_to_local(img_url, save_path=None):
+    """
+    保存图片到本地
+    :param img_url: 图片url
+    :param save_path: 本地保存地址，精确到文件名
+    :return: 本地保存地址
+    """
+    response = requests.get(img_url)
+    response.raise_for_status()  # 检查响应状态码是否正常
+
+    if save_path is None:
+        img_name, img_ext = parse_img_url(img_url)
+        save_path = os.path.join(PROJECT_ROOT, 'tmp_img', img_name + '.' + img_ext)
+
+    with open(save_path, 'wb') as file:
+        file.write(response.content)
+
+    return save_path
--- a/util/model_util.py
+++ b/util/model_util.py
@@ -0,0 +1,20 @@
+import logging
+
+import cv2
+import requests
+from tenacity import retry, stop_after_attempt, wait_random
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('获取文档区域失败！'))
+def request_book_areas(img_path):
+    url = 'http://det_api:5000/det/books'
+    response = requests.post(url, {'img_path': img_path})
+    if response.status_code == 200:
+        response_data = response.json()
+        books = []
+        for books_path in response_data:
+            books.append(cv2.imread(books_path))
+        return books
+    else:
+        return []
				`@@ -0,0 +1 @@`
				`本文件夹用于保存临时图片，方便各个服务间调用。`