优化接口图片传输方式

2024-09-23 14:45:03 +08:00
parent a2a82df21c
commit c091a82a91
8 changed files with 89 additions and 62 deletions
--- a/det_api.py
+++ b/det_api.py
@@ -1,8 +1,7 @@
-import base64
+import os.path
 import cv2
-import numpy as np
+from flask import Flask, request, Blueprint
 from flask import Flask, request, jsonify, Blueprint
 from paddle_detection import detector
 from util.common_util import process_request
@@ -12,25 +11,23 @@ det_bp = Blueprint('det_bp', __name__)
 app.register_blueprint(det_bp, url_prefix='/det')
-@det_bp.route("/books", methods=['POST'])
+@det_bp.route('/books', methods=['POST'])
@process_request
 def books():
-    try:
+    img_path = request.form['img_path']
-        file = request.files['image']
+    image = cv2.imread(img_path)
-        image_data = file.read()
+    result = detector.get_book_areas(image)
-        nparr = np.frombuffer(image_data, np.uint8)
+
-        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    dirname = os.path.dirname(img_path)
-        result = detector.get_book_areas(image)
+    img_name, ext = os.path.basename(img_path).rsplit('.', 1)
-        encoded_images = []
+    books_path = []
-        for i in result:
+    for i in range(len(result)):
-            _, encoded_image = cv2.imencode('.jpg', i)
+        save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext)
-            byte_stream = encoded_image.tobytes()
+        with open(save_path, 'wb') as file:
-            img_str = base64.b64encode(byte_stream).decode('utf-8')
+            file.write(result[i])
-            encoded_images.append(img_str)
+        books_path.append(save_path)
-        return jsonify(encoded_images), 200
+    return books_path
    except Exception as e:
        return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
-    app.run("0.0.0.0", 5000)
+    app.run('0.0.0.0', 5000)
--- a/log/init.py
+++ b/log/init.py
@@ -1,6 +1,8 @@
 import os
 import socket
 # 项目根目录
 PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 # 获取主机名，方便区分容器
 HOSTNAME = socket.gethostname()
 # 检测日志文件的路径是否存在，不存在则创建
--- a/paddle_detection/detector.py
+++ b/paddle_detection/detector.py
@@ -1,12 +1,8 @@
 import base64
 import logging
 import tempfile
 from collections import defaultdict
 import cv2
 import numpy as np
 import requests
 from tenacity import retry, stop_after_attempt, wait_random
 from paddle_detection import PADDLE_DET
 from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -51,26 +47,3 @@ def get_book_areas(image):
    for book_area in book_areas:
        result.append(image_util.capture(image, book_area["box"]))
    return result
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取文档区域失败！"))
def request_book_areas(image, timeout=120):
    """
    Ask the detection service to locate and crop the document areas of an image.

    The image is JPEG-encoded and uploaded as a multipart file. On HTTP 200 the response JSON is read as a
    list of base64 strings, and each one is decoded back into an image. The call is retried up to 3 times
    with a random 1-3 second wait when it raises, and the last exception is re-raised.

    :param image: image array accepted by cv2.imencode
    :param timeout: seconds handed to requests.post so a stalled service cannot block the caller forever
    :return: decoded crops whose long side / short side ratio is at most 6.5; an empty list when the service
             answers with a non-200 status
    """
    url = "http://det_api:5000/det/books"
    _, encoded_image = cv2.imencode('.jpg', image)
    files = {"image": ("image.jpg", encoded_image.tobytes())}
    response = requests.post(url, files=files, timeout=timeout)
    if response.status_code != 200:
        return []

    result = []
    for img_str in response.json():
        np_array = np.frombuffer(base64.b64decode(img_str), np.uint8)
        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode yields None for data it cannot decode; skip it instead of crashing on .shape
            continue
        height, width = img.shape[:2]
        short_side = min(height, width)
        if short_side == 0:
            continue  # guard the ratio below against a division by zero
        if max(height, width) / short_side <= 6.5:
            result.append(img)  # filter out abnormal results
    return result
--- a/photo_review.py
+++ b/photo_review.py
@@ -11,27 +11,26 @@ from db.mysql import ZxPhhd
 from log import LOGGING_CONFIG
 from photo_review import auto_photo_review, SEND_ERROR_EMAIL
 # 项目必须从此处启动，否则代码中的相对路径可能导致错误的发生
 if __name__ == '__main__':
    program_name = '照片审核自动识别脚本'
    logging.config.dictConfig(LOGGING_CONFIG)
    parser = argparse.ArgumentParser()
-    parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
+    parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
    args = parser.parse_args()
    if args.clean:
        # 主要用于启动时，清除仍在涂抹中的案子
        session = MysqlSession()
-        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
+        update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
        session.execute(update_flag)
        session.commit()
        session.close()
-        logging.info("已释放残余的识别案子！")
+        logging.info('已释放残余的识别案子！')
    else:
        sleep(5)
    try:
-        logging.info(f"【{program_name}】开始运行")
+        logging.info(f'【{program_name}】开始运行')
        auto_photo_review.main()
    except Exception as e:
        error_logger = logging.getLogger('error')
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -23,7 +23,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
    ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
    UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
 from ucloud import ufile
-from util import image_util, common_util, html_util
+from util import image_util, common_util, html_util, model_util
 from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
    handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
    parse_hospital
@@ -148,12 +148,12 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
 def information_extraction(ie, phrecs, identity):
    result = {}
    for phrec in phrecs:
-        img_path = ufile.get_private_url(phrec.cfjaddress)
+        img_url = ufile.get_private_url(phrec.cfjaddress)
-        if not img_path:
+        if not img_url:
            continue
-        image = image_util.read(img_path)
+        img_path = image_util.save_to_local(img_url)
-
+        image = cv2.imread(img_path)
        # 尝试从二维码中获取高清图片
        better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
        if phrec.cRectype != '1':
@@ -166,12 +166,11 @@ def information_extraction(ie, phrecs, identity):
                info_extract = ie(text)[0]
            else:
                info_extract = ie_temp_image(ie, OCR, image)
-            ie_result = {'result': info_extract, 'angle': '0'}
+            if not info_extract:
            now = common_util.get_default_datetime()
            if not ie_result['result']:
                continue
            ie_result = {'result': info_extract, 'angle': img_angle}
            now = common_util.get_default_datetime()
            result_json = json.dumps(ie_result['result'], ensure_ascii=False)
            if len(result_json) > 5000:
                result_json = result_json[:5000]
@@ -184,7 +183,7 @@ def information_extraction(ie, phrecs, identity):
            result = merge_result(result, ie_result['result'])
        else:
            target_images = []
-            # target_images += detector.request_book_areas(image)  # 识别文档区域并裁剪
+            target_images += model_util.request_book_areas(img_path)  # 识别文档区域并裁剪
            if not target_images:
                target_images.append(image)  # 识别失败
            angle_count = defaultdict(int, {'0': 0})  # 分割后图片的最优角度统计
--- a/tmp_img/README.md
+++ b/tmp_img/README.md
@@ -0,0 +1 @@
 本文件夹用于保存临时图片，方便各个服务间调用。
--- a/util/image_util.py
+++ b/util/image_util.py
@@ -1,12 +1,16 @@
 import logging
 import math
 import os
 import urllib.request
 import cv2
 import numpy
 import requests
 from paddleclas import PaddleClas
 from tenacity import retry, stop_after_attempt, wait_random
 from log import PROJECT_ROOT
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取图片失败！"))
@@ -247,3 +251,35 @@ def combined(img1, img2):
    combined_img[:height1, :width1] = img1
    combined_img[:height2, width1:width1 + width2] = img2
    return combined_img
 def parse_img_url(url):
     """
     Split an image url into the file name and the file extension.

     Everything from the first '?' on (the query string) is dropped before the last path segment is
     examined.

     :param url: image url
     :return: two-element list [name, extension]; the extension is '' when the last path segment
              contains no '.', so callers can always unpack two values
     """
     url = url.split('?')[0]
     base_name = os.path.basename(url)
     img_name, dot, img_ext = base_name.rpartition('.')
     if not dot:
         # no extension present: rsplit would have produced a single element here
         return [base_name, '']
     return [img_name, img_ext]
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('保存图片失败！'))
def save_to_local(img_url, save_path=None, timeout=30):
    """
    Download an image and store it on the local disk.

    The call is retried up to 3 times with a random 1-3 second wait when it raises, and the last
    exception is re-raised.

    :param img_url: image url
    :param save_path: local destination including the file name; when None the image is written into
                      the tmp_img folder under PROJECT_ROOT, named after the last segment of the url
    :param timeout: seconds handed to requests.get so a stalled download cannot block forever
    :return: local path the image was written to
    """
    response = requests.get(img_url, timeout=timeout)
    response.raise_for_status()  # raises on a 4xx / 5xx answer
    if save_path is None:
        # join instead of unpacking two values: a url without an extension must not raise here
        file_name = '.'.join(parse_img_url(img_url)).rstrip('.')
        save_path = os.path.join(PROJECT_ROOT, 'tmp_img', file_name)
    target_dir = os.path.dirname(save_path)
    if target_dir:
        # make sure the destination folder exists before opening the file
        os.makedirs(target_dir, exist_ok=True)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path
--- a/util/model_util.py
+++ b/util/model_util.py
@@ -0,0 +1,20 @@
 import logging
 import cv2
 import requests
 from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取文档区域失败！'))
def request_book_areas(img_path, timeout=120):
    """
    Ask the detection service for the document areas of a locally stored image.

    Only the path is posted; on HTTP 200 the response JSON is read as a list of image paths and each
    one is loaded with cv2.imread. The call is retried up to 3 times with a random 1-3 second wait
    when it raises, and the last exception is re-raised.
    NOTE(review): this presumably requires the service and the caller to share the directory that
    holds img_path - confirm against the deployment.

    :param img_path: path of the image to analyse
    :param timeout: seconds handed to requests.post so a stalled service cannot block forever
    :return: list of loaded images; an empty list when the service answers with a non-200 status
    """
    url = 'http://det_api:5000/det/books'
    response = requests.post(url, {'img_path': img_path}, timeout=timeout)
    if response.status_code != 200:
        return []

    books = []
    for books_path in response.json():
        book = cv2.imread(books_path)
        if book is not None:
            # imread yields None for a missing or unreadable file; never hand None to the caller
            books.append(book)
    return books
		`@@ -0,0 +1 @@`
							`本文件夹用于保存临时图片，方便各个服务间调用。`