优化文档检测为接口

2024-08-28 17:14:43 +08:00
parent 06b637544a
commit a427ad01b9
4 changed files with 77 additions and 3 deletions
--- a/det_api.py
+++ b/det_api.py
@@ -0,0 +1,32 @@
 import base64
 import cv2
 import numpy as np
 from flask import Flask, request, jsonify
 from paddle_detection import detector
 app = Flask(__name__)
@app.route("/det/detect_books", methods=['POST'])
def detect_books():
    """Detect document regions in an uploaded image and return them as crops.

    Expects a multipart POST carrying the picture in the ``image`` file field.

    Returns:
        A ``(json, status)`` pair:

        * 200 and a JSON list of base64-encoded JPEG strings, one per area
          returned by ``detector.get_book_areas``;
        * 400 and ``{'error': ...}`` when the ``image`` field is missing or
          its content cannot be decoded as an image;
        * 500 and ``{'error': ...}`` when detection or encoding fails.
    """
    try:
        file = request.files.get('image')
        if file is None:
            # A missing upload is a client mistake, not a server failure.
            return jsonify({'error': "missing file field 'image'"}), 400

        nparr = np.frombuffer(file.read(), np.uint8)
        # cv2.imdecode rejects an empty buffer and yields None for bytes it
        # cannot decode; neither case may reach the detector.
        image = cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
        if image is None:
            return jsonify({'error': 'uploaded file is not a decodable image'}), 400

        encoded_images = []
        for area in detector.get_book_areas(image):
            ok, encoded_image = cv2.imencode('.jpg', area)
            if not ok:
                # Do not ship a crop the client would be unable to decode.
                raise ValueError('failed to JPEG-encode a detected area')
            img_str = base64.b64encode(encoded_image.tobytes()).decode('utf-8')
            encoded_images.append(img_str)
        return jsonify(encoded_images), 200
    except Exception as e:
        # Top-level boundary of the endpoint: keep the JSON error contract,
        # but record the traceback so the failure is diagnosable.
        app.logger.exception('detect_books failed')
        return jsonify({'error': str(e)}), 500
 if __name__ == '__main__':
    # Flask's development server listens on 127.0.0.1 unless told otherwise,
    # which is unreachable from other containers. Clients call this service
    # at http://det_api:5000, so bind every interface on that port.
    app.run(host='0.0.0.0', port=5000)
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,13 +1,29 @@
 x-env:
  &template
-  image: fcb_photo_review:1.13.7
+  image: fcb_photo_review:1.13.8
  restart: always
 services:
-  photo_review_1:
+  det_api:
    # Detection API service. Inherits image and restart policy from the
    # shared &template anchor.
    <<: *template
    build:
      context: .
    container_name: det_api
    # Clients address this container by hostname (http://det_api:5000).
    hostname: det_api
    volumes:
      - ./log:/app/log
      - ./model:/app/model
    # Presumably appended to the image's entrypoint so that det_api.py is the
    # script that gets run; confirm against the Dockerfile.
    command: [ "det_api.py" ]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve NVIDIA GPU device "0" for this container.
            - device_ids: [ "0" ]
              capabilities: [ "gpu" ]
              driver: "nvidia"
  photo_review_1:
    <<: *template
    container_name: photo_review_1
    hostname: photo_review_1
    volumes:
--- a/paddle_detection/detector.py
+++ b/paddle_detection/detector.py
@@ -1,8 +1,12 @@
 import base64
 import logging
 import tempfile
 from collections import defaultdict
 import cv2
 import numpy as np
 import requests
 from tenacity import retry, stop_after_attempt, wait_random
 from paddle_detection import PADDLE_DET
 from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -47,3 +51,23 @@ def get_book_areas(image):
    for book_area in book_areas:
        result.append(image_util.capture(image, book_area["box"]))
    return result
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取文档区域失败！"))
def request_book_areas(image):
    """Ask the det_api service for the document areas found in *image*.

    The image is JPEG-encoded and POSTed as the multipart file field
    ``image``; the service answers with a JSON list of base64-encoded JPEG
    crops, which are decoded back into images.

    Args:
        image: Image array accepted by ``cv2.imencode``.

    Returns:
        A list of decoded crop images. The list is empty when the image
        cannot be encoded, when the service replies with a non-200 status,
        or when no returned crop can be decoded.

    Raises:
        Exception: Any error raised while talking to the service (for
            example a connection failure or a timeout) is re-raised once the
            three attempts configured on the decorator are used up.
    """
    url = "http://det_api:5000/det/detect_books"
    ok, encoded_image = cv2.imencode('.jpg', image)
    if not ok:
        logging.warning("图片编码失败，无法请求文档检测接口")
        return []

    files = {"image": ("image.jpg", encoded_image.tobytes())}
    # (connect, read) timeouts in seconds: without them a stalled service
    # would block this call forever and the retry policy would never run.
    response = requests.post(url, files=files, timeout=(5, 60))
    if response.status_code != 200:
        # Callers treat an empty list as "detection failed"; leave a trace
        # of the reason instead of dropping it silently.
        logging.warning("文档检测接口返回状态码%s：%s",
                        response.status_code, response.text[:200])
        return []

    result = []
    for img_str in response.json():
        np_array = np.frombuffer(base64.b64decode(img_str), np.uint8)
        # cv2.imdecode rejects empty buffers and returns None for bytes it
        # cannot decode; skip those rather than hand None to callers.
        crop = cv2.imdecode(np_array, cv2.IMREAD_COLOR) if np_array.size else None
        if crop is not None:
            result.append(crop)
    return result
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -82,7 +82,9 @@ def information_extraction(ie, phrecs, identity):
        image = image_util.read(img_path)
        target_images = []
-        target_images += detector.get_book_areas(image)  # 识别文档区域并裁剪
+        det_time = time.time()
        target_images += detector.request_book_areas(image)  # 识别文档区域并裁剪
        logging.info(f"检测目标耗时{time.time() - det_time}秒")
        if not target_images:
            target_images.append(image)  # 识别失败
        angle_count = defaultdict(int, {"0": 0})  # 分割后图片的最优角度统计