优化文档检测为接口

2024-08-28 17:14:43 +08:00
parent 06b637544a
commit a427ad01b9
4 changed files with 77 additions and 3 deletions
--- a/det_api.py
+++ b/det_api.py
@@ -0,0 +1,32 @@
+import base64
+
+import cv2
+import numpy as np
+from flask import Flask, request, jsonify
+
+from paddle_detection import detector
+
+app = Flask(__name__)
+
+
+@app.route("/det/detect_books", methods=['POST'])
+def detect_books():
+    """Detect document areas in the uploaded 'image' file; reply with base64 JPEG crops."""
+    data = request.files['image'].read() if 'image' in request.files else b''
+    if not data:  # bad upload is a client error, not a server failure
+        return jsonify({'error': "missing or empty 'image' file"}), 400
+    try:
+        image = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
+        if image is None:  # imdecode signals undecodable bytes by returning None
+            return jsonify({'error': 'invalid image data'}), 400
+        encoded_images = []
+        for area in detector.get_book_areas(image):
+            _, jpeg = cv2.imencode('.jpg', area)
+            encoded_images.append(base64.b64encode(jpeg.tobytes()).decode('utf-8'))
+        return jsonify(encoded_images), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+
+if __name__ == '__main__':
+    app.run()
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,13 +1,29 @@
 x-env:
  &template
-  image: fcb_photo_review:1.13.7
+  image: fcb_photo_review:1.13.8
  restart: always

 services:
-  photo_review_1:
+  det_api:
    <<: *template
    build:
      context: .
+    container_name: det_api
+    hostname: det_api
+    volumes:
+      - ./log:/app/log
+      - ./model:/app/model
+    command: [ "det_api.py" ]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - device_ids: [ "0" ]
+              capabilities: [ "gpu" ]
+              driver: "nvidia"
+
+  photo_review_1:
+    <<: *template
    container_name: photo_review_1
    hostname: photo_review_1
    volumes:
--- a/paddle_detection/detector.py
+++ b/paddle_detection/detector.py
@@ -1,8 +1,12 @@
+import base64
+import logging
 import tempfile
 from collections import defaultdict

 import cv2
 import numpy as np
+import requests
+from tenacity import retry, stop_after_attempt, wait_random

 from paddle_detection import PADDLE_DET
 from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -47,3 +51,23 @@ def get_book_areas(image):
    for book_area in book_areas:
        result.append(image_util.capture(image, book_area["box"]))
    return result
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning("获取文档区域失败！"))
+def request_book_areas(image):
+    """Ask the det_api service for the document areas of ``image``.
+
+    Returns the decoded crops, or [] on a non-200 reply; raised errors are retried (3 tries).
+    """
+    url = "http://det_api:5000/det/detect_books"
+    _, encoded_image = cv2.imencode('.jpg', image)
+    files = {"image": ("image.jpg", encoded_image.tobytes())}
+    # Without a timeout requests can block forever, so the retry would never fire.
+    response = requests.post(url, files=files, timeout=(5, 60))
+    if response.status_code != 200:
+        logging.warning("det_api returned HTTP %s", response.status_code)
+        return []
+    buffers = (np.frombuffer(base64.b64decode(s), np.uint8) for s in response.json())
+    crops = (cv2.imdecode(buf, cv2.IMREAD_COLOR) for buf in buffers)
+    return [crop for crop in crops if crop is not None]  # drop undecodable crops
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -82,7 +82,9 @@ def information_extraction(ie, phrecs, identity):

        image = image_util.read(img_path)
        target_images = []
-        target_images += detector.get_book_areas(image)  # 识别文档区域并裁剪
+        det_time = time.time()
+        target_images += detector.request_book_areas(image)  # 识别文档区域并裁剪
+        logging.info(f"检测目标耗时{time.time() - det_time}秒")
        if not target_images:
            target_images.append(image)  # 识别失败
        angle_count = defaultdict(int, {"0": 0})  # 分割后图片的最优角度统计