统一模型接口，新增文本分类接口

2024-09-27 13:50:55 +08:00
parent 117b29a737
commit f1149854ce
13 changed files with 144 additions and 97 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -20,13 +20,13 @@ x-paddle:
    - ./tmp_img:/app/tmp_img

 services:
-  ocr_api:
+  ocr:
    <<: *paddle_template
    build:
      context: ./services/paddle_services
-    container_name: ocr_api
-    hostname: ocr_api
-    command: [ '-w', '1', 'ocr_api:app', '--bind', '0.0.0.0:5001' ]
+    container_name: ocr
+    hostname: ocr
+    command: [ '-w', '1', 'ocr:app', '--bind', '0.0.0.0:5001' ]
    deploy:
      resources:
        reservations:
@@ -35,11 +35,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  settlement_api:
+  ie_settlement:
    <<: *paddle_template
-    container_name: settlement_api
-    hostname: settlement_api
-    command: [ '-w', '1', 'settlement_api:app', '--bind', '0.0.0.0:5002' ]
+    container_name: ie_settlement
+    hostname: ie_settlement
+    command: [ '-w', '1', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ]
    deploy:
      resources:
        reservations:
@@ -48,11 +48,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  discharge_api:
+  ie_discharge:
    <<: *paddle_template
-    container_name: discharge_api
-    hostname: discharge_api
-    command: [ '-w', '1', 'discharge_api:app', '--bind', '0.0.0.0:5003' ]
+    container_name: ie_discharge
+    hostname: ie_discharge
+    command: [ '-w', '1', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ]
    deploy:
      resources:
        reservations:
@@ -61,11 +61,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  cost_api:
+  ie_cost:
    <<: *paddle_template
-    container_name: cost_api
-    hostname: cost_api
-    command: [ '-w', '1', 'cost_api:app', '--bind', '0.0.0.0:5004' ]
+    container_name: ie_cost
+    hostname: ie_cost
+    command: [ '-w', '1', 'ie_cost:app', '--bind', '0.0.0.0:5004' ]
    deploy:
      resources:
        reservations:
@@ -74,11 +74,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  clas_api:
+  clas_orientation:
    <<: *paddle_template
-    container_name: clas_api
-    hostname: clas_api
-    command: [ '-w', '1', 'clas_api:app', '--bind', '0.0.0.0:5005' ]
+    container_name: clas_orientation
+    hostname: clas_orientation
+    command: [ '-w', '1', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ]
    deploy:
      resources:
        reservations:
@@ -87,11 +87,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  det_api:
+  det_book:
    <<: *paddle_template
-    container_name: det_api
-    hostname: det_api
-    command: [ '-w', '1', 'det_api:app', '--bind', '0.0.0.0:5006' ]
+    container_name: det_book
+    hostname: det_book
+    command: [ '-w', '1', 'det_book:app', '--bind', '0.0.0.0:5006' ]
    deploy:
      resources:
        reservations:
@@ -100,11 +100,11 @@ services:
              capabilities: [ 'gpu' ]
              driver: 'nvidia'

-  dewarp_api:
+  dewarp:
    <<: *paddle_template
-    container_name: dewarp_api
-    hostname: dewarp_api
-    command: [ '-w', '1', 'dewarp_api:app', '--bind', '0.0.0.0:5007' ]
+    container_name: dewarp
+    hostname: dewarp
+    command: [ '-w', '1', 'dewarp:app', '--bind', '0.0.0.0:5007' ]
    deploy:
      resources:
        reservations:
@@ -129,37 +129,37 @@ services:
-#      - dewarp_api
+#      - dewarp
 #    command: [ 'photo_review.py', '--clean', 'True' ]

-  #  photo_review_2:
-  #    <<: *project_template
-  #    container_name: photo_review_2
-  #    hostname: photo_review_2
-  #    depends_on:
-  #      - photo_review_1
-  #    command: [ 'photo_review.py' ]
-  #
-  #  photo_review_3:
-  #    <<: *project_template
-  #    container_name: photo_review_3
-  #    hostname: photo_review_3
-  #    depends_on:
-  #      - photo_review_2
-  #    command: [ 'photo_review.py' ]
-  #
-  #  photo_review_4:
-  #    <<: *project_template
-  #    container_name: photo_review_4
-  #    hostname: photo_review_4
-  #    depends_on:
-  #      - photo_review_3
-  #    command: [ 'photo_review.py' ]
-  #
-  #  photo_review_5:
-  #    <<: *project_template
-  #    container_name: photo_review_5
-  #    hostname: photo_review_5
-  #    depends_on:
-  #      - photo_review_4
-  #    command: [ 'photo_review.py' ]
+#  photo_review_2:
+#    <<: *project_template
+#    container_name: photo_review_2
+#    hostname: photo_review_2
+#    depends_on:
+#      - photo_review_1
+#    command: [ 'photo_review.py' ]
+#
+#  photo_review_3:
+#    <<: *project_template
+#    container_name: photo_review_3
+#    hostname: photo_review_3
+#    depends_on:
+#      - photo_review_2
+#    command: [ 'photo_review.py' ]
+#
+#  photo_review_4:
+#    <<: *project_template
+#    container_name: photo_review_4
+#    hostname: photo_review_4
+#    depends_on:
+#      - photo_review_3
+#    command: [ 'photo_review.py' ]
+#
+#  photo_review_5:
+#    <<: *project_template
+#    container_name: photo_review_5
+#    hostname: photo_review_5
+#    depends_on:
+#      - photo_review_4
+#    command: [ 'photo_review.py' ]

 #  photo_mask_1:
 #    <<: *project_template
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -185,8 +185,8 @@ def information_extraction(ie, phrecs, identity):
            target_images = model_util.request_book_areas(img_path)  # 识别文档区域并裁剪
            angle_count = defaultdict(int, {'0': 0})  # 分割后图片的最优角度统计
            for target_image in target_images:
-                dewarped_image = model_util.request_dewarped_image(target_image)  # 去扭曲
-                angles = model_util.request_image_orientation(dewarped_image)
+                dewarped_image = model_util.dewarp(target_image)  # 去扭曲
+                angles = model_util.clas_orientation(dewarped_image)

                split_results = image_util.split(dewarped_image)
                for split_result in split_results:
--- a/services/paddle_services/clas_orientation.py
+++ b/services/paddle_services/clas_orientation.py
@@ -1,4 +1,4 @@
-import logging
+import logging.config

 from flask import Flask, request
 from paddleclas import PaddleClas
@@ -10,9 +10,9 @@ app = Flask(__name__)
 CLAS = PaddleClas(model_name='text_image_orientation')


-@app.route('/clas/orientation', methods=['POST'])
+@app.route(rule='/', methods=['POST'])
@process_request
-def orientation():
+def main():
    """
    判断图片旋转角度，逆时针旋转该角度后为正。可能值['0', '90', '180', '270']
    :return: 最有可能的两个角度
--- a/services/paddle_services/clas_text.py
+++ b/services/paddle_services/clas_text.py
@@ -0,0 +1,38 @@
+import logging.config
+
+from flask import Flask, request
+from paddlenlp import Taskflow
+
+from log import LOGGING_CONFIG
+from utils import process_request
+
+app = Flask(__name__)
+schema = ['基本医保结算单', '出院记录', '费用清单']
+CLAS = Taskflow('zero_shot_text_classification', model='utc-xbase', schema=schema,
+                task_path='model/text_classification', precision='fp16')
+
+
+@app.route('/', methods=['POST'])
+@process_request
+def main():
+    """
+    Classify the posted text with the zero-shot classifier configured with ``schema``.
+
+    Reads the ``text`` form field and runs ``CLAS`` on it.
+
+    :return: label of the first prediction
+    :raises Exception: if there is no prediction or its score is below 0.8
+    """
+    text = request.form.get('text')
+    cls_result = CLAS(text)
+    predictions = cls_result[0].get('predictions')
+    # An empty prediction list would otherwise surface as an opaque IndexError;
+    # report it as the same low-confidence failure.
+    if not predictions or predictions[0]['score'] < 0.8:
+        raise Exception(f'识别结果置信度过低！text: {text}')
+    return predictions[0]['label']
+
+
+if __name__ == '__main__':
+    logging.config.dictConfig(LOGGING_CONFIG)
+    app.run('0.0.0.0', 5008)
--- a/services/paddle_services/det_book.py
+++ b/services/paddle_services/det_book.py
@@ -1,4 +1,4 @@
-import logging
+import logging.config
 import os.path

 import cv2
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
 app = Flask(__name__)


-@app.route('/det/books', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def books():
+def main():
    img_path = request.form.get('img_path')
    result = detector.get_book_areas(img_path)

--- a/services/paddle_services/dewarp_api.py
+++ b/services/paddle_services/dewarp_api.py
@@ -1,4 +1,4 @@
-import logging
+import logging.config
 import os

 import cv2
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
 app = Flask(__name__)


-@app.route('/dewarp', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def dewarp():
+def main():
    img_path = request.form.get('img_path')
    img = cv2.imread(img_path)
    dewarped_img = dewarper.dewarp_image(img)
--- a/services/paddle_services/cost_api.py
+++ b/services/paddle_services/cost_api.py
@@ -1,5 +1,5 @@
 import json
-import logging
+import logging.config

 from flask import Flask, request
 from paddlenlp import Taskflow
@@ -14,9 +14,9 @@ COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-
                task_path='model/cost_list_model', layout_analysis=False, precision='fp16')


-@app.route('/nlp/cost', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def cost():
+def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return COST({'doc': img_path, 'layout': json.loads(layout)})
--- a/services/paddle_services/discharge_api.py
+++ b/services/paddle_services/discharge_api.py
@@ -1,5 +1,5 @@
 import json
-import logging
+import logging.config

 from flask import Flask, request
 from paddlenlp import Taskflow
@@ -16,9 +16,9 @@ DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, m
                     task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')


-@app.route('/nlp/discharge', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def discharge():
+def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})
--- a/services/paddle_services/settlement_api.py
+++ b/services/paddle_services/settlement_api.py
@@ -1,5 +1,5 @@
 import json
-import logging
+import logging.config

 from flask import Flask, request
 from paddlenlp import Taskflow
@@ -20,9 +20,9 @@ SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA
                         task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')


-@app.route('/nlp/settlement', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def settlement():
+def main():
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})
--- a/services/paddle_services/model/text_classification/README.md
+++ b/services/paddle_services/model/text_classification/README.md
@@ -0,0 +1 @@
+文本分类模型存放目录
--- a/services/paddle_services/ocr_api.py
+++ b/services/paddle_services/ocr_api.py
@@ -1,4 +1,4 @@
-import logging
+import logging.config

 from flask import Flask, request
 from paddleocr import PaddleOCR
@@ -10,9 +10,9 @@ app = Flask(__name__)
 OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3)


-@app.route('/ocr', methods=['POST'])
+@app.route('/', methods=['POST'])
@process_request
-def ocr():
+def main():
    img_path = request.form.get('img_path')
    return OCR.ocr(img_path, cls=False)

--- a/util/common_util.py
+++ b/util/common_util.py
@@ -36,7 +36,7 @@ def get_ocr_layout(ocr, img_path):
        return True

    layout = []
-    ocr_result = model_util.request_ocr(img_path)
+    ocr_result = model_util.ocr(img_path)
    ocr_result = ocr_result[0]
    if not ocr_result:
        return layout
--- a/util/model_util.py
+++ b/util/model_util.py
@@ -7,13 +7,13 @@ from tenacity import retry, stop_after_attempt, wait_random

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('OCR识别失败！'))
-def request_ocr(img_path):
+def ocr(img_path):
    """
    请求图片OCR识别接口
    :param img_path: 待识别图片路径
    :return: 识别结果
    """
-    url = 'http://ocr_api:5001/ocr'
+    url = 'http://ocr:5001'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
        return response.json()
@@ -23,14 +23,14 @@ def request_ocr(img_path):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取基本医保结算单失败！'))
-def request_discharge_info(img_path, layout):
+def ie_settlement(img_path, layout):
    """
    请求基本医保结算单信息抽取接口
    :param img_path: 待抽取图片路径
    :param layout: 图片ocr信息
    :return: 抽取结果
    """
-    url = 'http://settlement_api:5002/nlp/settlement'
+    url = 'http://ie_settlement:5002'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
@@ -40,14 +40,14 @@ def request_discharge_info(img_path, layout):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取出院记录失败！'))
-def request_discharge_info(img_path, layout):
+def ie_discharge(img_path, layout):
    """
    请求出院记录信息抽取接口
    :param img_path: 待抽取图片路径
    :param layout: 图片ocr信息
    :return: 抽取结果
    """
-    url = 'http://discharge_api:5003/nlp/discharge'
+    url = 'http://ie_discharge:5003'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
@@ -57,14 +57,14 @@ def request_discharge_info(img_path, layout):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取费用清单失败！'))
-def request_cost_info(img_path, layout):
+def ie_cost(img_path, layout):
    """
    请求费用清单信息抽取接口
    :param img_path: 待抽取图片路径
    :param layout: 图片ocr信息
    :return: 抽取结果
    """
-    url = 'http://cost_api:5004/nlp/cost'
+    url = 'http://ie_cost:5004'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
@@ -74,13 +74,13 @@ def request_cost_info(img_path, layout):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取图片方向失败！'))
-def request_image_orientation(img_path):
+def clas_orientation(img_path):
    """
    请求图片方向分类接口
    :param img_path: 待分类图片路径
    :return: 最有可能的两个图片方向
    """
-    url = 'http://clas_api:5005/clas/orientation'
+    url = 'http://clas_orientation:5005'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
        return response.json()
@@ -90,13 +90,13 @@ def request_image_orientation(img_path):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取文档区域失败！'))
-def request_book_areas(img_path):
+def det_book(img_path):
    """
    请求文档区域识别接口
    :param img_path: 待识别图片路径
    :return: 文档图片路径列表
    """
-    url = 'http://det_api:5006/det/books'
+    url = 'http://det_book:5006'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
        return response.json()
@@ -106,15 +106,33 @@ def request_book_areas(img_path):

@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('矫正扭曲图片失败！'))
-def request_dewarped_image(img_path):
+def dewarp(img_path):
    """
    请求矫正图片接口
    :param img_path: 待矫正图片路径
    :return: 矫正后的图片路径
    """
-    url = 'http://dewarp_api:5007/dewarp'
+    url = 'http://dewarp:5007'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
        return response.json()
    else:
        return img_path
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
+       after=lambda x: logging.warning('文本分类失败！'))
+def clas_text(text):
+    """
+    Request the text classification service.
+    :param text: text to classify
+    :return: parsed JSON body on HTTP 200; None for empty text or any other status
+    """
+    if not text:  # nothing to classify, so skip the HTTP call
+        return None
+    url = 'http://clas_text:5008'
+    response = requests.post(url, {'text': text})
+    if response.status_code == 200:
+        return response.json()
+    else:
+        return None