diff --git a/docker-compose.yml b/docker-compose.yml index a0558dc..3fd477a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,13 +20,13 @@ x-paddle: - ./tmp_img:/app/tmp_img services: - ocr_api: + ocr: <<: *paddle_template build: context: ./services/paddle_services - container_name: ocr_api - hostname: ocr_api - command: [ '-w', '1', 'ocr_api:app', '--bind', '0.0.0.0:5001' ] + container_name: ocr + hostname: ocr + command: [ '-w', '1', 'ocr:app', '--bind', '0.0.0.0:5001' ] deploy: resources: reservations: @@ -35,11 +35,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - settlement_api: + ie_settlement: <<: *paddle_template - container_name: settlement_api - hostname: settlement_api - command: [ '-w', '1', 'settlement_api:app', '--bind', '0.0.0.0:5002' ] + container_name: ie_settlement + hostname: ie_settlement + command: [ '-w', '1', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ] deploy: resources: reservations: @@ -48,11 +48,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - discharge_api: + ie_discharge: <<: *paddle_template - container_name: discharge_api - hostname: discharge_api - command: [ '-w', '1', 'discharge_api:app', '--bind', '0.0.0.0:5003' ] + container_name: ie_discharge + hostname: ie_discharge + command: [ '-w', '1', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ] deploy: resources: reservations: @@ -61,11 +61,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - cost_api: + ie_cost: <<: *paddle_template - container_name: cost_api - hostname: cost_api - command: [ '-w', '1', 'cost_api:app', '--bind', '0.0.0.0:5004' ] + container_name: ie_cost + hostname: ie_cost + command: [ '-w', '1', 'ie_cost:app', '--bind', '0.0.0.0:5004' ] deploy: resources: reservations: @@ -74,11 +74,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - clas_api: + clas_orientation: <<: *paddle_template - container_name: clas_api - hostname: clas_api - command: [ '-w', '1', 'clas_api:app', '--bind', '0.0.0.0:5005' ] + container_name: clas_orientation + hostname: clas_orientation + command: [ '-w', '1', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ] deploy: resources: reservations: @@ -87,11 +87,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - det_api: + det_book: <<: *paddle_template - container_name: det_api - hostname: det_api - command: [ '-w', '1', 'det_api:app', '--bind', '0.0.0.0:5006' ] + container_name: det_book + hostname: det_book + command: [ '-w', '1', 'det_book:app', '--bind', '0.0.0.0:5006' ] deploy: resources: reservations: @@ -100,11 +100,11 @@ services: capabilities: [ 'gpu' ] driver: 'nvidia' - dewarp_api: + dewarp: <<: *paddle_template - container_name: dewarp_api - hostname: dewarp_api - command: [ '-w', '1', 'dewarp_api:app', '--bind', '0.0.0.0:5007' ] + container_name: dewarp + hostname: dewarp + command: [ '-w', '1', 'dewarp:app', '--bind', '0.0.0.0:5007' ] deploy: resources: reservations: @@ -129,37 +129,37 @@ services: # - dewarp_api # command: [ 'photo_review.py', '--clean', 'True' ] - # photo_review_2: - # <<: *project_template - # container_name: photo_review_2 - # hostname: photo_review_2 - # depends_on: - # - photo_review_1 - # command: [ 'photo_review.py' ] - # - # photo_review_3: - # <<: *project_template - # container_name: photo_review_3 - # hostname: photo_review_3 - # depends_on: - # - photo_review_2 - # command: [ 'photo_review.py' ] - # - # photo_review_4: - # <<: *project_template - # container_name: photo_review_4 - # hostname: photo_review_4 - # depends_on: - # - photo_review_3 - # command: [ 'photo_review.py' ] - # - # photo_review_5: - # <<: *project_template - # container_name: photo_review_5 - # hostname: photo_review_5 - # depends_on: - # - photo_review_4 - # command: [ 'photo_review.py' ] +# photo_review_2: +# <<: *project_template +# container_name: photo_review_2 +# hostname: photo_review_2 +# depends_on: +# - photo_review_1 +# command: [ 'photo_review.py' ] +# +# photo_review_3: +# <<: *project_template +# container_name: photo_review_3 +# hostname: photo_review_3 +# depends_on: +# - photo_review_2 +# command: [ 'photo_review.py' ] +# +# photo_review_4: +# <<: *project_template +# container_name: photo_review_4 +# hostname: photo_review_4 +# depends_on: +# - photo_review_3 +# command: [ 'photo_review.py' ] +# +# photo_review_5: +# <<: *project_template +# container_name: photo_review_5 +# hostname: photo_review_5 +# depends_on: +# - photo_review_4 +# command: [ 'photo_review.py' ] # photo_mask_1: # <<: *project_template diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 56db655..683cf7c 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -185,8 +185,8 @@ def information_extraction(ie, phrecs, identity): target_images = model_util.request_book_areas(img_path) # 识别文档区域并裁剪 angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计 for target_image in target_images: - dewarped_image = model_util.request_dewarped_image(target_image) # 去扭曲 - angles = model_util.request_image_orientation(dewarped_image) + dewarped_image = model_util.dewarp(target_image) # 去扭曲 + angles = model_util.clas_orientation(dewarped_image) split_results = image_util.split(dewarped_image) for split_result in split_results: diff --git a/services/paddle_services/clas_api.py b/services/paddle_services/clas_orientation.py similarity index 89% rename from services/paddle_services/clas_api.py rename to services/paddle_services/clas_orientation.py index 77a216c..5cba9e4 100644 --- a/services/paddle_services/clas_api.py +++ b/services/paddle_services/clas_orientation.py @@ -1,4 +1,4 @@ -import logging +import logging.config from flask import Flask, request from paddleclas import PaddleClas @@ -10,9 +10,9 @@ app = Flask(__name__) CLAS = PaddleClas(model_name='text_image_orientation') -@app.route('/clas/orientation', methods=['POST']) +@app.route(rule='/', methods=['POST']) @process_request -def orientation(): +def main(): """ 判断图片旋转角度,逆时针旋转该角度后为正。可能值['0', '90', '180', '270'] :return: 最有可能的两个角度 diff --git a/services/paddle_services/clas_text.py b/services/paddle_services/clas_text.py new file mode 100644 index 0000000..c202cb8 --- /dev/null +++ b/services/paddle_services/clas_text.py @@ -0,0 +1,28 @@ +import logging.config + +from flask import Flask, request +from paddlenlp import Taskflow + +from log import LOGGING_CONFIG +from utils import process_request + +app = Flask(__name__) +schema = ['基本医保结算单', '出院记录', '费用清单'] +CLAS = Taskflow('zero_shot_text_classification', model='utc-xbase', schema=schema, + task_path='model/text_classification', precision='fp16') + + +@app.route('/', methods=['POST']) +@process_request +def main(): + text = request.form.get('text') + cls_result = CLAS(text) + cls_result = cls_result[0].get('predictions')[0] + if cls_result['score'] < 0.8: + raise Exception(f'识别结果置信度过低!text: {text}') + return cls_result['label'] + + +if __name__ == '__main__': + logging.config.dictConfig(LOGGING_CONFIG) + app.run('0.0.0.0', 5008) diff --git a/services/paddle_services/det_api.py b/services/paddle_services/det_book.py similarity index 90% rename from services/paddle_services/det_api.py rename to services/paddle_services/det_book.py index 71690cb..efdc50f 100644 --- a/services/paddle_services/det_api.py +++ b/services/paddle_services/det_book.py @@ -1,4 +1,4 @@ -import logging +import logging.config import os.path import cv2 @@ -11,9 +11,9 @@ from utils import process_request, parse_img_path app = Flask(__name__) -@app.route('/det/books', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def books(): +def main(): img_path = request.form.get('img_path') result = detector.get_book_areas(img_path) diff --git a/services/paddle_services/dewarp_api.py b/services/paddle_services/dewarp.py similarity index 90% rename from services/paddle_services/dewarp_api.py rename to services/paddle_services/dewarp.py index 1249a1a..53b5293 100644 --- a/services/paddle_services/dewarp_api.py +++ b/services/paddle_services/dewarp.py @@ -1,4 +1,4 @@ -import logging +import logging.config import os import cv2 @@ -11,9 +11,9 @@ from utils import process_request, parse_img_path app = Flask(__name__) -@app.route('/dewarp', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def dewarp(): +def main(): img_path = request.form.get('img_path') img = cv2.imread(img_path) dewarped_img = dewarper.dewarp_image(img) diff --git a/services/paddle_services/cost_api.py b/services/paddle_services/ie_cost.py similarity index 91% rename from services/paddle_services/cost_api.py rename to services/paddle_services/ie_cost.py index 092f7c7..446fbc9 100644 --- a/services/paddle_services/cost_api.py +++ b/services/paddle_services/ie_cost.py @@ -1,5 +1,5 @@ import json -import logging +import logging.config from flask import Flask, request from paddlenlp import Taskflow @@ -14,9 +14,9 @@ COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x- task_path='model/cost_list_model', layout_analysis=False, precision='fp16') -@app.route('/nlp/cost', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def cost(): +def main(): img_path = request.form.get('img_path') layout = request.form.get('layout') return COST({'doc': img_path, 'layout': json.loads(layout)}) diff --git a/services/paddle_services/discharge_api.py b/services/paddle_services/ie_discharge.py similarity index 91% rename from services/paddle_services/discharge_api.py rename to services/paddle_services/ie_discharge.py index c5d4918..a7960e6 100644 --- a/services/paddle_services/discharge_api.py +++ b/services/paddle_services/ie_discharge.py @@ -1,5 +1,5 @@ import json -import logging +import logging.config from flask import Flask, request from paddlenlp import Taskflow @@ -16,9 +16,9 @@ DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, m task_path='model/discharge_record_model', layout_analysis=False, precision='fp16') -@app.route('/nlp/discharge', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def discharge(): +def main(): img_path = request.form.get('img_path') layout = request.form.get('layout') return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)}) diff --git a/services/paddle_services/settlement_api.py b/services/paddle_services/ie_settlement.py similarity index 93% rename from services/paddle_services/settlement_api.py rename to services/paddle_services/ie_settlement.py index 8421d5a..aa9d667 100644 --- a/services/paddle_services/settlement_api.py +++ b/services/paddle_services/ie_settlement.py @@ -1,5 +1,5 @@ import json -import logging +import logging.config from flask import Flask, request from paddlenlp import Taskflow @@ -20,9 +20,9 @@ SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA task_path='model/settlement_list_model', layout_analysis=False, precision='fp16') -@app.route('/nlp/settlement', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def settlement(): +def main(): img_path = request.form.get('img_path') layout = request.form.get('layout') return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)}) diff --git a/services/paddle_services/model/text_classification/README.md b/services/paddle_services/model/text_classification/README.md new file mode 100644 index 0000000..f81d637 --- /dev/null +++ b/services/paddle_services/model/text_classification/README.md @@ -0,0 +1 @@ +文本分类模型存放目录 \ No newline at end of file diff --git a/services/paddle_services/ocr_api.py b/services/paddle_services/ocr.py similarity index 86% rename from services/paddle_services/ocr_api.py rename to services/paddle_services/ocr.py index 4da924e..9bb79e4 100644 --- a/services/paddle_services/ocr_api.py +++ b/services/paddle_services/ocr.py @@ -1,4 +1,4 @@ -import logging +import logging.config from flask import Flask, request from paddleocr import PaddleOCR @@ -10,9 +10,9 @@ app = Flask(__name__) OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3) -@app.route('/ocr', methods=['POST']) +@app.route('/', methods=['POST']) @process_request -def ocr(): +def main(): img_path = request.form.get('img_path') return OCR.ocr(img_path, cls=False) diff --git a/util/common_util.py b/util/common_util.py index 7719d61..ffeb4b0 100644 --- a/util/common_util.py +++ b/util/common_util.py @@ -36,7 +36,7 @@ def get_ocr_layout(ocr, img_path): return True layout = [] - ocr_result = model_util.request_ocr(img_path) + ocr_result = model_util.ocr(img_path) ocr_result = ocr_result[0] if not ocr_result: return layout diff --git a/util/model_util.py b/util/model_util.py index 1f4c3f0..3d9634e 100644 --- a/util/model_util.py +++ b/util/model_util.py @@ -7,13 +7,13 @@ from tenacity import retry, stop_after_attempt, wait_random @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('OCR识别失败!')) -def request_ocr(img_path): +def ocr(img_path): """ 请求图片OCR识别接口 :param img_path: 待识别图片路径 :return: 识别结果 """ - url = 'http://ocr_api:5001/ocr' + url = 'http://ocr:5001' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() @@ -23,14 +23,14 @@ def request_ocr(img_path): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('抽取基本医保结算单失败!')) -def request_discharge_info(img_path, layout): +def ie_settlement(img_path, layout): """ 请求基本医保结算单信息抽取接口 :param img_path: 待抽取图片路径 :param layout: 图片ocr信息 :return: 抽取结果 """ - url = 'http://settlement_api:5002/nlp/settlement' + url = 'http://ie_settlement:5002' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: return response.json() @@ -40,14 +40,14 @@ def request_discharge_info(img_path, layout): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('抽取出院记录失败!')) -def request_discharge_info(img_path, layout): +def ie_discharge(img_path, layout): """ 请求出院记录信息抽取接口 :param img_path: 待抽取图片路径 :param layout: 图片ocr信息 :return: 抽取结果 """ - url = 'http://discharge_api:5003/nlp/discharge' + url = 'http://ie_discharge:5003' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: return response.json() @@ -57,14 +57,14 @@ def request_discharge_info(img_path, layout): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('抽取费用清单失败!')) -def request_cost_info(img_path, layout): +def ie_cost(img_path, layout): """ 请求费用清单信息抽取接口 :param img_path: 待抽取图片路径 :param layout: 图片ocr信息 :return: 抽取结果 """ - url = 'http://cost_api:5004/nlp/cost' + url = 'http://ie_cost:5004' response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)}) if response.status_code == 200: return response.json() @@ -74,13 +74,13 @@ def request_cost_info(img_path, layout): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('获取图片方向失败!')) -def request_image_orientation(img_path): +def clas_orientation(img_path): """ 请求图片方向分类接口 :param img_path: 待分类图片路径 :return: 最有可能的两个图片方向 """ - url = 'http://clas_api:5005/clas/orientation' + url = 'http://clas_orientation:5005' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() @@ -90,13 +90,13 @@ def request_image_orientation(img_path): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('获取文档区域失败!')) -def request_book_areas(img_path): +def det_book(img_path): """ 请求文档区域识别接口 :param img_path: 待识别图片路径 :return: 文档图片路径列表 """ - url = 'http://det_api:5006/det/books' + url = 'http://det_book:5006' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() @@ -106,15 +106,33 @@ def request_book_areas(img_path): @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('矫正扭曲图片失败!')) -def request_dewarped_image(img_path): +def dewarp(img_path): """ 请求矫正图片接口 :param img_path: 待矫正图片路径 :return: 矫正后的图片路径 """ - url = 'http://dewarp_api:5007/dewarp' + url = 'http://dewarp:5007' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() else: return img_path + + +@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, + after=lambda x: logging.warning('文本分类失败!')) +def clas_text(text): + """ + 请求文本分类接口 + :param text: 待分类文本 + :return: 分类结果 + """ + if not text: + return None + url = 'http://clas_text:5008' + response = requests.post(url, {'text': text}) + if response.status_code == 200: + return response.json() + else: + return None