统一模型接口,新增文本分类接口

This commit is contained in:
2024-09-27 13:50:55 +08:00
parent 117b29a737
commit f1149854ce
13 changed files with 144 additions and 97 deletions

View File

@@ -20,13 +20,13 @@ x-paddle:
- ./tmp_img:/app/tmp_img
services:
ocr_api:
ocr:
<<: *paddle_template
build:
context: ./services/paddle_services
container_name: ocr_api
hostname: ocr_api
command: [ '-w', '1', 'ocr_api:app', '--bind', '0.0.0.0:5001' ]
container_name: ocr
hostname: ocr
command: [ '-w', '1', 'ocr:app', '--bind', '0.0.0.0:5001' ]
deploy:
resources:
reservations:
@@ -35,11 +35,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
settlement_api:
ie_settlement:
<<: *paddle_template
container_name: settlement_api
hostname: settlement_api
command: [ '-w', '1', 'settlement_api:app', '--bind', '0.0.0.0:5002' ]
container_name: ie_settlement
hostname: ie_settlement
command: [ '-w', '1', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ]
deploy:
resources:
reservations:
@@ -48,11 +48,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
discharge_api:
ie_discharge:
<<: *paddle_template
container_name: discharge_api
hostname: discharge_api
command: [ '-w', '1', 'discharge_api:app', '--bind', '0.0.0.0:5003' ]
container_name: ie_discharge
hostname: ie_discharge
command: [ '-w', '1', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ]
deploy:
resources:
reservations:
@@ -61,11 +61,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
cost_api:
ie_cost:
<<: *paddle_template
container_name: cost_api
hostname: cost_api
command: [ '-w', '1', 'cost_api:app', '--bind', '0.0.0.0:5004' ]
container_name: ie_cost
hostname: ie_cost
command: [ '-w', '1', 'ie_cost:app', '--bind', '0.0.0.0:5004' ]
deploy:
resources:
reservations:
@@ -74,11 +74,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
clas_api:
clas_orientation:
<<: *paddle_template
container_name: clas_api
hostname: clas_api
command: [ '-w', '1', 'clas_api:app', '--bind', '0.0.0.0:5005' ]
container_name: clas_orientation
hostname: clas_orientation
command: [ '-w', '1', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ]
deploy:
resources:
reservations:
@@ -87,11 +87,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
det_api:
det_book:
<<: *paddle_template
container_name: det_api
hostname: det_api
command: [ '-w', '1', 'det_api:app', '--bind', '0.0.0.0:5006' ]
container_name: det_book
hostname: det_book
command: [ '-w', '1', 'det_book:app', '--bind', '0.0.0.0:5006' ]
deploy:
resources:
reservations:
@@ -100,11 +100,11 @@ services:
capabilities: [ 'gpu' ]
driver: 'nvidia'
dewarp_api:
dewarp:
<<: *paddle_template
container_name: dewarp_api
hostname: dewarp_api
command: [ '-w', '1', 'dewarp_api:app', '--bind', '0.0.0.0:5007' ]
container_name: dewarp
hostname: dewarp
command: [ '-w', '1', 'dewarp:app', '--bind', '0.0.0.0:5007' ]
deploy:
resources:
reservations:
@@ -129,37 +129,37 @@ services:
# - dewarp_api
# command: [ 'photo_review.py', '--clean', 'True' ]
# photo_review_2:
# <<: *project_template
# container_name: photo_review_2
# hostname: photo_review_2
# depends_on:
# - photo_review_1
# command: [ 'photo_review.py' ]
#
# photo_review_3:
# <<: *project_template
# container_name: photo_review_3
# hostname: photo_review_3
# depends_on:
# - photo_review_2
# command: [ 'photo_review.py' ]
#
# photo_review_4:
# <<: *project_template
# container_name: photo_review_4
# hostname: photo_review_4
# depends_on:
# - photo_review_3
# command: [ 'photo_review.py' ]
#
# photo_review_5:
# <<: *project_template
# container_name: photo_review_5
# hostname: photo_review_5
# depends_on:
# - photo_review_4
# command: [ 'photo_review.py' ]
# photo_review_2:
# <<: *project_template
# container_name: photo_review_2
# hostname: photo_review_2
# depends_on:
# - photo_review_1
# command: [ 'photo_review.py' ]
#
# photo_review_3:
# <<: *project_template
# container_name: photo_review_3
# hostname: photo_review_3
# depends_on:
# - photo_review_2
# command: [ 'photo_review.py' ]
#
# photo_review_4:
# <<: *project_template
# container_name: photo_review_4
# hostname: photo_review_4
# depends_on:
# - photo_review_3
# command: [ 'photo_review.py' ]
#
# photo_review_5:
# <<: *project_template
# container_name: photo_review_5
# hostname: photo_review_5
# depends_on:
# - photo_review_4
# command: [ 'photo_review.py' ]
# photo_mask_1:
# <<: *project_template

View File

@@ -185,8 +185,8 @@ def information_extraction(ie, phrecs, identity):
target_images = model_util.request_book_areas(img_path) # 识别文档区域并裁剪
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
for target_image in target_images:
dewarped_image = model_util.request_dewarped_image(target_image) # 去扭曲
angles = model_util.request_image_orientation(dewarped_image)
dewarped_image = model_util.dewarp(target_image) # 去扭曲
angles = model_util.clas_orientation(dewarped_image)
split_results = image_util.split(dewarped_image)
for split_result in split_results:

View File

@@ -1,4 +1,4 @@
import logging
import logging.config
from flask import Flask, request
from paddleclas import PaddleClas
@@ -10,9 +10,9 @@ app = Flask(__name__)
CLAS = PaddleClas(model_name='text_image_orientation')
@app.route('/clas/orientation', methods=['POST'])
@app.route(rule='/', methods=['POST'])
@process_request
def orientation():
def main():
"""
判断图片旋转角度逆时针旋转该角度后为正可能值['0', '90', '180', '270']
:return: 最有可能的两个角度

View File

@@ -0,0 +1,28 @@
import logging.config
from flask import Flask, request
from paddlenlp import Taskflow
from log import LOGGING_CONFIG
from utils import process_request
# WSGI application object for this service.
app = Flask(__name__)
# Candidate labels handed to the zero-shot classifier: basic medical insurance
# settlement statement, discharge record, cost list.
schema = ['基本医保结算单', '出院记录', '费用清单']
# Classifier instance built once at import time and reused by every request.
# It is configured for zero-shot text classification with the 'utc-xbase' model,
# task_path 'model/text_classification' and fp16 precision.
CLAS = Taskflow('zero_shot_text_classification', model='utc-xbase', schema=schema,
task_path='model/text_classification', precision='fp16')
# Minimum score the selected prediction must reach to be accepted.
MIN_SCORE = 0.8


@app.route('/', methods=['POST'])
@process_request
def main():
    """
    Classify the posted text against the labels in ``schema``.

    Reads the ``text`` form field, runs the classifier on it and returns the
    label of the first prediction of the first result.

    :return: label of the first prediction
    :raises ValueError: if the ``text`` form field is missing or empty
    :raises Exception: if the classifier yields no prediction, or the score
        of the first prediction is below ``MIN_SCORE``
    """
    text = request.form.get('text')
    if not text:
        # Fail with a clear message instead of handing None/'' to the model.
        raise ValueError('text is required')

    results = CLAS(text)
    predictions = results[0].get('predictions') if results else None
    # An empty or missing prediction list would otherwise surface as an
    # IndexError/TypeError; report it the same way as a low-confidence result.
    best = predictions[0] if predictions else None
    if best is None or best['score'] < MIN_SCORE:
        raise Exception(f'识别结果置信度过低text: {text}')
    return best['label']
# Direct-execution entry point: apply the logging configuration, then start
# Flask's built-in server on all interfaces, port 5008.
if __name__ == '__main__':
logging.config.dictConfig(LOGGING_CONFIG)
app.run('0.0.0.0', 5008)

View File

@@ -1,4 +1,4 @@
import logging
import logging.config
import os.path
import cv2
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
app = Flask(__name__)
@app.route('/det/books', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def books():
def main():
img_path = request.form.get('img_path')
result = detector.get_book_areas(img_path)

View File

@@ -1,4 +1,4 @@
import logging
import logging.config
import os
import cv2
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
app = Flask(__name__)
@app.route('/dewarp', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def dewarp():
def main():
img_path = request.form.get('img_path')
img = cv2.imread(img_path)
dewarped_img = dewarper.dewarp_image(img)

View File

@@ -1,5 +1,5 @@
import json
import logging
import logging.config
from flask import Flask, request
from paddlenlp import Taskflow
@@ -14,9 +14,9 @@ COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-
task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
@app.route('/nlp/cost', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def cost():
def main():
img_path = request.form.get('img_path')
layout = request.form.get('layout')
return COST({'doc': img_path, 'layout': json.loads(layout)})

View File

@@ -1,5 +1,5 @@
import json
import logging
import logging.config
from flask import Flask, request
from paddlenlp import Taskflow
@@ -16,9 +16,9 @@ DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, m
task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')
@app.route('/nlp/discharge', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def discharge():
def main():
img_path = request.form.get('img_path')
layout = request.form.get('layout')
return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})

View File

@@ -1,5 +1,5 @@
import json
import logging
import logging.config
from flask import Flask, request
from paddlenlp import Taskflow
@@ -20,9 +20,9 @@ SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA
task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')
@app.route('/nlp/settlement', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def settlement():
def main():
img_path = request.form.get('img_path')
layout = request.form.get('layout')
return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})

View File

@@ -0,0 +1 @@
文本分类模型存放目录

View File

@@ -1,4 +1,4 @@
import logging
import logging.config
from flask import Flask, request
from paddleocr import PaddleOCR
@@ -10,9 +10,9 @@ app = Flask(__name__)
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3)
@app.route('/ocr', methods=['POST'])
@app.route('/', methods=['POST'])
@process_request
def ocr():
def main():
img_path = request.form.get('img_path')
return OCR.ocr(img_path, cls=False)

View File

@@ -36,7 +36,7 @@ def get_ocr_layout(ocr, img_path):
return True
layout = []
ocr_result = model_util.request_ocr(img_path)
ocr_result = model_util.ocr(img_path)
ocr_result = ocr_result[0]
if not ocr_result:
return layout

View File

@@ -7,13 +7,13 @@ from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('OCR识别失败'))
def request_ocr(img_path):
def ocr(img_path):
"""
请求图片OCR识别接口
:param img_path: 待识别图片路径
:return: 识别结果
"""
url = 'http://ocr_api:5001/ocr'
url = 'http://ocr:5001'
response = requests.post(url, {'img_path': img_path})
if response.status_code == 200:
return response.json()
@@ -23,14 +23,14 @@ def request_ocr(img_path):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('抽取基本医保结算单失败!'))
def request_discharge_info(img_path, layout):
def ie_settlement(img_path, layout):
"""
请求基本医保结算单信息抽取接口
:param img_path: 待抽取图片路径
:param layout: 图片ocr信息
:return: 抽取结果
"""
url = 'http://settlement_api:5002/nlp/settlement'
url = 'http://ie_settlement:5002'
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
if response.status_code == 200:
return response.json()
@@ -40,14 +40,14 @@ def request_discharge_info(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('抽取出院记录失败!'))
def request_discharge_info(img_path, layout):
def ie_discharge(img_path, layout):
"""
请求出院记录信息抽取接口
:param img_path: 待抽取图片路径
:param layout: 图片ocr信息
:return: 抽取结果
"""
url = 'http://discharge_api:5003/nlp/discharge'
url = 'http://ie_discharge:5003'
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
if response.status_code == 200:
return response.json()
@@ -57,14 +57,14 @@ def request_discharge_info(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('抽取费用清单失败!'))
def request_cost_info(img_path, layout):
def ie_cost(img_path, layout):
"""
请求费用清单信息抽取接口
:param img_path: 待抽取图片路径
:param layout: 图片ocr信息
:return: 抽取结果
"""
url = 'http://cost_api:5004/nlp/cost'
url = 'http://ie_cost:5004'
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
if response.status_code == 200:
return response.json()
@@ -74,13 +74,13 @@ def request_cost_info(img_path, layout):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('获取图片方向失败!'))
def request_image_orientation(img_path):
def clas_orientation(img_path):
"""
请求图片方向分类接口
:param img_path: 待分类图片路径
:return: 最有可能的两个图片方向
"""
url = 'http://clas_api:5005/clas/orientation'
url = 'http://clas_orientation:5005'
response = requests.post(url, {'img_path': img_path})
if response.status_code == 200:
return response.json()
@@ -90,13 +90,13 @@ def request_image_orientation(img_path):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('获取文档区域失败!'))
def request_book_areas(img_path):
def det_book(img_path):
"""
请求文档区域识别接口
:param img_path: 待识别图片路径
:return: 文档图片路径列表
"""
url = 'http://det_api:5006/det/books'
url = 'http://det_book:5006'
response = requests.post(url, {'img_path': img_path})
if response.status_code == 200:
return response.json()
@@ -106,15 +106,33 @@ def request_book_areas(img_path):
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('矫正扭曲图片失败!'))
def request_dewarped_image(img_path):
def dewarp(img_path):
"""
请求矫正图片接口
:param img_path: 待矫正图片路径
:return: 矫正后的图片路径
"""
url = 'http://dewarp_api:5007/dewarp'
url = 'http://dewarp:5007'
response = requests.post(url, {'img_path': img_path})
if response.status_code == 200:
return response.json()
else:
return img_path
@retry(
    stop=stop_after_attempt(3),
    wait=wait_random(1, 3),
    reraise=True,
    after=lambda retry_state: logging.warning('文本分类失败!'),
)
def clas_text(text):
    """
    Call the text classification service.

    A falsy ``text`` returns ``None`` without sending a request. Exceptions
    raised while sending the request are handled by the ``retry`` decorator
    (at most 3 attempts, a warning is logged after a failed attempt, and the
    last exception is re-raised). A non-200 response is not retried.

    :param text: text to classify
    :return: decoded JSON body when the service answers 200, otherwise None
    """
    if text:
        reply = requests.post('http://clas_text:5008', {'text': text})
        if reply.status_code == 200:
            return reply.json()
    # Empty input or a non-200 answer: no classification available.
    return None