统一模型接口,新增文本分类接口
This commit is contained in:
@@ -20,13 +20,13 @@ x-paddle:
|
||||
- ./tmp_img:/app/tmp_img
|
||||
|
||||
services:
|
||||
ocr_api:
|
||||
ocr:
|
||||
<<: *paddle_template
|
||||
build:
|
||||
context: ./services/paddle_services
|
||||
container_name: ocr_api
|
||||
hostname: ocr_api
|
||||
command: [ '-w', '1', 'ocr_api:app', '--bind', '0.0.0.0:5001' ]
|
||||
container_name: ocr
|
||||
hostname: ocr
|
||||
command: [ '-w', '1', 'ocr:app', '--bind', '0.0.0.0:5001' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -35,11 +35,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
settlement_api:
|
||||
ie_settlement:
|
||||
<<: *paddle_template
|
||||
container_name: settlement_api
|
||||
hostname: settlement_api
|
||||
command: [ '-w', '1', 'settlement_api:app', '--bind', '0.0.0.0:5002' ]
|
||||
container_name: ie_settlement
|
||||
hostname: ie_settlement
|
||||
command: [ '-w', '1', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -48,11 +48,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
discharge_api:
|
||||
ie_discharge:
|
||||
<<: *paddle_template
|
||||
container_name: discharge_api
|
||||
hostname: discharge_api
|
||||
command: [ '-w', '1', 'discharge_api:app', '--bind', '0.0.0.0:5003' ]
|
||||
container_name: ie_discharge
|
||||
hostname: ie_discharge
|
||||
command: [ '-w', '1', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -61,11 +61,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
cost_api:
|
||||
ie_cost:
|
||||
<<: *paddle_template
|
||||
container_name: cost_api
|
||||
hostname: cost_api
|
||||
command: [ '-w', '1', 'cost_api:app', '--bind', '0.0.0.0:5004' ]
|
||||
container_name: ie_cost
|
||||
hostname: ie_cost
|
||||
command: [ '-w', '1', 'ie_cost:app', '--bind', '0.0.0.0:5004' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -74,11 +74,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
clas_api:
|
||||
clas_orientation:
|
||||
<<: *paddle_template
|
||||
container_name: clas_api
|
||||
hostname: clas_api
|
||||
command: [ '-w', '1', 'clas_api:app', '--bind', '0.0.0.0:5005' ]
|
||||
container_name: clas_orientation
|
||||
hostname: clas_orientation
|
||||
command: [ '-w', '1', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -87,11 +87,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
det_api:
|
||||
det_book:
|
||||
<<: *paddle_template
|
||||
container_name: det_api
|
||||
hostname: det_api
|
||||
command: [ '-w', '1', 'det_api:app', '--bind', '0.0.0.0:5006' ]
|
||||
container_name: det_book
|
||||
hostname: det_book
|
||||
command: [ '-w', '1', 'det_book:app', '--bind', '0.0.0.0:5006' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -100,11 +100,11 @@ services:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
dewarp_api:
|
||||
dewarp:
|
||||
<<: *paddle_template
|
||||
container_name: dewarp_api
|
||||
hostname: dewarp_api
|
||||
command: [ '-w', '1', 'dewarp_api:app', '--bind', '0.0.0.0:5007' ]
|
||||
container_name: dewarp
|
||||
hostname: dewarp
|
||||
command: [ '-w', '1', 'dewarp:app', '--bind', '0.0.0.0:5007' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
|
||||
@@ -185,8 +185,8 @@ def information_extraction(ie, phrecs, identity):
|
||||
target_images = model_util.request_book_areas(img_path) # 识别文档区域并裁剪
|
||||
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
|
||||
for target_image in target_images:
|
||||
dewarped_image = model_util.request_dewarped_image(target_image) # 去扭曲
|
||||
angles = model_util.request_image_orientation(dewarped_image)
|
||||
dewarped_image = model_util.dewarp(target_image) # 去扭曲
|
||||
angles = model_util.clas_orientation(dewarped_image)
|
||||
|
||||
split_results = image_util.split(dewarped_image)
|
||||
for split_result in split_results:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddleclas import PaddleClas
|
||||
@@ -10,9 +10,9 @@ app = Flask(__name__)
|
||||
CLAS = PaddleClas(model_name='text_image_orientation')
|
||||
|
||||
|
||||
@app.route('/clas/orientation', methods=['POST'])
|
||||
@app.route(rule='/', methods=['POST'])
|
||||
@process_request
|
||||
def orientation():
|
||||
def main():
|
||||
"""
|
||||
判断图片旋转角度,逆时针旋转该角度后为正。可能值['0', '90', '180', '270']
|
||||
:return: 最有可能的两个角度
|
||||
28
services/paddle_services/clas_text.py
Normal file
28
services/paddle_services/clas_text.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
|
||||
from log import LOGGING_CONFIG
|
||||
from utils import process_request
|
||||
|
||||
# Flask application that exposes the text classification endpoint.
app = Flask(__name__)

# Candidate labels handed to the zero-shot classifier.
schema = ['基本医保结算单', '出院记录', '费用清单']

# Classifier instance created once at import time and shared by all requests.
# task_path points at the local model directory (presumably fine-tuned weights — confirm).
CLAS = Taskflow(
    'zero_shot_text_classification',
    model='utc-xbase',
    schema=schema,
    task_path='model/text_classification',
    precision='fp16',
)
|
||||
|
||||
|
||||
@app.route('/', methods=['POST'])
@process_request
def main():
    """
    Classify the posted text and return the most confident label.

    Reads the ``text`` form field, runs it through CLAS and picks the prediction
    with the highest score.

    :return: label of the highest-scoring prediction
    :raises Exception: when no text was posted, or when no prediction reaches a
        score of 0.8 (including the case where the model returns no prediction)
    """
    text = request.form.get('text')
    if not text:
        # Fail with an explicit message instead of handing None/'' to the model.
        raise Exception('待分类文本为空!')

    predictions = CLAS(text)[0].get('predictions') or []
    # Select by score rather than by position so the result does not depend on
    # the order in which the model lists its predictions; an empty list yields
    # None here instead of an IndexError.
    best = max(predictions, key=lambda prediction: prediction['score'], default=None)
    if best is None or best['score'] < 0.8:
        raise Exception(f'识别结果置信度过低!text: {text}')
    return best['label']
|
||||
|
||||
|
||||
# Direct-run entry point: configure logging, then serve on all interfaces, port 5008.
if __name__ == '__main__':
    logging.config.dictConfig(LOGGING_CONFIG)
    app.run(host='0.0.0.0', port=5008)
|
||||
@@ -1,4 +1,4 @@
|
||||
import logging
|
||||
import logging.config
|
||||
import os.path
|
||||
|
||||
import cv2
|
||||
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route('/det/books', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def books():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
result = detector.get_book_areas(img_path)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import logging
|
||||
import logging.config
|
||||
import os
|
||||
|
||||
import cv2
|
||||
@@ -11,9 +11,9 @@ from utils import process_request, parse_img_path
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route('/dewarp', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def dewarp():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
img = cv2.imread(img_path)
|
||||
dewarped_img = dewarper.dewarp_image(img)
|
||||
@@ -1,5 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
@@ -14,9 +14,9 @@ COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-
|
||||
task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
|
||||
|
||||
|
||||
@app.route('/nlp/cost', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def cost():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
layout = request.form.get('layout')
|
||||
return COST({'doc': img_path, 'layout': json.loads(layout)})
|
||||
@@ -1,5 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
@@ -16,9 +16,9 @@ DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, m
|
||||
task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')
|
||||
|
||||
|
||||
@app.route('/nlp/discharge', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def discharge():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
layout = request.form.get('layout')
|
||||
return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})
|
||||
@@ -1,5 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
@@ -20,9 +20,9 @@ SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA
|
||||
task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')
|
||||
|
||||
|
||||
@app.route('/nlp/settlement', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def settlement():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
layout = request.form.get('layout')
|
||||
return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})
|
||||
@@ -0,0 +1 @@
|
||||
文本分类模型存放目录
|
||||
@@ -1,4 +1,4 @@
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
from flask import Flask, request
|
||||
from paddleocr import PaddleOCR
|
||||
@@ -10,9 +10,9 @@ app = Flask(__name__)
|
||||
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3)
|
||||
|
||||
|
||||
@app.route('/ocr', methods=['POST'])
|
||||
@app.route('/', methods=['POST'])
|
||||
@process_request
|
||||
def ocr():
|
||||
def main():
|
||||
img_path = request.form.get('img_path')
|
||||
return OCR.ocr(img_path, cls=False)
|
||||
|
||||
@@ -36,7 +36,7 @@ def get_ocr_layout(ocr, img_path):
|
||||
return True
|
||||
|
||||
layout = []
|
||||
ocr_result = model_util.request_ocr(img_path)
|
||||
ocr_result = model_util.ocr(img_path)
|
||||
ocr_result = ocr_result[0]
|
||||
if not ocr_result:
|
||||
return layout
|
||||
|
||||
@@ -7,13 +7,13 @@ from tenacity import retry, stop_after_attempt, wait_random
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('OCR识别失败!'))
|
||||
def request_ocr(img_path):
|
||||
def ocr(img_path):
|
||||
"""
|
||||
请求图片OCR识别接口
|
||||
:param img_path: 待识别图片路径
|
||||
:return: 识别结果
|
||||
"""
|
||||
url = 'http://ocr_api:5001/ocr'
|
||||
url = 'http://ocr:5001'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -23,14 +23,14 @@ def request_ocr(img_path):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('抽取基本医保结算单失败!'))
|
||||
def request_discharge_info(img_path, layout):
|
||||
def ie_settlement(img_path, layout):
|
||||
"""
|
||||
请求基本医保结算单信息抽取接口
|
||||
:param img_path: 待抽取图片路径
|
||||
:param layout: 图片ocr信息
|
||||
:return: 抽取结果
|
||||
"""
|
||||
url = 'http://settlement_api:5002/nlp/settlement'
|
||||
url = 'http://ie_settlement:5002'
|
||||
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -40,14 +40,14 @@ def request_discharge_info(img_path, layout):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('抽取出院记录失败!'))
|
||||
def request_discharge_info(img_path, layout):
|
||||
def ie_discharge(img_path, layout):
|
||||
"""
|
||||
请求出院记录信息抽取接口
|
||||
:param img_path: 待抽取图片路径
|
||||
:param layout: 图片ocr信息
|
||||
:return: 抽取结果
|
||||
"""
|
||||
url = 'http://discharge_api:5003/nlp/discharge'
|
||||
url = 'http://ie_discharge:5003'
|
||||
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -57,14 +57,14 @@ def request_discharge_info(img_path, layout):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('抽取费用清单失败!'))
|
||||
def request_cost_info(img_path, layout):
|
||||
def ie_cost(img_path, layout):
|
||||
"""
|
||||
请求费用清单信息抽取接口
|
||||
:param img_path: 待抽取图片路径
|
||||
:param layout: 图片ocr信息
|
||||
:return: 抽取结果
|
||||
"""
|
||||
url = 'http://cost_api:5004/nlp/cost'
|
||||
url = 'http://ie_cost:5004'
|
||||
response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -74,13 +74,13 @@ def request_cost_info(img_path, layout):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('获取图片方向失败!'))
|
||||
def request_image_orientation(img_path):
|
||||
def clas_orientation(img_path):
|
||||
"""
|
||||
请求图片方向分类接口
|
||||
:param img_path: 待分类图片路径
|
||||
:return: 最有可能的两个图片方向
|
||||
"""
|
||||
url = 'http://clas_api:5005/clas/orientation'
|
||||
url = 'http://clas_orientation:5005'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -90,13 +90,13 @@ def request_image_orientation(img_path):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('获取文档区域失败!'))
|
||||
def request_book_areas(img_path):
|
||||
def det_book(img_path):
|
||||
"""
|
||||
请求文档区域识别接口
|
||||
:param img_path: 待识别图片路径
|
||||
:return: 文档图片路径列表
|
||||
"""
|
||||
url = 'http://det_api:5006/det/books'
|
||||
url = 'http://det_book:5006'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -106,15 +106,33 @@ def request_book_areas(img_path):
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('矫正扭曲图片失败!'))
|
||||
def request_dewarped_image(img_path):
|
||||
def dewarp(img_path):
|
||||
"""
|
||||
请求矫正图片接口
|
||||
:param img_path: 待矫正图片路径
|
||||
:return: 矫正后的图片路径
|
||||
"""
|
||||
url = 'http://dewarp_api:5007/dewarp'
|
||||
url = 'http://dewarp:5007'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
return img_path
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda retry_state: logging.warning('文本分类失败!'))
def clas_text(text):
    """
    Call the text classification service.

    A failed attempt (one that raises) logs a warning and is retried, up to three
    attempts in total, with the last exception re-raised. A non-200 response does
    not raise, so it is returned as None without a retry.

    :param text: text to classify; a falsy value returns None without any request
    :return: decoded JSON body of the response on HTTP 200, otherwise None
    """
    if not text:
        return None

    response = requests.post('http://clas_text:5008', {'text': text})
    return response.json() if response.status_code == 200 else None
|
||||
|
||||
Reference in New Issue
Block a user