项目架构调整,模型全部采用接口调用

This commit is contained in:
2024-09-25 14:46:37 +08:00
parent 7647df7d74
commit b8c1202957
25 changed files with 467 additions and 222 deletions

34
services/__init__.py Normal file
View File

@@ -0,0 +1,34 @@
"""
信息抽取关键词配置
"""
# Patient name
PATIENT_NAME = ['患者姓名']

# Admission date
ADMISSION_DATE = ['入院日期']

# Discharge date
DISCHARGE_DATE = ['出院日期']

# Medical expenses incurred
MEDICAL_EXPENSES = ['费用总额']

# Personal cash payment
PERSONAL_CASH_PAYMENT = ['个人现金支付']

# Personal account payment
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']

# Personal self-funded amount (two alternative labels)
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']

# Medical insurance type
MEDICAL_INSURANCE_TYPE = ['医保类型']

# Hospital visited
HOSPITAL = ['医院']

# Department visited
DEPARTMENT = ['科室']

# Attending doctor
DOCTOR = ['主治医生']

# Inpatient (admission) number
ADMISSION_ID = ['住院号']

# Medical insurance settlement statement number
SETTLEMENT_ID = ['医保结算单号码']

# Age
AGE = ['年龄']

# Total amount written out in capital (uppercase) numerals
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']

26
services/clas_api.py Normal file
View File

@@ -0,0 +1,26 @@
from flask import Flask, request
from paddleclas import PaddleClas
from utils import process_request
# Flask application exposing the orientation classification endpoint
app = Flask(__name__)
# Text-image orientation classifier, created once at import time and reused by the request handler
CLAS = PaddleClas(model_name='text_image_orientation')
@app.route('/clas/orientation', methods=['POST'])
@process_request
def orientation():
    """
    Classify the rotation angle of the image named by the ``img_path`` form field.

    Rotating the image counter-clockwise by the reported angle makes it upright.
    Possible values: ['0', '90', '180', '270'].

    :return: the label names reported by the classifier (documented by the original
             author as the two most likely angles), or ['0', '90'] when the top
             score is below 0.5
    """
    # CLAS.predict yields results lazily; take the first result of the first batch
    prediction = next(CLAS.predict(input_data=request.form.get('img_path')))[0]
    low_confidence = prediction['scores'][0] < 0.5
    return ['0', '90'] if low_confidence else prediction['label_names']
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5005
    app.run(host='0.0.0.0', port=5005)

24
services/cost_api.py Normal file
View File

@@ -0,0 +1,24 @@
import json
from flask import Flask, request
from paddlenlp import Taskflow
from utils import process_request
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES
# Flask application exposing the cost-list extraction endpoint
app = Flask(__name__)

# Schema keys requested from the extraction model for cost lists
COST_LIST_SCHEMA = [*PATIENT_NAME, *ADMISSION_DATE, *DISCHARGE_DATE, *MEDICAL_EXPENSES]

# Information-extraction pipeline, created once at import time.
# NOTE(review): task_path is relative, so it presumably resolves against the
# process working directory — confirm against the deployment setup.
COST = Taskflow(
    'information_extraction',
    schema=COST_LIST_SCHEMA,
    model='uie-x-base',
    task_path='../model/cost_list_model',
    layout_analysis=False,
    precision='fp16',
)
@app.route('/nlp/cost', methods=['POST'])
@process_request
def cost():
    """
    Run cost-list information extraction on a posted document.

    Reads the ``img_path`` and ``layout`` form fields; ``layout`` is parsed as JSON
    before being handed to the extraction pipeline.

    :return: whatever the COST pipeline returns for the document
    """
    form = request.form
    document = {
        'doc': form.get('img_path'),
        'layout': json.loads(form.get('layout')),
    }
    return COST(document)
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5004
    app.run(host='0.0.0.0', port=5004)

28
services/det_api.py Normal file
View File

@@ -0,0 +1,28 @@
import os.path
import cv2
from flask import Flask, request
from paddle_services.paddle_detection import detector
from utils import process_request, parse_img_path
# Flask application exposing the book detection endpoint
app = Flask(__name__)
@app.route('/det/books', methods=['POST'])
@process_request
def books():
    """
    Detect book areas in the posted image and save each crop next to the source image.

    Reads the ``img_path`` form field. Each crop is written to the image's directory
    as ``<name>_book_<index>.<ext>``.

    :return: list of paths of the saved crops, in detection order
    :raises IOError: if a crop cannot be written to disk
    """
    img_path = request.form.get('img_path')
    book_images = detector.get_book_areas(img_path)
    dirname, img_name, ext = parse_img_path(img_path)
    books_path = []
    for index, book_image in enumerate(book_images):
        save_path = os.path.join(dirname, f'{img_name}_book_{index}.{ext}')
        # cv2.imwrite can signal failure through a False return value instead of raising;
        # without this check the caller would receive a path to a file that does not exist.
        if not cv2.imwrite(save_path, book_image):
            raise IOError(f'Failed to write book image: {save_path}')
        books_path.append(save_path)
    return books_path
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5006
    app.run(host='0.0.0.0', port=5006)

25
services/dewarp_api.py Normal file
View File

@@ -0,0 +1,25 @@
import os
import cv2
from flask import Flask, request
from paddle_services.doc_dewarp import dewarper
from utils import process_request, parse_img_path
# Flask application exposing the document dewarping endpoint
app = Flask(__name__)
@app.route('/dewarp', methods=['POST'])
@process_request
def dewarp():
    """
    Dewarp the posted image and save the result next to the source image.

    Reads the ``img_path`` form field. The output is written to the image's
    directory as ``<name>_dewarped.<ext>``.

    :return: path of the saved dewarped image
    :raises ValueError: if the source image cannot be read
    :raises IOError: if the dewarped image cannot be written to disk
    """
    img_path = request.form.get('img_path')
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise ValueError(f'Failed to read image: {img_path}')
    dewarped_img = dewarper.dewarp_image(img)
    dirname, img_name, ext = parse_img_path(img_path)
    save_path = os.path.join(dirname, f'{img_name}_dewarped.{ext}')
    # cv2.imwrite can signal failure through a False return value instead of raising
    if not cv2.imwrite(save_path, dewarped_img):
        raise IOError(f'Failed to write dewarped image: {save_path}')
    return save_path
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5007
    app.run(host='0.0.0.0', port=5007)

26
services/discharge_api.py Normal file
View File

@@ -0,0 +1,26 @@
import json
from flask import Flask, request
from paddlenlp import Taskflow
from utils import process_request
from . import HOSPITAL, DEPARTMENT, PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, DOCTOR, ADMISSION_ID, AGE
# Flask application exposing the discharge-record extraction endpoint
app = Flask(__name__)

# Schema keys requested from the extraction model for discharge records
DISCHARGE_RECORD_SCHEMA = [
    *HOSPITAL,
    *DEPARTMENT,
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *DOCTOR,
    *ADMISSION_ID,
    *AGE,
]

# Information-extraction pipeline, created once at import time.
# NOTE(review): task_path is relative, so it presumably resolves against the
# process working directory — confirm against the deployment setup.
DISCHARGE = Taskflow(
    'information_extraction',
    schema=DISCHARGE_RECORD_SCHEMA,
    model='uie-x-base',
    task_path='../model/discharge_record_model',
    layout_analysis=False,
    precision='fp16',
)
@app.route('/nlp/discharge', methods=['POST'])
@process_request
def discharge():
    """
    Run discharge-record information extraction on a posted document.

    Reads the ``img_path`` and ``layout`` form fields; ``layout`` is parsed as JSON
    before being handed to the extraction pipeline.

    :return: whatever the DISCHARGE pipeline returns for the document
    """
    form = request.form
    document = {
        'doc': form.get('img_path'),
        'layout': json.loads(form.get('layout')),
    }
    return DISCHARGE(document)
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5003
    app.run(host='0.0.0.0', port=5003)

18
services/ocr_api.py Normal file
View File

@@ -0,0 +1,18 @@
from flask import Flask, request
from paddleocr import PaddleOCR
from utils import process_request
# Flask application exposing the OCR endpoint
app = Flask(__name__)
# OCR engine created once at import time and reused by the request handler;
# angle classification is switched off here and again in the per-request call
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3)
@app.route('/ocr', methods=['POST'])
@process_request
def ocr():
    """
    Run OCR on the image named by the ``img_path`` form field.

    :return: the result of ``OCR.ocr`` with angle classification disabled
    """
    source_path = request.form.get('img_path')
    recognized = OCR.ocr(source_path, cls=False)
    return recognized
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5001
    app.run(host='0.0.0.0', port=5001)

View File

@@ -0,0 +1,29 @@
# Use the official PaddlePaddle image as the base
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6
# Set the working directory
WORKDIR /app
# Environment variables:
#   PYTHONUNBUFFERED - disable Python output buffering
#   TZ               - container time zone
#   PIP_INDEX_URL    - pip mirror used to speed up installation
ENV PYTHONUNBUFFERED=1 \
    TZ=Asia/Shanghai \
    PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
# Install dependencies
COPY requirements.txt /app/requirements.txt
COPY packages /app/packages
# "$TZ" must be double-quoted: single quotes would write the literal text $TZ to /etc/timezone.
# onnxruntime is removed and reinstalled from the onnxruntime-cuda-12 package index.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install --no-cache-dir onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
# Copy the contents of the current directory into /app in the container
COPY . /app
# Expose port
# EXPOSE 8081
# Run an API service; the concrete service is defined on the command line or in docker-compose.yml
ENTRYPOINT ["gunicorn"]

View File

View File

@@ -1,4 +1,7 @@
import os.path
from onnxruntime import InferenceSession
# Locate the model relative to this file (dirname applied four times to its absolute
# path) so that loading does not depend on the process working directory.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
    'model', 'dewarp_model', 'doc_tr_pp.onnx',
)
# Single ONNX Runtime session for the dewarp model, created once at import time.
# The earlier session built from a working-directory-relative path was immediately
# overwritten by this one, so it has been dropped.
DOC_TR = InferenceSession(MODEL_PATH, providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])

View File

@@ -11,10 +11,10 @@ def dewarp_image(image):
y = to_tensor(image)
img = np.transpose(img, (2, 0, 1))
bm = DOC_TR.run(None, {"image": img[None,]})[0]
bm = DOC_TR.run(None, {'image': img[None,]})[0]
bm = paddle.to_tensor(bm)
bm = paddle.nn.functional.interpolate(
bm, y.shape[2:], mode="bilinear", align_corners=False
bm, y.shape[2:], mode='bilinear', align_corners=False
)
bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)

View File

@@ -1,4 +1,8 @@
import os
from onnxruntime import InferenceSession
# Locate the model directory relative to this file (dirname applied four times to its
# absolute path) so that loading does not depend on the process working directory.
MODEL_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
    'model', 'object_det_model',
)
# Single ONNX Runtime session for the object detection model, created once at import time.
# The earlier session built from a working-directory-relative path was immediately
# overwritten by this one, so it has been dropped.
PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'),
                              providers=['CPUExecutionProvider'], provider_options=[{'device_id': 0}])

View File

@@ -1,13 +1,13 @@
import tempfile
import os.path
from collections import defaultdict
import cv2
import numpy as np
from paddle_detection import PADDLE_DET
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
from paddle_detection.deploy.third_engine.onnx.preprocess import Compose
from util import image_util, common_util
from util import image_util
from . import PADDLE_DET, MODEL_DIR
from .deploy.third_engine.onnx.infer import PredictConfig
from .deploy.third_engine.onnx.preprocess import Compose
def predict_image(infer_config, predictor, img_path):
@@ -15,7 +15,7 @@ def predict_image(infer_config, predictor, img_path):
transforms = Compose(infer_config.preprocess_infos)
# predict image
inputs = transforms(img_path)
inputs["image"] = np.array(inputs["image"]).astype('float32')
inputs['image'] = np.array(inputs['image']).astype('float32')
inputs_name = [var.name for var in predictor.get_inputs()]
inputs = {k: inputs[k][None,] for k in inputs_name}
@@ -25,25 +25,23 @@ def predict_image(infer_config, predictor, img_path):
result = defaultdict(list)
for bbox in bboxes:
if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
result[bbox[0]].append({"score": bbox[1], "box": bbox[2:]})
result[bbox[0]].append({'score': bbox[1], 'box': bbox[2:]})
return result
def detect_image(img_path):
    """
    Run the object detection model on an image file.

    :param img_path: path of the image, forwarded to predict_image
    :return: the result of predict_image for this image
    """
    # Load the inference config that sits in MODEL_DIR; the previous
    # working-directory-relative assignment was dead code and has been removed.
    infer_config = PredictConfig(os.path.join(MODEL_DIR, 'infer_cfg.yml'))
    return predict_image(infer_config, PADDLE_DET, img_path)
def get_book_areas(img_path):
    """
    Crop every detected book area out of an image file.

    :param img_path: path of the image to analyse
    :return: list with one cropped image per detected book area
    """
    detect_result = detect_image(img_path)
    # 73 is the detector class id treated as "book" here
    # (presumably the COCO class index — TODO confirm against the model's label list)
    book_areas = detect_result[73]
    image = cv2.imread(img_path)
    # One crop per detected area; the shadowed earlier definition and the
    # duplicated append have been removed.
    return [image_util.capture(image, book_area['box']) for book_area in book_areas]

View File

@@ -0,0 +1,16 @@
numpy==1.26.4
onnxconverter-common==1.14.0
OpenCC==1.1.6
opencv-python==4.6.0.66
paddle2onnx==1.2.3
paddleclas==2.5.2
paddlenlp==2.6.1
paddleocr==2.7.3
pillow==10.4.0
pymysql==1.1.1
requests==2.32.3
sqlacodegen==2.3.0.post1
sqlalchemy==1.4.52
tenacity==8.5.0
ufile==3.2.9
zxing-cpp==2.2.0

View File

@@ -0,0 +1,30 @@
import json
from flask import Flask, request
from paddlenlp import Taskflow
from utils import process_request
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, ADMISSION_ID, SETTLEMENT_ID, \
UPPERCASE_MEDICAL_EXPENSES
# Flask application exposing the settlement-list extraction endpoint
app = Flask(__name__)

# Schema keys requested from the extraction model for settlement lists
SETTLEMENT_LIST_SCHEMA = [
    *PATIENT_NAME,
    *ADMISSION_DATE,
    *DISCHARGE_DATE,
    *MEDICAL_EXPENSES,
    *PERSONAL_CASH_PAYMENT,
    *PERSONAL_ACCOUNT_PAYMENT,
    *PERSONAL_FUNDED_AMOUNT,
    *MEDICAL_INSURANCE_TYPE,
    *ADMISSION_ID,
    *SETTLEMENT_ID,
    *UPPERCASE_MEDICAL_EXPENSES,
]

# Information-extraction pipeline, created once at import time.
# NOTE(review): task_path is relative, so it presumably resolves against the
# process working directory — confirm against the deployment setup.
SETTLEMENT_IE = Taskflow(
    'information_extraction',
    schema=SETTLEMENT_LIST_SCHEMA,
    model='uie-x-base',
    task_path='../model/settlement_list_model',
    layout_analysis=False,
    precision='fp16',
)
@app.route('/nlp/settlement', methods=['POST'])
@process_request
def settlement():
    """
    Run settlement-list information extraction on a posted document.

    Reads the ``img_path`` and ``layout`` form fields; ``layout`` is parsed as JSON
    before being handed to the extraction pipeline.

    :return: whatever the SETTLEMENT_IE pipeline returns for the document
    """
    form = request.form
    document = {
        'doc': form.get('img_path'),
        'layout': json.loads(form.get('layout')),
    }
    return SETTLEMENT_IE(document)
if __name__ == '__main__':
    # When executed directly, serve with Flask's built-in server on all interfaces, port 5002
    app.run(host='0.0.0.0', port=5002)

26
services/utils.py Normal file
View File

@@ -0,0 +1,26 @@
import logging
import os
from flask import jsonify
def process_request(func):
    """
    Generic API handler decorator.

    Calls the wrapped view and returns its result as a JSON response with status 200.
    Any exception is logged to the 'error' logger and turned into a JSON
    ``{'error': <message>}`` response with status 500.

    :param func: view function to wrap
    :return: wrapper that keeps func's name and docstring
    """
    # Local import keeps this change self-contained within the function
    import functools

    # Without wraps every decorated view is named 'wrapper'; Flask derives the endpoint
    # name from the function name, so two decorated routes on one app would collide.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
            return jsonify(result), 200
        except Exception as e:
            logging.getLogger('error').error(f'Error: {e}', exc_info=e)
            return jsonify({'error': str(e)}), 500

    return wrapper
def parse_img_path(img_path):
    """
    Split an image path into directory, file name and extension.

    :param img_path: path of the image file
    :return: tuple ``(dirname, img_name, ext)``; ``ext`` has no leading dot and is
             an empty string when the file name contains no dot
    """
    dirname, basename = os.path.split(img_path)
    img_name, dot, ext = basename.rpartition('.')
    if not dot:
        # No dot in the file name: rpartition leaves the whole name in the last slot.
        # The previous rsplit-based unpacking raised ValueError here.
        img_name, ext = basename, ''
    return dirname, img_name, ext