项目架构调整,模型全部采用接口调用

This commit is contained in:
2024-09-25 14:46:37 +08:00
parent 7647df7d74
commit b8c1202957
25 changed files with 467 additions and 222 deletions

View File

@@ -1,5 +1,5 @@
# 使用官方的paddle镜像作为基础 # 使用官方的paddle镜像作为基础
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6 FROM python:3.10-alpine
# 设置工作目录 # 设置工作目录
WORKDIR /app WORKDIR /app
@@ -15,9 +15,7 @@ ENV PYTHONUNBUFFERED=1 \
COPY requirements.txt /app/requirements.txt COPY requirements.txt /app/requirements.txt
COPY packages /app/packages COPY packages /app/packages
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \
&& pip install --no-cache-dir -r requirements.txt \ && pip install --no-cache-dir -r requirements.txt
&& pip uninstall -y onnxruntime onnxruntime-gpu \
&& pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
# 将当前目录内容复制到容器的/app内 # 将当前目录内容复制到容器的/app内
COPY . /app COPY . /app

View File

@@ -126,3 +126,5 @@ bash update.sh
2. 新增扭曲矫正功能 2. 新增扭曲矫正功能
21. 版本号1.14.0 21. 版本号1.14.0
1. 新增二维码识别替换高清图片功能 1. 新增二维码识别替换高清图片功能
22. 版本号2.0.0
1. 项目架构调整,模型全部采用接口调用

View File

@@ -1,27 +1,69 @@
x-env: x-base:
&template &base_template
image: fcb_photo_review:1.14.6
restart: always restart: always
x-review: x-project:
&review_template &project_template
<<: *template <<: *base_template
image: fcb_photo_review:1.14.6
volumes:
- ./log:/app/log
x-paddle:
&paddle_template
<<: *base_template
image: fcb_paddle:0.0.1
volumes: volumes:
- ./log:/app/log - ./log:/app/log
- ./model:/app/model - ./model:/app/model
services:
ocr_api:
<<: *paddle_template
build:
context: ./services/paddle_services
container_name: ocr_api
hostname: ocr_api
command: [ '-w 5 ./services/ocr_api:app --bind 0.0.0.0:5001' ]
deploy: deploy:
resources: resources:
reservations: reservations:
devices: devices:
- device_ids: [ '0', '1' ] - device_ids: [ '0' ]
capabilities: [ 'gpu' ] capabilities: [ 'gpu' ]
driver: 'nvidia' driver: 'nvidia'
x-mask: settlement_api:
&mask_template <<: *paddle_template
<<: *template container_name: settlement_api
volumes: hostname: settlement_api
- ./log:/app/log command: [ '-w 5 ./services/settlement_api:app --bind 0.0.0.0:5002' ]
deploy:
resources:
reservations:
devices:
- device_ids: [ '0' ]
capabilities: [ 'gpu' ]
driver: 'nvidia'
discharge_api:
<<: *paddle_template
container_name: discharge_api
hostname: discharge_api
command: [ '-w 5 ./services/discharge_api:app --bind 0.0.0.0:5003' ]
deploy:
resources:
reservations:
devices:
- device_ids: [ '0' ]
capabilities: [ 'gpu' ]
driver: 'nvidia'
cost_api:
<<: *paddle_template
container_name: cost_api
hostname: cost_api
command: [ '-w 5 ./services/cost_api:app --bind 0.0.0.0:5004' ]
deploy: deploy:
resources: resources:
reservations: reservations:
@@ -30,17 +72,37 @@ x-mask:
capabilities: [ 'gpu' ] capabilities: [ 'gpu' ]
driver: 'nvidia' driver: 'nvidia'
services: clas_api:
<<: *paddle_template
container_name: clas_api
hostname: clas_api
command: [ '-w 5 ./services/clas_api:app --bind 0.0.0.0:5005' ]
deploy:
resources:
reservations:
devices:
- device_ids: [ '0' ]
capabilities: [ 'gpu' ]
driver: 'nvidia'
det_api: det_api:
<<: *template <<: *paddle_template
build:
context: .
container_name: det_api container_name: det_api
hostname: det_api hostname: det_api
volumes: command: [ '-w 5 ./services/det_api:app --bind 0.0.0.0:5006' ]
- ./log:/app/log deploy:
- ./model:/app/model resources:
# command: [ 'det_api.py' ] reservations:
devices:
- device_ids: [ '1' ]
capabilities: [ 'gpu' ]
driver: 'nvidia'
dewarp_api:
<<: *paddle_template
container_name: dewarp_api
hostname: dewarp_api
command: [ '-w 5 ./services/dewarp_api:app --bind 0.0.0.0:5007' ]
deploy: deploy:
resources: resources:
reservations: reservations:
@@ -50,15 +112,23 @@ services:
driver: 'nvidia' driver: 'nvidia'
photo_review_1: photo_review_1:
<<: *review_template <<: *project_template
build:
context: .
container_name: photo_review_1 container_name: photo_review_1
hostname: photo_review_1 hostname: photo_review_1
depends_on: depends_on:
- ocr_api
- settlement_api
- discharge_api
- cost_api
- clas_api
- det_api - det_api
- dewarp_api
command: [ 'photo_review.py', '--clean', 'True' ] command: [ 'photo_review.py', '--clean', 'True' ]
photo_review_2: photo_review_2:
<<: *review_template <<: *project_template
container_name: photo_review_2 container_name: photo_review_2
hostname: photo_review_2 hostname: photo_review_2
depends_on: depends_on:
@@ -66,7 +136,7 @@ services:
command: [ 'photo_review.py' ] command: [ 'photo_review.py' ]
photo_review_3: photo_review_3:
<<: *review_template <<: *project_template
container_name: photo_review_3 container_name: photo_review_3
hostname: photo_review_3 hostname: photo_review_3
depends_on: depends_on:
@@ -74,7 +144,7 @@ services:
command: [ 'photo_review.py' ] command: [ 'photo_review.py' ]
photo_review_4: photo_review_4:
<<: *review_template <<: *project_template
container_name: photo_review_4 container_name: photo_review_4
hostname: photo_review_4 hostname: photo_review_4
depends_on: depends_on:
@@ -82,7 +152,7 @@ services:
command: [ 'photo_review.py' ] command: [ 'photo_review.py' ]
photo_review_5: photo_review_5:
<<: *review_template <<: *project_template
container_name: photo_review_5 container_name: photo_review_5
hostname: photo_review_5 hostname: photo_review_5
depends_on: depends_on:
@@ -90,33 +160,23 @@ services:
command: [ 'photo_review.py' ] command: [ 'photo_review.py' ]
photo_mask_1: photo_mask_1:
<<: *mask_template <<: *project_template
container_name: photo_mask_1 container_name: photo_mask_1
hostname: photo_mask_1 hostname: photo_mask_1
depends_on: depends_on:
- photo_review_5 - ocr_api
- settlement_api
- discharge_api
- cost_api
- clas_api
- det_api
- dewarp_api
command: [ 'photo_mask.py', '--clean', 'True' ] command: [ 'photo_mask.py', '--clean', 'True' ]
photo_mask_2: photo_mask_2:
<<: *mask_template <<: *project_template
container_name: photo_mask_2 container_name: photo_mask_2
hostname: photo_mask_2 hostname: photo_mask_2
depends_on: depends_on:
- photo_mask_1 - photo_mask_1
command: [ 'photo_mask.py' ] command: [ 'photo_mask.py' ]
#
# photo_review_6:
# <<: *review_template
# container_name: photo_review_6
# hostname: photo_review_6
# depends_on:
# - photo_mask_2
# command: [ 'photo_review.py' ]
#
# photo_review_7:
# <<: *review_template
# container_name: photo_review_7
# hostname: photo_review_7
# depends_on:
# - photo_review_6
# command: [ 'photo_review.py' ]

View File

@@ -1,6 +1,4 @@
import jieba import jieba
from paddlenlp import Taskflow
from paddleocr import PaddleOCR
''' '''
项目配置 项目配置
@@ -11,52 +9,6 @@ PHHD_BATCH_SIZE = 10
SLEEP_MINUTES = 5 SLEEP_MINUTES = 5
# 是否发送报错邮件 # 是否发送报错邮件
SEND_ERROR_EMAIL = True SEND_ERROR_EMAIL = True
# 是否开启布局分析
LAYOUT_ANALYSIS = False
"""
信息抽取关键词配置
"""
# 患者姓名
PATIENT_NAME = ['患者姓名']
# 入院日期
ADMISSION_DATE = ['入院日期']
# 出院日期
DISCHARGE_DATE = ['出院日期']
# 发生医疗费
MEDICAL_EXPENSES = ['费用总额']
# 个人现金支付
PERSONAL_CASH_PAYMENT = ['个人现金支付']
# 个人账户支付
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
# 个人自费金额
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
# 医保类别
MEDICAL_INSURANCE_TYPE = ['医保类型']
# 就诊医院
HOSPITAL = ['医院']
# 就诊科室
DEPARTMENT = ['科室']
# 主治医生
DOCTOR = ['主治医生']
# 住院号
ADMISSION_ID = ['住院号']
# 医保结算单号码
SETTLEMENT_ID = ['医保结算单号码']
# 年龄
AGE = ['年龄']
# 大写总额
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
SETTLEMENT_LIST_SCHEMA = \
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
+ PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
+ UPPERCASE_MEDICAL_EXPENSES)
DISCHARGE_RECORD_SCHEMA = \
HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
''' '''
别名配置 别名配置
@@ -90,15 +42,3 @@ jieba.suggest_freq(('骨', '伤'), True)
jieba.suggest_freq(('感染', ''), True) jieba.suggest_freq(('感染', ''), True)
jieba.suggest_freq(('', ''), True) jieba.suggest_freq(('', ''), True)
jieba.suggest_freq(('', ''), True) jieba.suggest_freq(('', ''), True)
'''
模型配置
'''
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3)

View File

@@ -18,15 +18,15 @@ from sqlalchemy import update
from db import MysqlSession from db import MysqlSession
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
from log import HOSTNAME from log import HOSTNAME
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
DEPARTMENT_FILTER
from services import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ ADMISSION_ID, SETTLEMENT_ID, AGE, UPPERCASE_MEDICAL_EXPENSES
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
from ucloud import ufile from ucloud import ufile
from util import image_util, common_util, html_util, model_util from util import image_util, common_util, html_util, model_util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \ handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital
parse_hospital
# 合并信息抽取结果 # 合并信息抽取结果

View File

@@ -1,16 +1,11 @@
numpy==1.26.4 numpy==1.26.4
onnxconverter-common==1.14.0 OpenCC==1.1.6 # 中文繁简转换
OpenCC==1.1.6
opencv-python==4.6.0.66 opencv-python==4.6.0.66
paddle2onnx==1.2.3
paddleclas==2.5.2
paddlenlp==2.6.1
paddleocr==2.7.3
pillow==10.4.0 pillow==10.4.0
pymysql==1.1.1 pymysql==1.1.1
requests==2.32.3 requests==2.32.3
sqlacodegen==2.3.0.post1 sqlacodegen==2.3.0.post1 # 实体类生成
sqlalchemy==1.4.52 sqlalchemy==1.4.52 # ORM框架
tenacity==8.5.0 tenacity==8.5.0 # 重试
ufile==3.2.9 ufile==3.2.9 # 云空间
zxing-cpp==2.2.0 zxing-cpp==2.2.0 # 二维码识别

34
services/__init__.py Normal file
View File

@@ -0,0 +1,34 @@
"""
Keyword configuration for information extraction.

Each constant below is a list of Chinese prompt keywords for one field.
"""
# Patient name
PATIENT_NAME = ['患者姓名']
# Admission date
ADMISSION_DATE = ['入院日期']
# Discharge date
DISCHARGE_DATE = ['出院日期']
# Medical expenses incurred
MEDICAL_EXPENSES = ['费用总额']
# Personal cash payment
PERSONAL_CASH_PAYMENT = ['个人现金支付']
# Personal account payment
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
# Personal self-funded amount (two alternative keywords)
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
# Medical insurance type
MEDICAL_INSURANCE_TYPE = ['医保类型']
# Hospital visited
HOSPITAL = ['医院']
# Department visited
DEPARTMENT = ['科室']
# Attending doctor
DOCTOR = ['主治医生']
# Inpatient (admission) number
ADMISSION_ID = ['住院号']
# Medical insurance settlement statement number
SETTLEMENT_ID = ['医保结算单号码']
# Age
AGE = ['年龄']
# Total amount written in Chinese capital numerals
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']

View File

@@ -1,26 +1,26 @@
from flask import Flask, request from flask import Flask, request
from paddleclas import PaddleClas from paddleclas import PaddleClas
from util.common_util import process_request from utils import process_request
app = Flask(__name__) app = Flask(__name__)
CLAS = PaddleClas(model_name="text_image_orientation") CLAS = PaddleClas(model_name='text_image_orientation')
@app.route('/clas/orientation', methods=['POST']) @app.route('/clas/orientation', methods=['POST'])
@process_request @process_request
def orientation(): def orientation():
""" """
判断图片旋转角度逆时针旋转该角度后为正可能值["0", "90", "180", "270"] 判断图片旋转角度逆时针旋转该角度后为正可能值['0', '90', '180', '270']
:return: 最有可能的两个角度 :return: 最有可能的两个角度
""" """
img_path = request.form.get('img_path') img_path = request.form.get('img_path')
clas_result = CLAS.predict(input_data=img_path) clas_result = CLAS.predict(input_data=img_path)
clas_result = next(clas_result)[0] clas_result = next(clas_result)[0]
if clas_result["scores"][0] < 0.5: if clas_result['scores'][0] < 0.5:
return ['0', '90'] return ['0', '90']
return clas_result["label_names"] return clas_result['label_names']
if __name__ == '__main__': if __name__ == '__main__':
app.run('0.0.0.0', 5002) app.run('0.0.0.0', 5005)

24
services/cost_api.py Normal file
View File

@@ -0,0 +1,24 @@
import json
import os

from flask import Flask, request
from paddlenlp import Taskflow

from utils import process_request
# NOTE(review): this module mixes an absolute import (`utils`) with a relative
# one (`.`); confirm the launch configuration makes both resolvable.
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES

app = Flask(__name__)

# Extraction schema: the keyword lists of every field wanted from a cost list.
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES

# Locate the model relative to this file rather than the process working
# directory, so the service loads the same model no matter where it is started.
# This resolves to the same directory as the former '../model/cost_list_model'
# when the process is launched from this file's folder.
COST_MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                               'model', 'cost_list_model')

COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
                task_path=COST_MODEL_PATH, layout_analysis=False, precision='fp16')


@app.route('/nlp/cost', methods=['POST'])
@process_request
def cost():
    """
    Extract cost-list fields from an image.

    Form fields:
        img_path: path of the image to extract from.
        layout: JSON-encoded layout data, decoded and passed to the model as 'layout'.

    :return: the result of the COST Taskflow call
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return COST({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Development entry point; binds on all interfaces, port 5004.
    app.run('0.0.0.0', 5004)

View File

@@ -3,9 +3,8 @@ import os.path
import cv2 import cv2
from flask import Flask, request from flask import Flask, request
from paddle_detection import detector from paddle_services.paddle_detection import detector
from util import image_util from utils import process_request, parse_img_path
from util.common_util import process_request
app = Flask(__name__) app = Flask(__name__)
@@ -14,10 +13,9 @@ app = Flask(__name__)
@process_request @process_request
def books(): def books():
img_path = request.form.get('img_path') img_path = request.form.get('img_path')
image = cv2.imread(img_path) result = detector.get_book_areas(img_path)
result = detector.get_book_areas(image)
dirname, img_name, ext = image_util.parse_path(img_path) dirname, img_name, ext = parse_img_path(img_path)
books_path = [] books_path = []
for i in range(len(result)): for i in range(len(result)):
save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext) save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext)
@@ -27,4 +25,4 @@ def books():
if __name__ == '__main__': if __name__ == '__main__':
app.run('0.0.0.0', 5000) app.run('0.0.0.0', 5006)

View File

@@ -3,9 +3,8 @@ import os
import cv2 import cv2
from flask import Flask, request from flask import Flask, request
from doc_dewarp import dewarp from paddle_services.doc_dewarp import dewarper
from util import image_util from utils import process_request, parse_img_path
from util.common_util import process_request
app = Flask(__name__) app = Flask(__name__)
@@ -15,12 +14,12 @@ app = Flask(__name__)
def dewarp(): def dewarp():
img_path = request.form.get('img_path') img_path = request.form.get('img_path')
img = cv2.imread(img_path) img = cv2.imread(img_path)
dewarped_img = dewarp.dewarp_image(img) dewarped_img = dewarper.dewarp_image(img)
dirname, img_name, ext = image_util.parse_path(img_path) dirname, img_name, ext = parse_img_path(img_path)
save_path = os.path.join(dirname, img_name + '_dewarped.' + ext) save_path = os.path.join(dirname, img_name + '_dewarped.' + ext)
cv2.imwrite(save_path, dewarped_img) cv2.imwrite(save_path, dewarped_img)
return save_path return save_path
if __name__ == '__main__': if __name__ == '__main__':
app.run('0.0.0.0', 5001) app.run('0.0.0.0', 5007)

26
services/discharge_api.py Normal file
View File

@@ -0,0 +1,26 @@
import json
import os

from flask import Flask, request
from paddlenlp import Taskflow

from utils import process_request
# NOTE(review): this module mixes an absolute import (`utils`) with a relative
# one (`.`); confirm the launch configuration makes both resolvable.
from . import HOSPITAL, DEPARTMENT, PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, DOCTOR, ADMISSION_ID, AGE

app = Flask(__name__)

# Extraction schema: the keyword lists of every field wanted from a discharge record.
DISCHARGE_RECORD_SCHEMA = (
        HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
)

# Locate the model relative to this file rather than the process working
# directory, so the service loads the same model no matter where it is started.
# This resolves to the same directory as the former '../model/discharge_record_model'
# when the process is launched from this file's folder.
DISCHARGE_MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                                    'model', 'discharge_record_model')

DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
                     task_path=DISCHARGE_MODEL_PATH, layout_analysis=False, precision='fp16')


@app.route('/nlp/discharge', methods=['POST'])
@process_request
def discharge():
    """
    Extract discharge-record fields from an image.

    Form fields:
        img_path: path of the image to extract from.
        layout: JSON-encoded layout data, decoded and passed to the model as 'layout'.

    :return: the result of the DISCHARGE Taskflow call
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Development entry point; binds on all interfaces, port 5003.
    app.run('0.0.0.0', 5003)

18
services/ocr_api.py Normal file
View File

@@ -0,0 +1,18 @@
from flask import Flask, request
from paddleocr import PaddleOCR
from utils import process_request
# Flask application object served by this module.
app = Flask(__name__)

# Single OCR engine instance, created at import time and shared by all requests.
OCR = PaddleOCR(
    use_angle_cls=False,
    show_log=False,
    gpu_id=0,
    det_db_box_thresh=0.3,
)


@app.route('/ocr', methods=['POST'])
@process_request
def ocr():
    """
    Run OCR on the image named by the 'img_path' form field.

    :return: the result of OCR.ocr for that image, with angle classification disabled
    """
    image_path = request.form.get('img_path')
    recognition = OCR.ocr(image_path, cls=False)
    return recognition


if __name__ == '__main__':
    # Development entry point; binds on all interfaces, port 5001.
    app.run(host='0.0.0.0', port=5001)

View File

@@ -0,0 +1,29 @@
# Use the official PaddlePaddle GPU image as the base
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6

# Set the working directory
WORKDIR /app

# Environment variables:
#   PYTHONUNBUFFERED - do not buffer Python stdout/stderr
#   TZ               - container time zone
#   PIP_INDEX_URL    - pip mirror used to speed up installation
ENV PYTHONUNBUFFERED=1 \
    TZ=Asia/Shanghai \
    PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple

# Install dependencies
COPY requirements.txt /app/requirements.txt
COPY packages /app/packages
# "$TZ" must be double-quoted: single quotes would write the literal text
# `$TZ` into /etc/timezone instead of the configured zone name.
# After installing the requirements, any onnxruntime build they pulled in is
# removed and onnxruntime-gpu 1.18.0 is installed from the CUDA 12 package feed.
RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install --no-cache-dir onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# Copy the build context into /app inside the image
COPY . /app

# Expose port
# EXPOSE 8081

# Run the API under gunicorn; the concrete app and options are supplied on the
# command line or in docker-compose.yml
ENTRYPOINT ["gunicorn"]

View File

@@ -1,4 +1,7 @@
import os.path
from onnxruntime import InferenceSession from onnxruntime import InferenceSession
DOC_TR = InferenceSession("model/dewarp_model/doc_tr_pp.onnx", MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
providers=["CUDAExecutionProvider"], provider_options=[{"device_id": 0}]) 'model', 'dewarp_model', 'doc_tr_pp.onnx')
DOC_TR = InferenceSession(MODEL_PATH, providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])

View File

@@ -11,10 +11,10 @@ def dewarp_image(image):
y = to_tensor(image) y = to_tensor(image)
img = np.transpose(img, (2, 0, 1)) img = np.transpose(img, (2, 0, 1))
bm = DOC_TR.run(None, {"image": img[None,]})[0] bm = DOC_TR.run(None, {'image': img[None,]})[0]
bm = paddle.to_tensor(bm) bm = paddle.to_tensor(bm)
bm = paddle.nn.functional.interpolate( bm = paddle.nn.functional.interpolate(
bm, y.shape[2:], mode="bilinear", align_corners=False bm, y.shape[2:], mode='bilinear', align_corners=False
) )
bm_nhwc = np.transpose(bm, (0, 2, 3, 1)) bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2) out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)

View File

@@ -1,4 +1,8 @@
import os
from onnxruntime import InferenceSession from onnxruntime import InferenceSession
PADDLE_DET = InferenceSession("model/object_det_model/ppyoloe_plus_crn_l_80e_coco_w_nms.onnx", MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
providers=["CPUExecutionProvider"], provider_options=[{"device_id": 0}]) 'model', 'object_det_model')
PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'),
providers=['CPUExecutionProvider'], provider_options=[{'device_id': 0}])

View File

@@ -1,13 +1,13 @@
import tempfile import os.path
from collections import defaultdict from collections import defaultdict
import cv2 import cv2
import numpy as np import numpy as np
from paddle_detection import PADDLE_DET from util import image_util
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig from . import PADDLE_DET, MODEL_DIR
from paddle_detection.deploy.third_engine.onnx.preprocess import Compose from .deploy.third_engine.onnx.infer import PredictConfig
from util import image_util, common_util from .deploy.third_engine.onnx.preprocess import Compose
def predict_image(infer_config, predictor, img_path): def predict_image(infer_config, predictor, img_path):
@@ -15,7 +15,7 @@ def predict_image(infer_config, predictor, img_path):
transforms = Compose(infer_config.preprocess_infos) transforms = Compose(infer_config.preprocess_infos)
# predict image # predict image
inputs = transforms(img_path) inputs = transforms(img_path)
inputs["image"] = np.array(inputs["image"]).astype('float32') inputs['image'] = np.array(inputs['image']).astype('float32')
inputs_name = [var.name for var in predictor.get_inputs()] inputs_name = [var.name for var in predictor.get_inputs()]
inputs = {k: inputs[k][None,] for k in inputs_name} inputs = {k: inputs[k][None,] for k in inputs_name}
@@ -25,25 +25,23 @@ def predict_image(infer_config, predictor, img_path):
result = defaultdict(list) result = defaultdict(list)
for bbox in bboxes: for bbox in bboxes:
if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold: if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
result[bbox[0]].append({"score": bbox[1], "box": bbox[2:]}) result[bbox[0]].append({'score': bbox[1], 'box': bbox[2:]})
return result return result
def detect_image(img_path): def detect_image(img_path):
infer_cfg = "model/object_det_model/infer_cfg.yml" infer_cfg = os.path.join(MODEL_DIR, 'infer_cfg.yml')
# load infer config # load infer config
infer_config = PredictConfig(infer_cfg) infer_config = PredictConfig(infer_cfg)
return predict_image(infer_config, PADDLE_DET, img_path) return predict_image(infer_config, PADDLE_DET, img_path)
def get_book_areas(image): def get_book_areas(img_path):
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file: detect_result = detect_image(img_path)
cv2.imwrite(temp_file.name, image)
detect_result = detect_image(temp_file.name)
common_util.delete_temp_file(temp_file.name)
book_areas = detect_result[73] book_areas = detect_result[73]
result = [] result = []
image = cv2.imread(img_path)
for book_area in book_areas: for book_area in book_areas:
result.append(image_util.capture(image, book_area["box"])) result.append(image_util.capture(image, book_area['box']))
return result return result

View File

@@ -0,0 +1,16 @@
numpy==1.26.4
onnxconverter-common==1.14.0
OpenCC==1.1.6
opencv-python==4.6.0.66
paddle2onnx==1.2.3
paddleclas==2.5.2
paddlenlp==2.6.1
paddleocr==2.7.3
pillow==10.4.0
pymysql==1.1.1
requests==2.32.3
sqlacodegen==2.3.0.post1
sqlalchemy==1.4.52
tenacity==8.5.0
ufile==3.2.9
zxing-cpp==2.2.0

View File

@@ -0,0 +1,30 @@
import json
import os

from flask import Flask, request
from paddlenlp import Taskflow

from utils import process_request
# NOTE(review): this module mixes an absolute import (`utils`) with a relative
# one (`.`); confirm the launch configuration makes both resolvable.
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
    PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, ADMISSION_ID, SETTLEMENT_ID, \
    UPPERCASE_MEDICAL_EXPENSES

app = Flask(__name__)

# Extraction schema: the keyword lists of every field wanted from a settlement statement.
SETTLEMENT_LIST_SCHEMA = (
        PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
        + PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
        + UPPERCASE_MEDICAL_EXPENSES
)

# Locate the model relative to this file rather than the process working
# directory, so the service loads the same model no matter where it is started.
# This resolves to the same directory as the former '../model/settlement_list_model'
# when the process is launched from this file's folder.
SETTLEMENT_MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                                     'model', 'settlement_list_model')

SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
                         task_path=SETTLEMENT_MODEL_PATH, layout_analysis=False, precision='fp16')


@app.route('/nlp/settlement', methods=['POST'])
@process_request
def settlement():
    """
    Extract settlement-statement fields from an image.

    Form fields:
        img_path: path of the image to extract from.
        layout: JSON-encoded layout data, decoded and passed to the model as 'layout'.

    :return: the result of the SETTLEMENT_IE Taskflow call
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Development entry point; binds on all interfaces, port 5002.
    app.run('0.0.0.0', 5002)

26
services/utils.py Normal file
View File

@@ -0,0 +1,26 @@
import logging
import os
from flask import jsonify
def process_request(func):
    """
    Common wrapper for API view functions.

    Calls the wrapped view and converts its outcome into a JSON response:
    the return value with status 200 on success, or ``{'error': <message>}``
    with status 500 when the view raises. Failures are logged, with traceback,
    to the 'error' logger.

    The wrapper keeps the wrapped function's ``__name__`` and ``__doc__``.
    Flask derives a route's endpoint name from the view's ``__name__``, so
    without this every decorated view would be registered as 'wrapper' and a
    second decorated route on the same app would collide with the first.

    :param func: view function to wrap
    :return: the wrapped view function
    """
    # Imported locally so the decorator does not depend on a module-level import.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
            return jsonify(result), 200
        except Exception as e:
            logging.getLogger('error').error(f'Error: {e}', exc_info=e)
            return jsonify({'error': str(e)}), 500

    return wrapper
def parse_img_path(img_path):
    """
    Split an image path into directory, base name and extension.

    The extension is the text after the last '.' in the file name and is
    returned without the dot. A file name with no '.' yields an empty
    extension instead of raising ValueError.

    :param img_path: path of the image file
    :return: tuple ``(dirname, img_name, ext)``
    """
    dirname = os.path.dirname(img_path)
    img_name, dot, ext = os.path.basename(img_path).rpartition('.')
    if not dot:
        # With no '.', rpartition returns the whole name in the last slot.
        img_name, ext = ext, ''
    return dirname, img_name, ext

View File

@@ -2,10 +2,9 @@ import logging
import os import os
from datetime import datetime from datetime import datetime
from flask import jsonify
from opencc import OpenCC from opencc import OpenCC
from util import string_util from util import string_util, model_util
# 获取yyyy-MM-dd HH:mm:ss格式的当前时间 # 获取yyyy-MM-dd HH:mm:ss格式的当前时间
@@ -37,7 +36,7 @@ def get_ocr_layout(ocr, img_path):
return True return True
layout = [] layout = []
ocr_result = ocr.ocr(img_path, cls=False) ocr_result = model_util.request_ocr(img_path)
ocr_result = ocr_result[0] ocr_result = ocr_result[0]
if not ocr_result: if not ocr_result:
return layout return layout
@@ -218,19 +217,3 @@ def chinese_money_to_number(chinese_money_amount):
def traditional_to_simple_chinese(traditional_chinese): def traditional_to_simple_chinese(traditional_chinese):
converter = OpenCC('t2s') converter = OpenCC('t2s')
return converter.convert(traditional_chinese) return converter.convert(traditional_chinese)
def process_request(func):
"""
api通用处理函数
"""
def wrapper(*args, **kwargs):
try:
result = func(*args, **kwargs)
return jsonify(result), 200
except Exception as e:
logging.getLogger('error').error(f'Error: {e}')
return jsonify({'error': str(e)}), 500
return wrapper

View File

@@ -283,9 +283,3 @@ def save_to_local(img_url, save_path=None):
file.write(response.content) file.write(response.content)
return save_path return save_path
def parse_path(img_path):
dirname = os.path.dirname(img_path)
img_name, ext = os.path.basename(img_path).rsplit('.', 1)
return dirname, img_name, ext

View File

@@ -1,9 +1,93 @@
import json
import logging import logging
import requests import requests
from tenacity import retry, stop_after_attempt, wait_random from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('OCR识别失败!'))
def request_ocr(img_path):
    """
    Call the image OCR service.

    The call is attempted up to three times with a random wait between
    attempts when it raises; the last exception is re-raised.

    :param img_path: path of the image to recognize
    :return: decoded JSON body when the service answers 200, otherwise None
    """
    endpoint = 'http://ocr_api:5001/ocr'
    reply = requests.post(endpoint, {'img_path': img_path})
    if reply.status_code != 200:
        return None
    return reply.json()
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取基本医保结算单失败!'))
def request_settlement_info(img_path, layout):
    """
    Call the basic medical-insurance settlement statement extraction service.

    This function was previously also named ``request_discharge_info``, so the
    later definition of that name replaced it and the settlement endpoint could
    not be reached through this module. It is named after the endpoint it calls.

    The call is attempted up to three times with a random wait between
    attempts when it raises; the last exception is re-raised.

    :param img_path: path of the image to extract from
    :param layout: OCR layout information of the image (sent JSON-encoded)
    :return: decoded JSON body when the service answers 200, otherwise None
    """
    url = 'http://settlement_api:5002/nlp/settlement'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
    else:
        return None
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取出院记录失败!'))
def request_discharge_info(img_path, layout):
    """
    Call the discharge-record extraction service.

    The call is attempted up to three times with a random wait between
    attempts when it raises; the last exception is re-raised.

    :param img_path: path of the image to extract from
    :param layout: OCR layout information of the image (sent JSON-encoded)
    :return: decoded JSON body when the service answers 200, otherwise None
    """
    endpoint = 'http://discharge_api:5003/nlp/discharge'
    form = {'img_path': img_path, 'layout': json.dumps(layout)}
    reply = requests.post(endpoint, form)
    if reply.status_code != 200:
        return None
    return reply.json()
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取费用清单失败!'))
def request_cost_info(img_path, layout):
    """
    Call the cost-list extraction service.

    The call is attempted up to three times with a random wait between
    attempts when it raises; the last exception is re-raised.

    :param img_path: path of the image to extract from
    :param layout: OCR layout information of the image (sent JSON-encoded)
    :return: decoded JSON body when the service answers 200, otherwise None
    """
    endpoint = 'http://cost_api:5004/nlp/cost'
    form = {'img_path': img_path, 'layout': json.dumps(layout)}
    reply = requests.post(endpoint, form)
    if reply.status_code != 200:
        return None
    return reply.json()
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取图片方向失败!'))
def request_image_orientation(img_path):
    """
    Call the image-orientation classification service.

    The call is attempted up to three times with a random wait between
    attempts when it raises; the last exception is re-raised.

    :param img_path: path of the image to classify
    :return: decoded JSON body (the two most likely orientations) when the
        service answers 200, otherwise the fallback ['0', '90']
    """
    endpoint = 'http://clas_api:5005/clas/orientation'
    reply = requests.post(endpoint, {'img_path': img_path})
    if reply.status_code != 200:
        return ['0', '90']
    return reply.json()
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('获取文档区域失败!')) after=lambda x: logging.warning('获取文档区域失败!'))
def request_book_areas(img_path): def request_book_areas(img_path):
@@ -12,7 +96,7 @@ def request_book_areas(img_path):
:param img_path: 待识别图片路径 :param img_path: 待识别图片路径
:return: 文档图片路径列表 :return: 文档图片路径列表
""" """
url = 'http://det_api:5000/det/books' url = 'http://det_api:5006/det/books'
response = requests.post(url, {'img_path': img_path}) response = requests.post(url, {'img_path': img_path})
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
@@ -28,25 +112,9 @@ def request_dewarped_image(img_path):
:param img_path: 待矫正图片路径 :param img_path: 待矫正图片路径
:return: 矫正后的图片路径 :return: 矫正后的图片路径
""" """
url = 'http://det_api:5001/dewarp' url = 'http://127.0.0.1:5007/dewarp'
response = requests.post(url, {'img_path': img_path}) response = requests.post(url, {'img_path': img_path})
if response.status_code == 200: if response.status_code == 200:
return response.json() return response.json()
else: else:
return img_path return img_path
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning('获取图片方向失败!'))
def request_image_orientation(img_path):
"""
请求图片方向分类接口
:param img_path: 待分类图片路径
:return: 最有可能的两个图片方向
"""
url = 'http://det_api:5002/clas/orientation'
response = requests.post(url, {'img_path': img_path})
if response.status_code == 200:
return response.json()
else:
return ['0', '90']