项目架构调整,模型全部采用接口调用
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
# 使用官方的paddle镜像作为基础
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6
|
||||
FROM python:3.10-alpine
|
||||
|
||||
# 设置工作目录
|
||||
WORKDIR /app
|
||||
@@ -15,9 +15,7 @@ ENV PYTHONUNBUFFERED=1 \
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY packages /app/packages
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \
|
||||
&& pip install --no-cache-dir -r requirements.txt \
|
||||
&& pip uninstall -y onnxruntime onnxruntime-gpu \
|
||||
&& pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
||||
&& pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 将当前目录内容复制到容器的/app内
|
||||
COPY . /app
|
||||
|
||||
@@ -126,3 +126,5 @@ bash update.sh
|
||||
2. 新增扭曲矫正功能
|
||||
21. 版本号:1.14.0
|
||||
1. 新增二维码识别替换高清图片功能
|
||||
22. 版本号:2.0.0
|
||||
1. 项目架构调整,模型全部采用接口调用
|
||||
@@ -1,27 +1,69 @@
|
||||
x-env:
|
||||
&template
|
||||
image: fcb_photo_review:1.14.6
|
||||
x-base:
|
||||
&base_template
|
||||
restart: always
|
||||
|
||||
x-review:
|
||||
&review_template
|
||||
<<: *template
|
||||
x-project:
|
||||
&project_template
|
||||
<<: *base_template
|
||||
image: fcb_photo_review:1.14.6
|
||||
volumes:
|
||||
- ./log:/app/log
|
||||
|
||||
x-paddle:
|
||||
&paddle_template
|
||||
<<: *base_template
|
||||
image: fcb_paddle:0.0.1
|
||||
volumes:
|
||||
- ./log:/app/log
|
||||
- ./model:/app/model
|
||||
|
||||
services:
|
||||
ocr_api:
|
||||
<<: *paddle_template
|
||||
build:
|
||||
context: ./services/paddle_services
|
||||
container_name: ocr_api
|
||||
hostname: ocr_api
|
||||
command: [ '-w 5 ./services/ocr_api:app --bind 0.0.0.0:5001' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- device_ids: [ '0', '1' ]
|
||||
- device_ids: [ '0' ]
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
x-mask:
|
||||
&mask_template
|
||||
<<: *template
|
||||
volumes:
|
||||
- ./log:/app/log
|
||||
settlement_api:
|
||||
<<: *paddle_template
|
||||
container_name: settlement_api
|
||||
hostname: settlement_api
|
||||
command: [ '-w 5 ./services/settlement_api:app --bind 0.0.0.0:5002' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- device_ids: [ '0' ]
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
discharge_api:
|
||||
<<: *paddle_template
|
||||
container_name: discharge_api
|
||||
hostname: discharge_api
|
||||
command: [ '-w 5 ./services/discharge_api:app --bind 0.0.0.0:5003' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- device_ids: [ '0' ]
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
cost_api:
|
||||
<<: *paddle_template
|
||||
container_name: cost_api
|
||||
hostname: cost_api
|
||||
command: [ '-w 5 ./services/cost_api:app --bind 0.0.0.0:5004' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -30,17 +72,37 @@ x-mask:
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
services:
|
||||
clas_api:
|
||||
<<: *paddle_template
|
||||
container_name: clas_api
|
||||
hostname: clas_api
|
||||
command: [ '-w 5 ./services/clas_api:app --bind 0.0.0.0:5005' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- device_ids: [ '0' ]
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
det_api:
|
||||
<<: *template
|
||||
build:
|
||||
context: .
|
||||
<<: *paddle_template
|
||||
container_name: det_api
|
||||
hostname: det_api
|
||||
volumes:
|
||||
- ./log:/app/log
|
||||
- ./model:/app/model
|
||||
# command: [ 'det_api.py' ]
|
||||
command: [ '-w 5 ./services/det_api:app --bind 0.0.0.0:5006' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- device_ids: [ '1' ]
|
||||
capabilities: [ 'gpu' ]
|
||||
driver: 'nvidia'
|
||||
|
||||
dewarp_api:
|
||||
<<: *paddle_template
|
||||
container_name: dewarp_api
|
||||
hostname: dewarp_api
|
||||
command: [ '-w 5 ./services/dewarp_api:app --bind 0.0.0.0:5007' ]
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
@@ -50,15 +112,23 @@ services:
|
||||
driver: 'nvidia'
|
||||
|
||||
photo_review_1:
|
||||
<<: *review_template
|
||||
<<: *project_template
|
||||
build:
|
||||
context: .
|
||||
container_name: photo_review_1
|
||||
hostname: photo_review_1
|
||||
depends_on:
|
||||
- ocr_api
|
||||
- settlement_api
|
||||
- discharge_api
|
||||
- cost_api
|
||||
- clas_api
|
||||
- det_api
|
||||
- dewarp_api
|
||||
command: [ 'photo_review.py', '--clean', 'True' ]
|
||||
|
||||
photo_review_2:
|
||||
<<: *review_template
|
||||
<<: *project_template
|
||||
container_name: photo_review_2
|
||||
hostname: photo_review_2
|
||||
depends_on:
|
||||
@@ -66,7 +136,7 @@ services:
|
||||
command: [ 'photo_review.py' ]
|
||||
|
||||
photo_review_3:
|
||||
<<: *review_template
|
||||
<<: *project_template
|
||||
container_name: photo_review_3
|
||||
hostname: photo_review_3
|
||||
depends_on:
|
||||
@@ -74,7 +144,7 @@ services:
|
||||
command: [ 'photo_review.py' ]
|
||||
|
||||
photo_review_4:
|
||||
<<: *review_template
|
||||
<<: *project_template
|
||||
container_name: photo_review_4
|
||||
hostname: photo_review_4
|
||||
depends_on:
|
||||
@@ -82,7 +152,7 @@ services:
|
||||
command: [ 'photo_review.py' ]
|
||||
|
||||
photo_review_5:
|
||||
<<: *review_template
|
||||
<<: *project_template
|
||||
container_name: photo_review_5
|
||||
hostname: photo_review_5
|
||||
depends_on:
|
||||
@@ -90,33 +160,23 @@ services:
|
||||
command: [ 'photo_review.py' ]
|
||||
|
||||
photo_mask_1:
|
||||
<<: *mask_template
|
||||
<<: *project_template
|
||||
container_name: photo_mask_1
|
||||
hostname: photo_mask_1
|
||||
depends_on:
|
||||
- photo_review_5
|
||||
- ocr_api
|
||||
- settlement_api
|
||||
- discharge_api
|
||||
- cost_api
|
||||
- clas_api
|
||||
- det_api
|
||||
- dewarp_api
|
||||
command: [ 'photo_mask.py', '--clean', 'True' ]
|
||||
|
||||
photo_mask_2:
|
||||
<<: *mask_template
|
||||
<<: *project_template
|
||||
container_name: photo_mask_2
|
||||
hostname: photo_mask_2
|
||||
depends_on:
|
||||
- photo_mask_1
|
||||
command: [ 'photo_mask.py' ]
|
||||
#
|
||||
# photo_review_6:
|
||||
# <<: *review_template
|
||||
# container_name: photo_review_6
|
||||
# hostname: photo_review_6
|
||||
# depends_on:
|
||||
# - photo_mask_2
|
||||
# command: [ 'photo_review.py' ]
|
||||
#
|
||||
# photo_review_7:
|
||||
# <<: *review_template
|
||||
# container_name: photo_review_7
|
||||
# hostname: photo_review_7
|
||||
# depends_on:
|
||||
# - photo_review_6
|
||||
# command: [ 'photo_review.py' ]
|
||||
@@ -1,6 +1,4 @@
|
||||
import jieba
|
||||
from paddlenlp import Taskflow
|
||||
from paddleocr import PaddleOCR
|
||||
|
||||
'''
|
||||
项目配置
|
||||
@@ -11,52 +9,6 @@ PHHD_BATCH_SIZE = 10
|
||||
SLEEP_MINUTES = 5
|
||||
# 是否发送报错邮件
|
||||
SEND_ERROR_EMAIL = True
|
||||
# 是否开启布局分析
|
||||
LAYOUT_ANALYSIS = False
|
||||
|
||||
"""
|
||||
信息抽取关键词配置
|
||||
"""
|
||||
# 患者姓名
|
||||
PATIENT_NAME = ['患者姓名']
|
||||
# 入院日期
|
||||
ADMISSION_DATE = ['入院日期']
|
||||
# 出院日期
|
||||
DISCHARGE_DATE = ['出院日期']
|
||||
# 发生医疗费
|
||||
MEDICAL_EXPENSES = ['费用总额']
|
||||
# 个人现金支付
|
||||
PERSONAL_CASH_PAYMENT = ['个人现金支付']
|
||||
# 个人账户支付
|
||||
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
|
||||
# 个人自费金额
|
||||
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
|
||||
# 医保类别
|
||||
MEDICAL_INSURANCE_TYPE = ['医保类型']
|
||||
# 就诊医院
|
||||
HOSPITAL = ['医院']
|
||||
# 就诊科室
|
||||
DEPARTMENT = ['科室']
|
||||
# 主治医生
|
||||
DOCTOR = ['主治医生']
|
||||
# 住院号
|
||||
ADMISSION_ID = ['住院号']
|
||||
# 医保结算单号码
|
||||
SETTLEMENT_ID = ['医保结算单号码']
|
||||
# 年龄
|
||||
AGE = ['年龄']
|
||||
# 大写总额
|
||||
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
|
||||
|
||||
SETTLEMENT_LIST_SCHEMA = \
|
||||
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
|
||||
+ PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
|
||||
+ UPPERCASE_MEDICAL_EXPENSES)
|
||||
|
||||
DISCHARGE_RECORD_SCHEMA = \
|
||||
HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
|
||||
|
||||
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
|
||||
|
||||
'''
|
||||
别名配置
|
||||
@@ -90,15 +42,3 @@ jieba.suggest_freq(('骨', '伤'), True)
|
||||
jieba.suggest_freq(('感染', '性'), True)
|
||||
jieba.suggest_freq(('胆', '道'), True)
|
||||
jieba.suggest_freq(('脾', '胃'), True)
|
||||
|
||||
'''
|
||||
模型配置
|
||||
'''
|
||||
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
|
||||
task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
||||
DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
|
||||
task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
||||
COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
|
||||
task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
||||
|
||||
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3)
|
||||
|
||||
@@ -18,15 +18,15 @@ from sqlalchemy import update
|
||||
from db import MysqlSession
|
||||
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
|
||||
from log import HOSTNAME
|
||||
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
||||
DEPARTMENT_FILTER
|
||||
from services import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
|
||||
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
|
||||
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
|
||||
ADMISSION_ID, SETTLEMENT_ID, AGE, UPPERCASE_MEDICAL_EXPENSES
|
||||
from ucloud import ufile
|
||||
from util import image_util, common_util, html_util, model_util
|
||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
||||
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
|
||||
parse_hospital
|
||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
|
||||
handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital
|
||||
|
||||
|
||||
# 合并信息抽取结果
|
||||
|
||||
@@ -1,16 +1,11 @@
|
||||
numpy==1.26.4
|
||||
onnxconverter-common==1.14.0
|
||||
OpenCC==1.1.6
|
||||
OpenCC==1.1.6 # 中文繁简转换
|
||||
opencv-python==4.6.0.66
|
||||
paddle2onnx==1.2.3
|
||||
paddleclas==2.5.2
|
||||
paddlenlp==2.6.1
|
||||
paddleocr==2.7.3
|
||||
pillow==10.4.0
|
||||
pymysql==1.1.1
|
||||
requests==2.32.3
|
||||
sqlacodegen==2.3.0.post1
|
||||
sqlalchemy==1.4.52
|
||||
tenacity==8.5.0
|
||||
ufile==3.2.9
|
||||
zxing-cpp==2.2.0
|
||||
sqlacodegen==2.3.0.post1 # 实体类生成
|
||||
sqlalchemy==1.4.52 # ORM框架
|
||||
tenacity==8.5.0 # 重试
|
||||
ufile==3.2.9 # 云空间
|
||||
zxing-cpp==2.2.0 # 二维码识别
|
||||
34
services/__init__.py
Normal file
34
services/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""
Keyword configuration for information extraction.

Each constant is a list of keyword strings; the API modules concatenate these
lists to build the schema passed to their extraction task.
"""

# Patient name
PATIENT_NAME = ['患者姓名']
# Admission date
ADMISSION_DATE = ['入院日期']
# Discharge date
DISCHARGE_DATE = ['出院日期']
# Medical expenses incurred
MEDICAL_EXPENSES = ['费用总额']
# Personal cash payment
PERSONAL_CASH_PAYMENT = ['个人现金支付']
# Personal account payment
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
# Personal self-funded amount (two keyword variants)
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
# Medical insurance type
MEDICAL_INSURANCE_TYPE = ['医保类型']
# Hospital visited
HOSPITAL = ['医院']
# Department visited
DEPARTMENT = ['科室']
# Attending doctor
DOCTOR = ['主治医生']
# Admission (inpatient) number
ADMISSION_ID = ['住院号']
# Medical-insurance settlement sheet number
SETTLEMENT_ID = ['医保结算单号码']
# Age
AGE = ['年龄']
# Total amount written in uppercase (presumably Chinese capital numerals — confirm)
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
|
||||
@@ -1,26 +1,26 @@
|
||||
from flask import Flask, request
|
||||
from paddleclas import PaddleClas
|
||||
|
||||
from util.common_util import process_request
|
||||
from utils import process_request
|
||||
|
||||
app = Flask(__name__)
|
||||
CLAS = PaddleClas(model_name="text_image_orientation")
|
||||
CLAS = PaddleClas(model_name='text_image_orientation')
|
||||
|
||||
|
||||
@app.route('/clas/orientation', methods=['POST'])
|
||||
@process_request
|
||||
def orientation():
|
||||
"""
|
||||
判断图片旋转角度,逆时针旋转该角度后为正。可能值["0", "90", "180", "270"]
|
||||
判断图片旋转角度,逆时针旋转该角度后为正。可能值['0', '90', '180', '270']
|
||||
:return: 最有可能的两个角度
|
||||
"""
|
||||
img_path = request.form.get('img_path')
|
||||
clas_result = CLAS.predict(input_data=img_path)
|
||||
clas_result = next(clas_result)[0]
|
||||
if clas_result["scores"][0] < 0.5:
|
||||
if clas_result['scores'][0] < 0.5:
|
||||
return ['0', '90']
|
||||
return clas_result["label_names"]
|
||||
return clas_result['label_names']
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run('0.0.0.0', 5002)
|
||||
app.run('0.0.0.0', 5005)
|
||||
24
services/cost_api.py
Normal file
24
services/cost_api.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import json
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
|
||||
from utils import process_request
|
||||
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES
|
||||
|
||||
# Flask application object exposed by this module.
app = Flask(__name__)
# Extraction schema: concatenation of the keyword lists imported from the package.
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
# Information-extraction task, built once at import time and reused by every request.
# NOTE(review): task_path is relative, so it resolves against the process working
# directory rather than this file's location — confirm it matches the deployment.
COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
                task_path='../model/cost_list_model', layout_analysis=False, precision='fp16')


@app.route('/nlp/cost', methods=['POST'])
@process_request
def cost():
    """
    Run the cost-list extraction task on one image.

    Reads the form fields ``img_path`` and ``layout``. ``layout`` is decoded
    with ``json.loads`` and passed to ``COST`` together with the image path.

    :return: the value returned by ``COST``
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return COST({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Direct execution: serve on all interfaces, port 5004.
    app.run('0.0.0.0', 5004)
|
||||
@@ -3,9 +3,8 @@ import os.path
|
||||
import cv2
|
||||
from flask import Flask, request
|
||||
|
||||
from paddle_detection import detector
|
||||
from util import image_util
|
||||
from util.common_util import process_request
|
||||
from paddle_services.paddle_detection import detector
|
||||
from utils import process_request, parse_img_path
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@@ -14,10 +13,9 @@ app = Flask(__name__)
|
||||
@process_request
|
||||
def books():
|
||||
img_path = request.form.get('img_path')
|
||||
image = cv2.imread(img_path)
|
||||
result = detector.get_book_areas(image)
|
||||
result = detector.get_book_areas(img_path)
|
||||
|
||||
dirname, img_name, ext = image_util.parse_path(img_path)
|
||||
dirname, img_name, ext = parse_img_path(img_path)
|
||||
books_path = []
|
||||
for i in range(len(result)):
|
||||
save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext)
|
||||
@@ -27,4 +25,4 @@ def books():
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run('0.0.0.0', 5000)
|
||||
app.run('0.0.0.0', 5006)
|
||||
@@ -3,9 +3,8 @@ import os
|
||||
import cv2
|
||||
from flask import Flask, request
|
||||
|
||||
from doc_dewarp import dewarp
|
||||
from util import image_util
|
||||
from util.common_util import process_request
|
||||
from paddle_services.doc_dewarp import dewarper
|
||||
from utils import process_request, parse_img_path
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@@ -15,12 +14,12 @@ app = Flask(__name__)
|
||||
def dewarp():
|
||||
img_path = request.form.get('img_path')
|
||||
img = cv2.imread(img_path)
|
||||
dewarped_img = dewarp.dewarp_image(img)
|
||||
dirname, img_name, ext = image_util.parse_path(img_path)
|
||||
dewarped_img = dewarper.dewarp_image(img)
|
||||
dirname, img_name, ext = parse_img_path(img_path)
|
||||
save_path = os.path.join(dirname, img_name + '_dewarped.' + ext)
|
||||
cv2.imwrite(save_path, dewarped_img)
|
||||
return save_path
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run('0.0.0.0', 5001)
|
||||
app.run('0.0.0.0', 5007)
|
||||
26
services/discharge_api.py
Normal file
26
services/discharge_api.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import json
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
|
||||
from utils import process_request
|
||||
from . import HOSPITAL, DEPARTMENT, PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, DOCTOR, ADMISSION_ID, AGE
|
||||
|
||||
# Flask application object exposed by this module.
app = Flask(__name__)
# Extraction schema: concatenation of the keyword lists imported from the package.
DISCHARGE_RECORD_SCHEMA = (
        HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
)
# Information-extraction task, built once at import time and reused by every request.
# NOTE(review): task_path is relative, so it resolves against the process working
# directory rather than this file's location — confirm it matches the deployment.
DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
                     task_path='../model/discharge_record_model', layout_analysis=False, precision='fp16')


@app.route('/nlp/discharge', methods=['POST'])
@process_request
def discharge():
    """
    Run the discharge-record extraction task on one image.

    Reads the form fields ``img_path`` and ``layout``. ``layout`` is decoded
    with ``json.loads`` and passed to ``DISCHARGE`` together with the image path.

    :return: the value returned by ``DISCHARGE``
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return DISCHARGE({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Direct execution: serve on all interfaces, port 5003.
    app.run('0.0.0.0', 5003)
|
||||
18
services/ocr_api.py
Normal file
18
services/ocr_api.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from flask import Flask, request
|
||||
from paddleocr import PaddleOCR
|
||||
|
||||
from utils import process_request
|
||||
|
||||
# Flask application object exposed by this module.
app = Flask(__name__)
# OCR engine, built once at import time and reused by every request.
# Angle classification is disabled both here and in the per-request call below.
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=0, det_db_box_thresh=0.3)


@app.route('/ocr', methods=['POST'])
@process_request
def ocr():
    """
    Run OCR on one image.

    Reads the form field ``img_path`` and passes it to ``OCR.ocr``.

    :return: the value returned by ``OCR.ocr``
    """
    img_path = request.form.get('img_path')
    return OCR.ocr(img_path, cls=False)


if __name__ == '__main__':
    # Direct execution: serve on all interfaces, port 5001.
    app.run('0.0.0.0', 5001)
|
||||
29
services/paddle_services/Dockerfile
Normal file
29
services/paddle_services/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
||||
# 使用官方的paddle镜像作为基础
|
||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6
|
||||
|
||||
# 设置工作目录
|
||||
WORKDIR /app
|
||||
|
||||
# 设置环境变量
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
# 设置时区
|
||||
TZ=Asia/Shanghai \
|
||||
# 设置pip镜像地址,加快安装速度
|
||||
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
# 安装依赖
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
COPY packages /app/packages
|
||||
# Set the container time zone, then install Python dependencies.
# "$TZ" is double-quoted so the shell expands it; single quotes would write the
# literal text $TZ into /etc/timezone.
# onnxruntime is reinstalled from the CUDA 12 package index after the requirements
# install; --no-cache-dir on every pip install keeps the image layer small.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install --no-cache-dir onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
||||
|
||||
# 将当前目录内容复制到容器的/app内
|
||||
COPY . /app
|
||||
|
||||
# 暴露端口
|
||||
# EXPOSE 8081
|
||||
|
||||
# 运行api接口,具体接口在命令行或docker-compose.yml文件中定义
|
||||
ENTRYPOINT ["gunicorn"]
|
||||
@@ -1,4 +1,7 @@
|
||||
import os.path
|
||||
|
||||
from onnxruntime import InferenceSession
|
||||
|
||||
DOC_TR = InferenceSession("model/dewarp_model/doc_tr_pp.onnx",
|
||||
providers=["CUDAExecutionProvider"], provider_options=[{"device_id": 0}])
|
||||
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
|
||||
'model', 'dewarp_model', 'doc_tr_pp.onnx')
|
||||
DOC_TR = InferenceSession(MODEL_PATH, providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])
|
||||
|
||||
@@ -11,10 +11,10 @@ def dewarp_image(image):
|
||||
y = to_tensor(image)
|
||||
|
||||
img = np.transpose(img, (2, 0, 1))
|
||||
bm = DOC_TR.run(None, {"image": img[None,]})[0]
|
||||
bm = DOC_TR.run(None, {'image': img[None,]})[0]
|
||||
bm = paddle.to_tensor(bm)
|
||||
bm = paddle.nn.functional.interpolate(
|
||||
bm, y.shape[2:], mode="bilinear", align_corners=False
|
||||
bm, y.shape[2:], mode='bilinear', align_corners=False
|
||||
)
|
||||
bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
|
||||
out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)
|
||||
@@ -1,4 +1,8 @@
|
||||
import os
|
||||
|
||||
from onnxruntime import InferenceSession
|
||||
|
||||
PADDLE_DET = InferenceSession("model/object_det_model/ppyoloe_plus_crn_l_80e_coco_w_nms.onnx",
|
||||
providers=["CPUExecutionProvider"], provider_options=[{"device_id": 0}])
|
||||
MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))),
|
||||
'model', 'object_det_model')
|
||||
PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'),
|
||||
providers=['CPUExecutionProvider'], provider_options=[{'device_id': 0}])
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import tempfile
|
||||
import os.path
|
||||
from collections import defaultdict
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from paddle_detection import PADDLE_DET
|
||||
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
|
||||
from paddle_detection.deploy.third_engine.onnx.preprocess import Compose
|
||||
from util import image_util, common_util
|
||||
from util import image_util
|
||||
from . import PADDLE_DET, MODEL_DIR
|
||||
from .deploy.third_engine.onnx.infer import PredictConfig
|
||||
from .deploy.third_engine.onnx.preprocess import Compose
|
||||
|
||||
|
||||
def predict_image(infer_config, predictor, img_path):
|
||||
@@ -15,7 +15,7 @@ def predict_image(infer_config, predictor, img_path):
|
||||
transforms = Compose(infer_config.preprocess_infos)
|
||||
# predict image
|
||||
inputs = transforms(img_path)
|
||||
inputs["image"] = np.array(inputs["image"]).astype('float32')
|
||||
inputs['image'] = np.array(inputs['image']).astype('float32')
|
||||
inputs_name = [var.name for var in predictor.get_inputs()]
|
||||
inputs = {k: inputs[k][None,] for k in inputs_name}
|
||||
|
||||
@@ -25,25 +25,23 @@ def predict_image(infer_config, predictor, img_path):
|
||||
result = defaultdict(list)
|
||||
for bbox in bboxes:
|
||||
if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
|
||||
result[bbox[0]].append({"score": bbox[1], "box": bbox[2:]})
|
||||
result[bbox[0]].append({'score': bbox[1], 'box': bbox[2:]})
|
||||
return result
|
||||
|
||||
|
||||
def detect_image(img_path):
|
||||
infer_cfg = "model/object_det_model/infer_cfg.yml"
|
||||
infer_cfg = os.path.join(MODEL_DIR, 'infer_cfg.yml')
|
||||
# load infer config
|
||||
infer_config = PredictConfig(infer_cfg)
|
||||
|
||||
return predict_image(infer_config, PADDLE_DET, img_path)
|
||||
|
||||
|
||||
def get_book_areas(image):
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
||||
cv2.imwrite(temp_file.name, image)
|
||||
detect_result = detect_image(temp_file.name)
|
||||
common_util.delete_temp_file(temp_file.name)
|
||||
def get_book_areas(img_path):
|
||||
detect_result = detect_image(img_path)
|
||||
book_areas = detect_result[73]
|
||||
result = []
|
||||
image = cv2.imread(img_path)
|
||||
for book_area in book_areas:
|
||||
result.append(image_util.capture(image, book_area["box"]))
|
||||
result.append(image_util.capture(image, book_area['box']))
|
||||
return result
|
||||
|
||||
16
services/paddle_services/requirements.txt
Normal file
16
services/paddle_services/requirements.txt
Normal file
@@ -0,0 +1,16 @@
|
||||
numpy==1.26.4
|
||||
onnxconverter-common==1.14.0
|
||||
OpenCC==1.1.6
|
||||
opencv-python==4.6.0.66
|
||||
paddle2onnx==1.2.3
|
||||
paddleclas==2.5.2
|
||||
paddlenlp==2.6.1
|
||||
paddleocr==2.7.3
|
||||
pillow==10.4.0
|
||||
pymysql==1.1.1
|
||||
requests==2.32.3
|
||||
sqlacodegen==2.3.0.post1
|
||||
sqlalchemy==1.4.52
|
||||
tenacity==8.5.0
|
||||
ufile==3.2.9
|
||||
zxing-cpp==2.2.0
|
||||
30
services/settlement_api.py
Normal file
30
services/settlement_api.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import json
|
||||
|
||||
from flask import Flask, request
|
||||
from paddlenlp import Taskflow
|
||||
|
||||
from utils import process_request
|
||||
from . import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, ADMISSION_ID, SETTLEMENT_ID, \
|
||||
UPPERCASE_MEDICAL_EXPENSES
|
||||
|
||||
# Flask application object exposed by this module.
app = Flask(__name__)
# Extraction schema: concatenation of the keyword lists imported from the package.
SETTLEMENT_LIST_SCHEMA = (
        PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
        + PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
        + UPPERCASE_MEDICAL_EXPENSES
)
# Information-extraction task, built once at import time and reused by every request.
# NOTE(review): task_path is relative, so it resolves against the process working
# directory rather than this file's location — confirm it matches the deployment.
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
                         task_path='../model/settlement_list_model', layout_analysis=False, precision='fp16')


@app.route('/nlp/settlement', methods=['POST'])
@process_request
def settlement():
    """
    Run the settlement-sheet extraction task on one image.

    Reads the form fields ``img_path`` and ``layout``. ``layout`` is decoded
    with ``json.loads`` and passed to ``SETTLEMENT_IE`` together with the image path.

    :return: the value returned by ``SETTLEMENT_IE``
    """
    img_path = request.form.get('img_path')
    layout = request.form.get('layout')
    return SETTLEMENT_IE({'doc': img_path, 'layout': json.loads(layout)})


if __name__ == '__main__':
    # Direct execution: serve on all interfaces, port 5002.
    app.run('0.0.0.0', 5002)
|
||||
26
services/utils.py
Normal file
26
services/utils.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from flask import jsonify
|
||||
|
||||
|
||||
def process_request(func):
    """
    Wrap an API view so its result or failure is returned as a JSON response.

    The view's return value is sent as JSON with status 200. Any exception
    raised by the view (or while serialising its result) is logged with its
    traceback on the 'error' logger and reported as ``{'error': <message>}``
    with status 500.

    :param func: view function to wrap
    :return: wrapper that carries the metadata (``__name__``, ``__doc__``) of ``func``
    """
    # Imported locally so the decorator does not rely on a module-level import.
    from functools import wraps

    # Flask uses the view function's __name__ as the default endpoint name, so the
    # wrapper must keep the original name or decorated views would all be 'wrapper'.
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
            return jsonify(result), 200
        except Exception as e:
            logging.getLogger('error').error(f'Error: {e}', exc_info=e)
            return jsonify({'error': str(e)}), 500

    return wrapper
|
||||
|
||||
|
||||
def parse_img_path(img_path):
    """
    Split an image path into directory, base name and extension.

    :param img_path: path of the image file
    :return: ``(dirname, img_name, ext)``; ``ext`` excludes the dot and is ``''``
             when the file name contains no dot
    """
    dirname = os.path.dirname(img_path)
    basename = os.path.basename(img_path)
    # rpartition splits on the last dot exactly like rsplit('.', 1), but it does
    # not fail on unpacking when the name has no dot.
    img_name, dot, ext = basename.rpartition('.')
    if not dot:
        # No dot at all: the whole base name is the image name.
        return dirname, basename, ''
    return dirname, img_name, ext
|
||||
@@ -2,10 +2,9 @@ import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
from flask import jsonify
|
||||
from opencc import OpenCC
|
||||
|
||||
from util import string_util
|
||||
from util import string_util, model_util
|
||||
|
||||
|
||||
# 获取yyyy-MM-dd HH:mm:ss格式的当前时间
|
||||
@@ -37,7 +36,7 @@ def get_ocr_layout(ocr, img_path):
|
||||
return True
|
||||
|
||||
layout = []
|
||||
ocr_result = ocr.ocr(img_path, cls=False)
|
||||
ocr_result = model_util.request_ocr(img_path)
|
||||
ocr_result = ocr_result[0]
|
||||
if not ocr_result:
|
||||
return layout
|
||||
@@ -218,19 +217,3 @@ def chinese_money_to_number(chinese_money_amount):
|
||||
def traditional_to_simple_chinese(traditional_chinese):
|
||||
converter = OpenCC('t2s')
|
||||
return converter.convert(traditional_chinese)
|
||||
|
||||
|
||||
def process_request(func):
|
||||
"""
|
||||
api通用处理函数
|
||||
"""
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
result = func(*args, **kwargs)
|
||||
return jsonify(result), 200
|
||||
except Exception as e:
|
||||
logging.getLogger('error').error(f'Error: {e}')
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -283,9 +283,3 @@ def save_to_local(img_url, save_path=None):
|
||||
file.write(response.content)
|
||||
|
||||
return save_path
|
||||
|
||||
|
||||
def parse_path(img_path):
|
||||
dirname = os.path.dirname(img_path)
|
||||
img_name, ext = os.path.basename(img_path).rsplit('.', 1)
|
||||
return dirname, img_name, ext
|
||||
|
||||
@@ -1,9 +1,93 @@
|
||||
import json
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from tenacity import retry, stop_after_attempt, wait_random
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('OCR识别失败!'))
def request_ocr(img_path):
    """
    Call the OCR HTTP endpoint for one image.

    The call is retried (up to 3 attempts, random 1-3 s wait) when it raises.

    :param img_path: path of the image to recognise
    :return: decoded JSON body on HTTP 200, otherwise None
    """
    resp = requests.post('http://ocr_api:5001/ocr', {'img_path': img_path})
    return resp.json() if resp.status_code == 200 else None
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取基本医保结算单失败!'))
def request_settlement_info(img_path, layout):
    """
    Request the basic medical-insurance settlement sheet extraction endpoint.

    This client needs its own name: the discharge-record client later in this
    module is called ``request_discharge_info``, and sharing that name would let
    the later definition replace this one, leaving the settlement endpoint
    unreachable.

    The call is retried (up to 3 attempts, random 1-3 s wait) when it raises.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON body on HTTP 200, otherwise None
    """
    url = 'http://settlement_api:5002/nlp/settlement'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
    return None
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取出院记录失败!'))
def request_discharge_info(img_path, layout):
    """
    Request the discharge-record extraction endpoint.

    The call is retried (up to 3 attempts, random 1-3 s wait) when it raises.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON body on HTTP 200, otherwise None
    """
    payload = {'img_path': img_path, 'layout': json.dumps(layout)}
    resp = requests.post('http://discharge_api:5003/nlp/discharge', payload)
    if resp.status_code != 200:
        return None
    return resp.json()
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取费用清单失败!'))
def request_cost_info(img_path, layout):
    """
    Request the cost-list extraction endpoint.

    The call is retried (up to 3 attempts, random 1-3 s wait) when it raises.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON body on HTTP 200, otherwise None
    """
    payload = {'img_path': img_path, 'layout': json.dumps(layout)}
    resp = requests.post('http://cost_api:5004/nlp/cost', payload)
    if resp.status_code != 200:
        return None
    return resp.json()
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取图片方向失败!'))
def request_image_orientation(img_path):
    """
    Request the image-orientation classification endpoint.

    The call is retried (up to 3 attempts, random 1-3 s wait) when it raises.

    :param img_path: path of the image to classify
    :return: decoded JSON body on HTTP 200 (the two most likely orientations),
             otherwise the fallback ``['0', '90']``
    """
    resp = requests.post('http://clas_api:5005/clas/orientation', {'img_path': img_path})
    if resp.status_code != 200:
        # Fallback used whenever the service does not answer with 200.
        return ['0', '90']
    return resp.json()
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('获取文档区域失败!'))
|
||||
def request_book_areas(img_path):
|
||||
@@ -12,7 +96,7 @@ def request_book_areas(img_path):
|
||||
:param img_path: 待识别图片路径
|
||||
:return: 文档图片路径列表
|
||||
"""
|
||||
url = 'http://det_api:5000/det/books'
|
||||
url = 'http://det_api:5006/det/books'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
@@ -28,25 +112,9 @@ def request_dewarped_image(img_path):
|
||||
:param img_path: 待矫正图片路径
|
||||
:return: 矫正后的图片路径
|
||||
"""
|
||||
url = 'http://det_api:5001/dewarp'
|
||||
url = 'http://127.0.0.1:5007/dewarp'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
return img_path
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning('获取图片方向失败!'))
|
||||
def request_image_orientation(img_path):
|
||||
"""
|
||||
请求图片方向分类接口
|
||||
:param img_path: 待分类图片路径
|
||||
:return: 最有可能的两个图片方向
|
||||
"""
|
||||
url = 'http://det_api:5002/clas/orientation'
|
||||
response = requests.post(url, {'img_path': img_path})
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
return ['0', '90']
|
||||
|
||||
Reference in New Issue
Block a user