diff --git a/Dockerfile b/Dockerfile index 39a4e3c..634b649 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # 使用官方的paddle镜像作为基础 -FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6 +FROM python:3.10-alpine # 设置工作目录 WORKDIR /app @@ -15,9 +15,7 @@ ENV PYTHONUNBUFFERED=1 \ COPY requirements.txt /app/requirements.txt COPY packages /app/packages RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \ - && pip install --no-cache-dir -r requirements.txt \ - && pip uninstall -y onnxruntime onnxruntime-gpu \ - && pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/ + && pip install --no-cache-dir -r requirements.txt # 将当前目录内容复制到容器的/app内 COPY . /app diff --git a/README.md b/README.md index f0305dc..c7028a5 100644 --- a/README.md +++ b/README.md @@ -125,4 +125,6 @@ bash update.sh 1. 新增文档检测功能 2. 新增扭曲矫正功能 21. 版本号:1.14.0 - 1. 新增二维码识别替换高清图片功能 \ No newline at end of file + 1. 新增二维码识别替换高清图片功能 +22. 版本号:2.0.0 + 1. 
项目架构调整,模型全部采用接口调用 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 113c00c..17e065d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,46 +1,108 @@ -x-env: - &template - image: fcb_photo_review:1.14.6 +x-base: + &base_template restart: always -x-review: - &review_template - <<: *template +x-project: + &project_template + <<: *base_template + image: fcb_photo_review:1.14.6 + volumes: + - ./log:/app/log + +x-paddle: + &paddle_template + <<: *base_template + image: fcb_paddle:0.0.1 volumes: - ./log:/app/log - ./model:/app/model - deploy: - resources: - reservations: - devices: - - device_ids: [ '0', '1' ] - capabilities: [ 'gpu' ] - driver: 'nvidia' - -x-mask: - &mask_template - <<: *template - volumes: - - ./log:/app/log - deploy: - resources: - reservations: - devices: - - device_ids: [ '1' ] - capabilities: [ 'gpu' ] - driver: 'nvidia' services: - det_api: - <<: *template + ocr_api: + <<: *paddle_template build: - context: . + context: ./services/paddle_services + container_name: ocr_api + hostname: ocr_api + command: [ '-w 5 ./services/ocr_api:app --bind 0.0.0.0:5001' ] + deploy: + resources: + reservations: + devices: + - device_ids: [ '0' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + settlement_api: + <<: *paddle_template + container_name: settlement_api + hostname: settlement_api + command: [ '-w 5 ./services/settlement_api:app --bind 0.0.0.0:5002' ] + deploy: + resources: + reservations: + devices: + - device_ids: [ '0' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + discharge_api: + <<: *paddle_template + container_name: discharge_api + hostname: discharge_api + command: [ '-w 5 ./services/discharge_api:app --bind 0.0.0.0:5003' ] + deploy: + resources: + reservations: + devices: + - device_ids: [ '0' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + cost_api: + <<: *paddle_template + container_name: cost_api + hostname: cost_api + command: [ '-w 5 ./services/cost_api:app --bind 0.0.0.0:5004' ] + 
deploy: + resources: + reservations: + devices: + - device_ids: [ '1' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + clas_api: + <<: *paddle_template + container_name: clas_api + hostname: clas_api + command: [ '-w 5 ./services/clas_api:app --bind 0.0.0.0:5005' ] + deploy: + resources: + reservations: + devices: + - device_ids: [ '0' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + det_api: + <<: *paddle_template container_name: det_api hostname: det_api - volumes: - - ./log:/app/log - - ./model:/app/model -# command: [ 'det_api.py' ] + command: [ '-w 5 ./services/det_api:app --bind 0.0.0.0:5006' ] + deploy: + resources: + reservations: + devices: + - device_ids: [ '1' ] + capabilities: [ 'gpu' ] + driver: 'nvidia' + + dewarp_api: + <<: *paddle_template + container_name: dewarp_api + hostname: dewarp_api + command: [ '-w 5 ./services/dewarp_api:app --bind 0.0.0.0:5007' ] deploy: resources: reservations: @@ -50,15 +112,23 @@ services: driver: 'nvidia' photo_review_1: - <<: *review_template + <<: *project_template + build: + context: . 
container_name: photo_review_1 hostname: photo_review_1 depends_on: + - ocr_api + - settlement_api + - discharge_api + - cost_api + - clas_api - det_api + - dewarp_api command: [ 'photo_review.py', '--clean', 'True' ] photo_review_2: - <<: *review_template + <<: *project_template container_name: photo_review_2 hostname: photo_review_2 depends_on: @@ -66,7 +136,7 @@ services: command: [ 'photo_review.py' ] photo_review_3: - <<: *review_template + <<: *project_template container_name: photo_review_3 hostname: photo_review_3 depends_on: @@ -74,7 +144,7 @@ services: command: [ 'photo_review.py' ] photo_review_4: - <<: *review_template + <<: *project_template container_name: photo_review_4 hostname: photo_review_4 depends_on: @@ -82,7 +152,7 @@ services: command: [ 'photo_review.py' ] photo_review_5: - <<: *review_template + <<: *project_template container_name: photo_review_5 hostname: photo_review_5 depends_on: @@ -90,33 +160,23 @@ services: command: [ 'photo_review.py' ] photo_mask_1: - <<: *mask_template + <<: *project_template container_name: photo_mask_1 hostname: photo_mask_1 depends_on: - - photo_review_5 + - ocr_api + - settlement_api + - discharge_api + - cost_api + - clas_api + - det_api + - dewarp_api command: [ 'photo_mask.py', '--clean', 'True' ] photo_mask_2: - <<: *mask_template + <<: *project_template container_name: photo_mask_2 hostname: photo_mask_2 depends_on: - photo_mask_1 - command: [ 'photo_mask.py' ] -# -# photo_review_6: -# <<: *review_template -# container_name: photo_review_6 -# hostname: photo_review_6 -# depends_on: -# - photo_mask_2 -# command: [ 'photo_review.py' ] -# -# photo_review_7: -# <<: *review_template -# container_name: photo_review_7 -# hostname: photo_review_7 -# depends_on: -# - photo_review_6 -# command: [ 'photo_review.py' ] \ No newline at end of file + command: [ 'photo_mask.py' ] \ No newline at end of file diff --git a/photo_review/__init__.py b/photo_review/__init__.py index 23a4d76..20d6e5d 100644 --- 
a/photo_review/__init__.py +++ b/photo_review/__init__.py @@ -1,6 +1,4 @@ import jieba -from paddlenlp import Taskflow -from paddleocr import PaddleOCR ''' 项目配置 @@ -11,52 +9,6 @@ PHHD_BATCH_SIZE = 10 SLEEP_MINUTES = 5 # 是否发送报错邮件 SEND_ERROR_EMAIL = True -# 是否开启布局分析 -LAYOUT_ANALYSIS = False - -""" -信息抽取关键词配置 -""" -# 患者姓名 -PATIENT_NAME = ['患者姓名'] -# 入院日期 -ADMISSION_DATE = ['入院日期'] -# 出院日期 -DISCHARGE_DATE = ['出院日期'] -# 发生医疗费 -MEDICAL_EXPENSES = ['费用总额'] -# 个人现金支付 -PERSONAL_CASH_PAYMENT = ['个人现金支付'] -# 个人账户支付 -PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付'] -# 个人自费金额 -PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费'] -# 医保类别 -MEDICAL_INSURANCE_TYPE = ['医保类型'] -# 就诊医院 -HOSPITAL = ['医院'] -# 就诊科室 -DEPARTMENT = ['科室'] -# 主治医生 -DOCTOR = ['主治医生'] -# 住院号 -ADMISSION_ID = ['住院号'] -# 医保结算单号码 -SETTLEMENT_ID = ['医保结算单号码'] -# 年龄 -AGE = ['年龄'] -# 大写总额 -UPPERCASE_MEDICAL_EXPENSES = ['大写总额'] - -SETTLEMENT_LIST_SCHEMA = \ - (PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT - + PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID - + UPPERCASE_MEDICAL_EXPENSES) - -DISCHARGE_RECORD_SCHEMA = \ - HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE - -COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES ''' 别名配置 @@ -90,15 +42,3 @@ jieba.suggest_freq(('骨', '伤'), True) jieba.suggest_freq(('感染', '性'), True) jieba.suggest_freq(('胆', '道'), True) jieba.suggest_freq(('脾', '胃'), True) - -''' -模型配置 -''' -SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base', - task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16') -DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base', - task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16') -COST_IE = Taskflow('information_extraction', 
schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1, - task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16') - -OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3) diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 8ecec4f..2b325ab 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -18,15 +18,15 @@ from sqlalchemy import update from db import MysqlSession from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec from log import HOSTNAME -from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ +from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \ + DEPARTMENT_FILTER +from services import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ - ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ - UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER + ADMISSION_ID, SETTLEMENT_ID, AGE, UPPERCASE_MEDICAL_EXPENSES from ucloud import ufile from util import image_util, common_util, html_util, model_util -from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ - handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \ - parse_hospital +from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \ + handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital # 合并信息抽取结果 diff --git a/requirements.txt b/requirements.txt index 6697c5e..200bc87 
"""
Keyword configuration for information extraction.

Each constant is a list of field keywords; the ``*_api`` service modules
concatenate these lists to build their extraction schemas.
"""

# Patient name
PATIENT_NAME = ['患者姓名']
# Admission date
ADMISSION_DATE = ['入院日期']
# Discharge date
DISCHARGE_DATE = ['出院日期']
# Medical expenses incurred
MEDICAL_EXPENSES = ['费用总额']
# Personal cash payment
PERSONAL_CASH_PAYMENT = ['个人现金支付']
# Personal account payment
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
# Personal self-funded amount (two alternative keywords)
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
# Medical insurance type
MEDICAL_INSURANCE_TYPE = ['医保类型']
# Hospital visited
HOSPITAL = ['医院']
# Department visited
DEPARTMENT = ['科室']
# Attending doctor
DOCTOR = ['主治医生']
# Admission (inpatient) number
ADMISSION_ID = ['住院号']
# Medical insurance settlement statement number
SETTLEMENT_ID = ['医保结算单号码']
# Age
AGE = ['年龄']
# Total amount written in Chinese capital numerals
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
@app.route('/nlp/cost', methods=['POST'])
@process_request
def cost():
    """
    Run cost-list information extraction for one image.

    Reads two POST form fields: ``img_path`` (path of the image) and
    ``layout`` (a JSON-encoded string), then hands them to the ``COST``
    Taskflow as ``doc`` and ``layout``.

    :return: the raw result of the ``COST`` Taskflow call
    """
    form = request.form
    document = form.get('img_path')
    # The layout arrives as a JSON string and must be decoded before use.
    ocr_layout = json.loads(form.get('layout'))
    return COST({'doc': document, 'layout': ocr_layout})
detector.get_book_areas(image) + result = detector.get_book_areas(img_path) - dirname, img_name, ext = image_util.parse_path(img_path) + dirname, img_name, ext = parse_img_path(img_path) books_path = [] for i in range(len(result)): save_path = os.path.join(dirname, img_name + '_book_' + str(i) + '.' + ext) @@ -27,4 +25,4 @@ def books(): if __name__ == '__main__': - app.run('0.0.0.0', 5000) + app.run('0.0.0.0', 5006) diff --git a/dewarp_api.py b/services/dewarp_api.py similarity index 62% rename from dewarp_api.py rename to services/dewarp_api.py index f755463..8e9b4a2 100644 --- a/dewarp_api.py +++ b/services/dewarp_api.py @@ -3,9 +3,8 @@ import os import cv2 from flask import Flask, request -from doc_dewarp import dewarp -from util import image_util -from util.common_util import process_request +from paddle_services.doc_dewarp import dewarper +from utils import process_request, parse_img_path app = Flask(__name__) @@ -15,12 +14,12 @@ app = Flask(__name__) def dewarp(): img_path = request.form.get('img_path') img = cv2.imread(img_path) - dewarped_img = dewarp.dewarp_image(img) - dirname, img_name, ext = image_util.parse_path(img_path) + dewarped_img = dewarper.dewarp_image(img) + dirname, img_name, ext = parse_img_path(img_path) save_path = os.path.join(dirname, img_name + '_dewarped.' + ext) cv2.imwrite(save_path, dewarped_img) return save_path if __name__ == '__main__': - app.run('0.0.0.0', 5001) + app.run('0.0.0.0', 5007) diff --git a/services/discharge_api.py b/services/discharge_api.py new file mode 100644 index 0000000..5d8d57a --- /dev/null +++ b/services/discharge_api.py @@ -0,0 +1,26 @@ +import json + +from flask import Flask, request +from paddlenlp import Taskflow + +from utils import process_request +from . 
# Base image: the official PaddlePaddle GPU image (CUDA 12.0 / cuDNN 8.9 / TensorRT 8.6)
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6

# Working directory
WORKDIR /app

# Environment variables
ENV PYTHONUNBUFFERED=1 \
    # Time zone
    TZ=Asia/Shanghai \
    # pip mirror, to speed up installation
    PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple

# Install dependencies
# NOTE(review): the requirements file added next to this Dockerfile is spelled
# "requestments.txt" — confirm a "requirements.txt" exists in the build context.
COPY requirements.txt /app/requirements.txt
COPY packages /app/packages
# "$TZ" must be double-quoted: inside single quotes the shell does not expand
# the variable and the literal text $TZ would be written to /etc/timezone.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# Copy the build context into /app inside the container
COPY . /app

# Exposed port
# EXPOSE 8081

# Run an API service; the concrete service is chosen on the command line or in docker-compose.yml
# NOTE(review): gunicorn is not listed in the requirements file added with this
# Dockerfile — confirm it is actually installed in the image.
ENTRYPOINT ["gunicorn"]
paddle.nn.functional.interpolate( - bm, y.shape[2:], mode="bilinear", align_corners=False + bm, y.shape[2:], mode='bilinear', align_corners=False ) bm_nhwc = np.transpose(bm, (0, 2, 3, 1)) out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2) diff --git a/services/paddle_services/paddle_detection/__init__.py b/services/paddle_services/paddle_detection/__init__.py index f2fd699..202b03e 100644 --- a/services/paddle_services/paddle_detection/__init__.py +++ b/services/paddle_services/paddle_detection/__init__.py @@ -1,4 +1,8 @@ +import os + from onnxruntime import InferenceSession -PADDLE_DET = InferenceSession("model/object_det_model/ppyoloe_plus_crn_l_80e_coco_w_nms.onnx", - providers=["CPUExecutionProvider"], provider_options=[{"device_id": 0}]) +MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), + 'model', 'object_det_model') +PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'), + providers=['CPUExecutionProvider'], provider_options=[{'device_id': 0}]) diff --git a/services/paddle_services/paddle_detection/detector.py b/services/paddle_services/paddle_detection/detector.py index f50d28d..a121682 100644 --- a/services/paddle_services/paddle_detection/detector.py +++ b/services/paddle_services/paddle_detection/detector.py @@ -1,13 +1,13 @@ -import tempfile +import os.path from collections import defaultdict import cv2 import numpy as np -from paddle_detection import PADDLE_DET -from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig -from paddle_detection.deploy.third_engine.onnx.preprocess import Compose -from util import image_util, common_util +from util import image_util +from . 
def detect_image(img_path):
    """
    Run the object-detection model on one image.

    Loads the inference configuration ``infer_cfg.yml`` from ``MODEL_DIR`` and
    delegates to ``predict_image`` with the shared ``PADDLE_DET`` session.

    :param img_path: path of the image to analyse
    :return: the result of ``predict_image``
    """
    config_path = os.path.join(MODEL_DIR, 'infer_cfg.yml')
    return predict_image(PredictConfig(config_path), PADDLE_DET, img_path)


def get_book_areas(img_path):
    """
    Crop every detected book region out of an image.

    :param img_path: path of the image to analyse
    :return: list with one ``image_util.capture`` result per detected region
    """
    detections = detect_image(img_path)
    # Class id 73 — presumably "book" in the COCO label set of this model; confirm.
    book_detections = detections[73]
    image = cv2.imread(img_path)
    return [image_util.capture(image, detection['box']) for detection in book_detections]
def process_request(func):
    """
    Decorator that gives an API view uniform JSON responses.

    On success the view's return value is serialised with ``jsonify`` and sent
    with status 200. Any exception (including a serialisation failure) is
    logged to the ``error`` logger with its traceback and reported to the
    client as ``{'error': <message>}`` with status 500.

    :param func: the view function to wrap
    :return: the wrapped view function
    """
    # Local import so this block brings its own dependency into scope.
    import functools

    # Preserve the view's __name__/__doc__: Flask derives the endpoint name
    # from __name__, so without this every decorated view would be registered
    # as "wrapper" and a second decorated route in one app would collide.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
            return jsonify(result), 200
        except Exception as e:
            logging.getLogger('error').error(f'Error: {e}', exc_info=e)
            return jsonify({'error': str(e)}), 500

    return wrapper


def parse_img_path(img_path):
    """
    Split an image path into directory, base name and extension.

    The extension is everything after the last dot of the file name and is
    returned without the dot.

    :param img_path: path of the image file
    :return: tuple ``(dirname, img_name, ext)``
    :raises ValueError: if the file name contains no dot
    """
    dirname, filename = os.path.split(img_path)
    img_name, ext = filename.rsplit('.', 1)
    return dirname, img_name, ext
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('OCR识别失败!'))
def request_ocr(img_path):
    """
    Call the OCR service for one image.

    The call is attempted up to three times (random 1-3 s wait) when it raises;
    a non-200 response is not retried.

    :param img_path: path of the image to recognise
    :return: decoded JSON response, or ``None`` on a non-200 status
    """
    url = 'http://ocr_api:5001/ocr'
    response = requests.post(url, {'img_path': img_path})
    if response.status_code == 200:
        return response.json()
    return None


# This function used to be a second ``request_discharge_info``; the later
# definition of that name replaced it, so the settlement endpoint could never
# be called. It now has its own name.
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取基本医保结算单失败!'))
def request_settlement_info(img_path, layout):
    """
    Call the medical-insurance settlement statement extraction service.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON response, or ``None`` on a non-200 status
    """
    url = 'http://settlement_api:5002/nlp/settlement'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
    return None


@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取出院记录失败!'))
def request_discharge_info(img_path, layout):
    """
    Call the discharge-record extraction service.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON response, or ``None`` on a non-200 status
    """
    url = 'http://discharge_api:5003/nlp/discharge'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
    return None


@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('抽取费用清单失败!'))
def request_cost_info(img_path, layout):
    """
    Call the cost-list extraction service.

    :param img_path: path of the image to extract from
    :param layout: OCR layout of the image; sent JSON-encoded
    :return: decoded JSON response, or ``None`` on a non-200 status
    """
    url = 'http://cost_api:5004/nlp/cost'
    response = requests.post(url, {'img_path': img_path, 'layout': json.dumps(layout)})
    if response.status_code == 200:
        return response.json()
    return None
request_image_orientation(img_path): + """ + 请求图片方向分类接口 + :param img_path: 待分类图片路径 + :return: 最有可能的两个图片方向 + """ + url = 'http://clas_api:5005/clas/orientation' + response = requests.post(url, {'img_path': img_path}) + if response.status_code == 200: + return response.json() + else: + return ['0', '90'] + + @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning('获取文档区域失败!')) def request_book_areas(img_path): @@ -12,7 +96,7 @@ def request_book_areas(img_path): :param img_path: 待识别图片路径 :return: 文档图片路径列表 """ - url = 'http://det_api:5000/det/books' + url = 'http://det_api:5006/det/books' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() @@ -28,25 +112,9 @@ def request_dewarped_image(img_path): :param img_path: 待矫正图片路径 :return: 矫正后的图片路径 """ - url = 'http://det_api:5001/dewarp' + url = 'http://127.0.0.1:5007/dewarp' response = requests.post(url, {'img_path': img_path}) if response.status_code == 200: return response.json() else: return img_path - - -@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, - after=lambda x: logging.warning('获取图片方向失败!')) -def request_image_orientation(img_path): - """ - 请求图片方向分类接口 - :param img_path: 待分类图片路径 - :return: 最有可能的两个图片方向 - """ - url = 'http://det_api:5002/clas/orientation' - response = requests.post(url, {'img_path': img_path}) - if response.status_code == 200: - return response.json() - else: - return ['0', '90']