Compare commits
124 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3044f1fc08 | |||
| da18d890f7 | |||
| ae52d0594e | |||
| d51e56b8f2 | |||
| 83339b5e58 | |||
| 727743d20e | |||
| 814730a0f0 | |||
| c9894d257e | |||
| 68043e5773 | |||
| fe58bb3bfa | |||
| ce44a81603 | |||
| 4b90bf6dfa | |||
| 248e49bf4b | |||
| 401954dca0 | |||
| 6529dc3d98 | |||
| 3f93bd476a | |||
| d85b3fff8f | |||
| 61a7802674 | |||
| 9556da47e9 | |||
| 3710450221 | |||
| 27a4395ca0 | |||
| f116798c30 | |||
| 8c47beb00c | |||
| 74920869e7 | |||
| 9d0db073d6 | |||
| 8e7745f1f6 | |||
| cc53243647 | |||
| 39da0d8a00 | |||
| a2e1f10261 | |||
| e1bd9f3786 | |||
| 46f295d422 | |||
| 1a0caf30d0 | |||
| 25df420be8 | |||
| b5dffaf5bd | |||
| 0e4cfd10b6 | |||
| f98969d957 | |||
| 0c9bed8661 | |||
| d0b4a77817 | |||
| 00e5ca7c30 | |||
| 5dee4ed568 | |||
| 06869e691f | |||
| 8e06fdafa0 | |||
| 84d106c7de | |||
| 9c41fab95c | |||
| 0060c4ad59 | |||
| d374e0743a | |||
| 947b4f20f3 | |||
| 445d57e8c6 | |||
| b09f16fe23 | |||
| c28fc62d3f | |||
| b332aa00dd | |||
| 5af6256376 | |||
| 15ea3ff96f | |||
| 19237d3a3c | |||
| 0b0882d456 | |||
| 304f6897f0 | |||
| a9f172fdb0 | |||
| ac4e4ff8f8 | |||
| f7fbe709bf | |||
| 396550058f | |||
| b9ac638b38 | |||
| 894cab4f0b | |||
| bb6d9c3b47 | |||
| f8280e87ee | |||
| 608a647621 | |||
| 7b9d9ca589 | |||
| d9b24e906d | |||
| 97c7b2cfce | |||
| 004dd12004 | |||
| cc9d020008 | |||
| 7335553080 | |||
| ebb10b2816 | |||
| 98fb9fa861 | |||
| c75415164e | |||
| 03d8652b8f | |||
| e3be5cf4b2 | |||
| c92b549480 | |||
| d36740d729 | |||
| a1dea6f29c | |||
| 0fc0c80d6f | |||
| f3930cc7bd | |||
| a11cefb999 | |||
| 5c0fc0f819 | |||
| 77010f0598 | |||
| e4b58e30c0 | |||
| 15fe5d4f0d | |||
| fc69aa5b9d | |||
| 795134f566 | |||
| a3fa1e502e | |||
| 7a4cb5263a | |||
| 46be9a26be | |||
| f1149854ce | |||
| 117b29a737 | |||
| 3219f28934 | |||
| 2e1c0a57c7 | |||
| 2dcd2d2a34 | |||
| 153eb70f84 | |||
| b5aba0418b | |||
| 603b027ca6 | |||
| d4c54b04f5 | |||
| fc3e7b4ed4 | |||
| a62c2af816 | |||
| 0618754da2 | |||
| c5a03ad16f | |||
| ff9d612e67 | |||
| 86d28096d4 | |||
| 87180cd282 | |||
| f13ffd1fe9 | |||
| 09f62b36a9 | |||
| 186cab0317 | |||
| 101b2126f4 | |||
| d4a695e9ea | |||
| 72794f699e | |||
| 3189caf7aa | |||
| b8c1202957 | |||
| 7647df7d74 | |||
| 3438cf6e0e | |||
| 90a6d5ec75 | |||
| 9c21152823 | |||
| c091a82a91 | |||
| a2a82df21c | |||
| f0c03e763b | |||
| 7b6e78373c | |||
| 65b7126348 |
@@ -238,8 +238,11 @@ cython_debug/
|
|||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
# 模型通过卷绑定挂载到容器中
|
# 通过卷绑定挂载到容器中
|
||||||
/model
|
/log
|
||||||
|
/services/paddle_services/log
|
||||||
|
/services/paddle_services/model
|
||||||
|
/tmp_img
|
||||||
# docker
|
# docker
|
||||||
Dockerfile
|
Dockerfile
|
||||||
docker-compose*.yml
|
docker-compose*.yml
|
||||||
8
.gitignore
vendored
8
.gitignore
vendored
@@ -142,7 +142,11 @@ cython_debug/
|
|||||||
.idea
|
.idea
|
||||||
|
|
||||||
### Model
|
### Model
|
||||||
model
|
services/paddle_services/model
|
||||||
|
|
||||||
### Log Backups
|
### Log Backups
|
||||||
*.log.*-*-*
|
*.log.*-*-*
|
||||||
|
|
||||||
|
### Tmp Files
|
||||||
|
/tmp_img
|
||||||
|
/test_img
|
||||||
11
Dockerfile
11
Dockerfile
@@ -1,5 +1,5 @@
|
|||||||
# 使用官方的paddle镜像作为基础
|
# 使用官方的python镜像作为基础
|
||||||
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6
|
FROM python:3.10.15-bookworm
|
||||||
|
|
||||||
# 设置工作目录
|
# 设置工作目录
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -13,11 +13,10 @@ ENV PYTHONUNBUFFERED=1 \
|
|||||||
|
|
||||||
# 安装依赖
|
# 安装依赖
|
||||||
COPY requirements.txt /app/requirements.txt
|
COPY requirements.txt /app/requirements.txt
|
||||||
COPY packages /app/packages
|
|
||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo '$TZ' > /etc/timezone \
|
||||||
&& pip install --no-cache-dir -r requirements.txt \
|
&& sed -i 's|deb.debian.org|mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
|
||||||
&& pip uninstall -y onnxruntime onnxruntime-gpu \
|
&& apt-get update && apt-get install libgl1 -y \
|
||||||
&& pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
&& pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# 将当前目录内容复制到容器的/app内
|
# 将当前目录内容复制到容器的/app内
|
||||||
COPY . /app
|
COPY . /app
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
1. 从Git远程仓库克隆项目到本地。
|
1. 从Git远程仓库克隆项目到本地。
|
||||||
|
|
||||||
2. 将深度学习模型复制到./model目录下,具体请看[模型更新](#模型更新)部分。
|
2. 将深度学习模型复制到./services/paddle_services/model目录下,具体请看[模型更新](#模型更新)部分。
|
||||||
|
|
||||||
3. 安装docker和docker-compose。
|
3. 安装docker和docker-compose。
|
||||||
|
|
||||||
@@ -125,4 +125,6 @@ bash update.sh
|
|||||||
1. 新增文档检测功能
|
1. 新增文档检测功能
|
||||||
2. 新增扭曲矫正功能
|
2. 新增扭曲矫正功能
|
||||||
21. 版本号:1.14.0
|
21. 版本号:1.14.0
|
||||||
1. 新增二维码识别替换高清图片功能
|
1. 新增二维码识别替换高清图片功能
|
||||||
|
22. 版本号:2.0.0
|
||||||
|
1. 项目架构调整,模型全部采用接口调用
|
||||||
4
api_test.py
Normal file
4
api_test.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
import time
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
time.sleep(3600)
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
# 自动生成数据库表和sqlalchemy对应的Model
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from db import DB_URL
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
table = input("请输入表名:")
|
|
||||||
out_file = f"db/{table}.py"
|
|
||||||
command = f"sqlacodegen {DB_URL} --outfile={out_file} --tables={table}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
subprocess.run(command, shell=True, check=True)
|
|
||||||
print(f"{table}.py文件生成成功!请检查并复制到合适的文件中!")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"生成{table}.py文件时发生错误: {e}")
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
|
|
||||||
@@ -1,102 +0,0 @@
|
|||||||
import datetime
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
from decimal import Decimal
|
|
||||||
from io import BytesIO
|
|
||||||
from itertools import groupby
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from PIL import ImageDraw, Image, ImageFont
|
|
||||||
|
|
||||||
from db import MysqlSession
|
|
||||||
from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxIeResult, ZxPhrec
|
|
||||||
from ucloud import ufile
|
|
||||||
from util import image_util
|
|
||||||
|
|
||||||
|
|
||||||
def check_ie_result(pk_phhd):
|
|
||||||
os.makedirs(f"./check_result/{pk_phhd}", exist_ok=True)
|
|
||||||
json_result = {"pk_phhd": pk_phhd}
|
|
||||||
session = MysqlSession()
|
|
||||||
phhd = session.query(ZxPhhd.cXm).filter(ZxPhhd.pk_phhd == pk_phhd).one()
|
|
||||||
json_result["cXm"] = phhd.cXm
|
|
||||||
settlement = (session.query(ZxIeSettlement.pk_ie_settlement, ZxIeSettlement.name, ZxIeSettlement.admission_date,
|
|
||||||
ZxIeSettlement.discharge_date, ZxIeSettlement.medical_expenses,
|
|
||||||
ZxIeSettlement.personal_cash_payment, ZxIeSettlement.personal_account_payment,
|
|
||||||
ZxIeSettlement.personal_funded_amount, ZxIeSettlement.medical_insurance_type,
|
|
||||||
ZxIeSettlement.admission_id, ZxIeSettlement.settlement_id)
|
|
||||||
.filter(ZxIeSettlement.pk_phhd == pk_phhd).one())
|
|
||||||
settlement_result = settlement._asdict()
|
|
||||||
json_result["settlement"] = settlement_result
|
|
||||||
|
|
||||||
discharge = (session.query(ZxIeDischarge.pk_ie_discharge, ZxIeDischarge.hospital, ZxIeDischarge.pk_yljg,
|
|
||||||
ZxIeDischarge.department, ZxIeDischarge.pk_ylks, ZxIeDischarge.name, ZxIeDischarge.age,
|
|
||||||
ZxIeDischarge.admission_date, ZxIeDischarge.discharge_date, ZxIeDischarge.doctor,
|
|
||||||
ZxIeDischarge.admission_id)
|
|
||||||
.filter(ZxIeDischarge.pk_phhd == pk_phhd).one())
|
|
||||||
discharge_result = discharge._asdict()
|
|
||||||
json_result["discharge"] = discharge_result
|
|
||||||
|
|
||||||
cost = session.query(ZxIeCost.pk_ie_cost, ZxIeCost.name, ZxIeCost.admission_date, ZxIeCost.discharge_date,
|
|
||||||
ZxIeCost.medical_expenses).filter(ZxIeCost.pk_phhd == pk_phhd).one()
|
|
||||||
cost_result = cost._asdict()
|
|
||||||
json_result["cost"] = cost_result
|
|
||||||
|
|
||||||
phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
|
|
||||||
ZxPhrec.pk_phhd == pk_phhd).all()
|
|
||||||
for phrec in phrecs:
|
|
||||||
img_name = phrec.cfjaddress
|
|
||||||
img_path = ufile.get_private_url(img_name)
|
|
||||||
|
|
||||||
response = requests.get(img_path)
|
|
||||||
image = Image.open(BytesIO(response.content)).convert("RGB")
|
|
||||||
font_size = image.width * image.height / 200000
|
|
||||||
font = ImageFont.truetype("./font/simfang.ttf", size=font_size)
|
|
||||||
|
|
||||||
ocr = session.query(ZxIeResult.id, ZxIeResult.content, ZxIeResult.rotation_angle, ZxIeResult.x_offset,
|
|
||||||
ZxIeResult.y_offset).filter(ZxIeResult.pk_phrec == phrec.pk_phrec).all()
|
|
||||||
if not ocr:
|
|
||||||
os.makedirs(f"./check_result/{pk_phhd}/0", exist_ok=True)
|
|
||||||
image.save(f"./check_result/{pk_phhd}/0/{img_name}")
|
|
||||||
|
|
||||||
for _, group_results in groupby(ocr, key=lambda x: x.id):
|
|
||||||
draw = ImageDraw.Draw(image)
|
|
||||||
for ocr_item in group_results:
|
|
||||||
result = json.loads(ocr_item.content)
|
|
||||||
rotation_angle = ocr_item.rotation_angle
|
|
||||||
x_offset = ocr_item.x_offset
|
|
||||||
y_offset = ocr_item.y_offset
|
|
||||||
for key in result:
|
|
||||||
for value in result[key]:
|
|
||||||
box = value["bbox"][0]
|
|
||||||
|
|
||||||
if rotation_angle:
|
|
||||||
box = image_util.invert_rotate_rectangle(box, (image.width / 2, image.height / 2),
|
|
||||||
rotation_angle)
|
|
||||||
if x_offset:
|
|
||||||
box[0] += x_offset
|
|
||||||
box[2] += x_offset
|
|
||||||
if y_offset:
|
|
||||||
box[1] += y_offset
|
|
||||||
box[3] += y_offset
|
|
||||||
|
|
||||||
draw.rectangle(box, outline="red", width=2) # 绘制矩形
|
|
||||||
draw.text((box[0], box[1] - font_size), key, fill="blue", font=font) # 在矩形上方绘制文本
|
|
||||||
draw.text((box[0], box[3]), value["text"], fill="blue", font=font) # 在矩形下方绘制文本
|
|
||||||
os.makedirs(f"./check_result/{pk_phhd}/{ocr_item.id}", exist_ok=True)
|
|
||||||
image.save(f"./check_result/{pk_phhd}/{ocr_item.id}/{img_name}")
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
# 自定义JSON处理器
|
|
||||||
def default(obj):
|
|
||||||
if isinstance(obj, Decimal):
|
|
||||||
return float(obj)
|
|
||||||
if isinstance(obj, datetime.date):
|
|
||||||
return obj.strftime("%Y-%m-%d")
|
|
||||||
|
|
||||||
with open(f"./check_result/{pk_phhd}/result.json", "w", encoding="utf-8") as json_file:
|
|
||||||
json.dump(json_result, json_file, indent=4, ensure_ascii=False, default=default)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
check_ie_result(0)
|
|
||||||
12
db/mysql.py
12
db/mysql.py
@@ -63,6 +63,8 @@ class ZxIeCost(Base):
|
|||||||
discharge_date = Column(Date, comment='出院日期')
|
discharge_date = Column(Date, comment='出院日期')
|
||||||
medical_expenses_str = Column(String(255), comment='费用总额字符串')
|
medical_expenses_str = Column(String(255), comment='费用总额字符串')
|
||||||
medical_expenses = Column(DECIMAL(18, 2), comment='费用总额')
|
medical_expenses = Column(DECIMAL(18, 2), comment='费用总额')
|
||||||
|
page_nums = Column(String(255), comment='页码')
|
||||||
|
page_count = Column(TINYINT(4), comment='页数')
|
||||||
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
||||||
creator = Column(String(255), comment='创建人')
|
creator = Column(String(255), comment='创建人')
|
||||||
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
||||||
@@ -120,7 +122,7 @@ class ZxIeResult(Base):
|
|||||||
pk_ocr = Column(INTEGER(11), primary_key=True, comment='图片OCR识别主键')
|
pk_ocr = Column(INTEGER(11), primary_key=True, comment='图片OCR识别主键')
|
||||||
pk_phhd = Column(INTEGER(11), nullable=False, comment='报销单主键')
|
pk_phhd = Column(INTEGER(11), nullable=False, comment='报销单主键')
|
||||||
pk_phrec = Column(INTEGER(11), nullable=False, comment='图片主键')
|
pk_phrec = Column(INTEGER(11), nullable=False, comment='图片主键')
|
||||||
id = Column(INTEGER(11), nullable=False, comment='识别批次')
|
id = Column(CHAR(32), nullable=False, comment='识别批次')
|
||||||
cfjaddress = Column(String(200), nullable=False, comment='云存储文件名')
|
cfjaddress = Column(String(200), nullable=False, comment='云存储文件名')
|
||||||
content = Column(String(5000), comment='OCR识别内容')
|
content = Column(String(5000), comment='OCR识别内容')
|
||||||
rotation_angle = Column(INTEGER(11), comment='旋转角度')
|
rotation_angle = Column(INTEGER(11), comment='旋转角度')
|
||||||
@@ -412,17 +414,19 @@ class ZxIeReview(Base):
|
|||||||
pk_ie_review = Column(INTEGER(11), primary_key=True, comment='自动审核主键')
|
pk_ie_review = Column(INTEGER(11), primary_key=True, comment='自动审核主键')
|
||||||
pk_phhd = Column(INTEGER(11), nullable=False, comment='报销案子主键')
|
pk_phhd = Column(INTEGER(11), nullable=False, comment='报销案子主键')
|
||||||
success = Column(BIT(1))
|
success = Column(BIT(1))
|
||||||
|
integrity = Column(BIT(1))
|
||||||
has_settlement = Column(BIT(1))
|
has_settlement = Column(BIT(1))
|
||||||
has_discharge = Column(BIT(1))
|
has_discharge = Column(BIT(1))
|
||||||
has_cost = Column(BIT(1))
|
has_cost = Column(BIT(1))
|
||||||
full_page = Column(BIT(1))
|
full_page = Column(BIT(1))
|
||||||
page_description = Column(String(255), comment='具体缺页描述')
|
page_description = Column(String(255), comment='具体缺页描述')
|
||||||
|
consistency = Column(BIT(1), comment='三项资料一致性。0:不一致;1:一致')
|
||||||
name_match = Column(CHAR(1), server_default=text("'0'"),
|
name_match = Column(CHAR(1), server_default=text("'0'"),
|
||||||
comment='三项资料姓名是否一致。0:不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致;5:与报销申请对象不一致')
|
comment='三项资料姓名是否一致。0:都不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致;5:与报销申请对象不一致')
|
||||||
admission_date_match = Column(CHAR(1), server_default=text("'0'"),
|
admission_date_match = Column(CHAR(1), server_default=text("'0'"),
|
||||||
comment='三项资料入院日期是否一致。0:不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致')
|
comment='三项资料入院日期是否一致。0:都不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致')
|
||||||
discharge_date_match = Column(CHAR(1), server_default=text("'0'"),
|
discharge_date_match = Column(CHAR(1), server_default=text("'0'"),
|
||||||
comment='三项资料出院日期是否一致。0:不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致')
|
comment='三项资料出院日期是否一致。0:都不一致;1:一致;2:结算单不一致;3:出院记录不一致;4:费用清单不一致')
|
||||||
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
create_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"), comment='创建时间')
|
||||||
creator = Column(String(255), comment='创建人')
|
creator = Column(String(255), comment='创建人')
|
||||||
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
update_time = Column(DateTime, server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
|
||||||
|
|||||||
32
det_api.py
32
det_api.py
@@ -1,32 +0,0 @@
|
|||||||
import base64
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
from flask import Flask, request, jsonify
|
|
||||||
|
|
||||||
from paddle_detection import detector
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/det/detect_books", methods=['POST'])
|
|
||||||
def detect_books():
|
|
||||||
try:
|
|
||||||
file = request.files['image']
|
|
||||||
image_data = file.read()
|
|
||||||
nparr = np.frombuffer(image_data, np.uint8)
|
|
||||||
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
|
||||||
result = detector.get_book_areas(image)
|
|
||||||
encoded_images = []
|
|
||||||
for i in result:
|
|
||||||
_, encoded_image = cv2.imencode('.jpg', i)
|
|
||||||
byte_stream = encoded_image.tobytes()
|
|
||||||
img_str = base64.b64encode(byte_stream).decode('utf-8')
|
|
||||||
encoded_images.append(img_str)
|
|
||||||
return jsonify(encoded_images), 200
|
|
||||||
except Exception as e:
|
|
||||||
return jsonify({'error': str(e)}), 500
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
app.run("0.0.0.0")
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
from onnxruntime import InferenceSession
|
|
||||||
|
|
||||||
DOC_TR = InferenceSession("model/dewarp_model/doc_tr_pp.onnx",
|
|
||||||
providers=["CUDAExecutionProvider"], provider_options=[{"device_id": 0}])
|
|
||||||
@@ -1,46 +1,32 @@
|
|||||||
x-env:
|
x-base:
|
||||||
&template
|
&base_template
|
||||||
image: fcb_photo_review:1.14.6
|
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
x-review:
|
x-project:
|
||||||
&review_template
|
&project_template
|
||||||
<<: *template
|
<<: *base_template
|
||||||
|
image: fcb_photo_review:2.0.0
|
||||||
volumes:
|
volumes:
|
||||||
- ./log:/app/log
|
- ./log:/app/log
|
||||||
- ./model:/app/model
|
- ./tmp_img:/app/tmp_img
|
||||||
deploy:
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- device_ids: [ '0', '1' ]
|
|
||||||
capabilities: [ 'gpu' ]
|
|
||||||
driver: 'nvidia'
|
|
||||||
|
|
||||||
x-mask:
|
x-paddle:
|
||||||
&mask_template
|
&paddle_template
|
||||||
<<: *template
|
<<: *base_template
|
||||||
|
image: fcb_paddle:0.0.1
|
||||||
volumes:
|
volumes:
|
||||||
- ./log:/app/log
|
- ./services/paddle_services/log:/app/log
|
||||||
deploy:
|
- ./services/paddle_services/model:/app/model
|
||||||
resources:
|
- ./tmp_img:/app/tmp_img
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- device_ids: [ '1' ]
|
|
||||||
capabilities: [ 'gpu' ]
|
|
||||||
driver: 'nvidia'
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
det_api:
|
ocr:
|
||||||
<<: *template
|
<<: *paddle_template
|
||||||
build:
|
build:
|
||||||
context: .
|
context: ./services/paddle_services
|
||||||
container_name: det_api
|
container_name: ocr
|
||||||
hostname: det_api
|
hostname: ocr
|
||||||
volumes:
|
command: [ '-w', '4', 'ocr:app', '--bind', '0.0.0.0:5001' ]
|
||||||
- ./log:/app/log
|
|
||||||
- ./model:/app/model
|
|
||||||
# command: [ 'det_api.py' ]
|
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
@@ -49,16 +35,116 @@ services:
|
|||||||
capabilities: [ 'gpu' ]
|
capabilities: [ 'gpu' ]
|
||||||
driver: 'nvidia'
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
ie_settlement:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: ie_settlement
|
||||||
|
hostname: ie_settlement
|
||||||
|
command: [ '-w', '5', 'ie_settlement:app', '--bind', '0.0.0.0:5002' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '0' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
ie_discharge:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: ie_discharge
|
||||||
|
hostname: ie_discharge
|
||||||
|
command: [ '-w', '5', 'ie_discharge:app', '--bind', '0.0.0.0:5003' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '1' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
ie_cost:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: ie_cost
|
||||||
|
hostname: ie_cost
|
||||||
|
command: [ '-w', '5', 'ie_cost:app', '--bind', '0.0.0.0:5004' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '1' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
clas_orientation:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: clas_orientation
|
||||||
|
hostname: clas_orientation
|
||||||
|
command: [ '-w', '3', 'clas_orientation:app', '--bind', '0.0.0.0:5005' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '0' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
det_book:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: det_book
|
||||||
|
hostname: det_book
|
||||||
|
command: [ '-w', '4', 'det_book:app', '--bind', '0.0.0.0:5006' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '1' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
dewarp:
|
||||||
|
<<: *paddle_template
|
||||||
|
container_name: dewarp
|
||||||
|
hostname: dewarp
|
||||||
|
command: [ '-w', '4', 'dewarp:app', '--bind', '0.0.0.0:5007' ]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- device_ids: [ '0' ]
|
||||||
|
capabilities: [ 'gpu' ]
|
||||||
|
driver: 'nvidia'
|
||||||
|
|
||||||
|
# clas_text:
|
||||||
|
# <<: *paddle_template
|
||||||
|
# container_name: clas_text
|
||||||
|
# hostname: clas_text
|
||||||
|
# command: [ '-w', '3', 'clas_text:app', '--bind', '0.0.0.0:5008' ]
|
||||||
|
# deploy:
|
||||||
|
# resources:
|
||||||
|
# reservations:
|
||||||
|
# devices:
|
||||||
|
# - device_ids: [ '1' ]
|
||||||
|
# capabilities: [ 'gpu' ]
|
||||||
|
# driver: 'nvidia'
|
||||||
|
|
||||||
photo_review_1:
|
photo_review_1:
|
||||||
<<: *review_template
|
<<: *project_template
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
container_name: photo_review_1
|
container_name: photo_review_1
|
||||||
hostname: photo_review_1
|
hostname: photo_review_1
|
||||||
depends_on:
|
depends_on:
|
||||||
- det_api
|
- ocr
|
||||||
|
- ie_settlement
|
||||||
|
- ie_discharge
|
||||||
|
- ie_cost
|
||||||
|
- clas_orientation
|
||||||
|
- det_book
|
||||||
|
- dewarp
|
||||||
|
# - clas_text
|
||||||
command: [ 'photo_review.py', '--clean', 'True' ]
|
command: [ 'photo_review.py', '--clean', 'True' ]
|
||||||
|
|
||||||
photo_review_2:
|
photo_review_2:
|
||||||
<<: *review_template
|
<<: *project_template
|
||||||
container_name: photo_review_2
|
container_name: photo_review_2
|
||||||
hostname: photo_review_2
|
hostname: photo_review_2
|
||||||
depends_on:
|
depends_on:
|
||||||
@@ -66,57 +152,41 @@ services:
|
|||||||
command: [ 'photo_review.py' ]
|
command: [ 'photo_review.py' ]
|
||||||
|
|
||||||
photo_review_3:
|
photo_review_3:
|
||||||
<<: *review_template
|
<<: *project_template
|
||||||
container_name: photo_review_3
|
container_name: photo_review_3
|
||||||
hostname: photo_review_3
|
hostname: photo_review_3
|
||||||
depends_on:
|
depends_on:
|
||||||
- photo_review_2
|
- photo_review_1
|
||||||
command: [ 'photo_review.py' ]
|
command: [ 'photo_review.py' ]
|
||||||
|
|
||||||
photo_review_4:
|
photo_review_4:
|
||||||
<<: *review_template
|
<<: *project_template
|
||||||
container_name: photo_review_4
|
container_name: photo_review_4
|
||||||
hostname: photo_review_4
|
hostname: photo_review_4
|
||||||
depends_on:
|
depends_on:
|
||||||
- photo_review_3
|
- photo_review_1
|
||||||
command: [ 'photo_review.py' ]
|
command: [ 'photo_review.py' ]
|
||||||
|
|
||||||
photo_review_5:
|
photo_review_5:
|
||||||
<<: *review_template
|
<<: *project_template
|
||||||
container_name: photo_review_5
|
container_name: photo_review_5
|
||||||
hostname: photo_review_5
|
hostname: photo_review_5
|
||||||
depends_on:
|
depends_on:
|
||||||
- photo_review_4
|
- photo_review_1
|
||||||
command: [ 'photo_review.py' ]
|
command: [ 'photo_review.py' ]
|
||||||
|
|
||||||
photo_mask_1:
|
photo_mask_1:
|
||||||
<<: *mask_template
|
<<: *project_template
|
||||||
container_name: photo_mask_1
|
container_name: photo_mask_1
|
||||||
hostname: photo_mask_1
|
hostname: photo_mask_1
|
||||||
depends_on:
|
depends_on:
|
||||||
- photo_review_5
|
- photo_review_1
|
||||||
command: [ 'photo_mask.py', '--clean', 'True' ]
|
command: [ 'photo_mask.py', '--clean', 'True' ]
|
||||||
|
|
||||||
photo_mask_2:
|
photo_mask_2:
|
||||||
<<: *mask_template
|
<<: *project_template
|
||||||
container_name: photo_mask_2
|
container_name: photo_mask_2
|
||||||
hostname: photo_mask_2
|
hostname: photo_mask_2
|
||||||
depends_on:
|
depends_on:
|
||||||
- photo_mask_1
|
- photo_mask_1
|
||||||
command: [ 'photo_mask.py' ]
|
command: [ 'photo_mask.py' ]
|
||||||
#
|
|
||||||
# photo_review_6:
|
|
||||||
# <<: *review_template
|
|
||||||
# container_name: photo_review_6
|
|
||||||
# hostname: photo_review_6
|
|
||||||
# depends_on:
|
|
||||||
# - photo_mask_2
|
|
||||||
# command: [ 'photo_review.py' ]
|
|
||||||
#
|
|
||||||
# photo_review_7:
|
|
||||||
# <<: *review_template
|
|
||||||
# container_name: photo_review_7
|
|
||||||
# hostname: photo_review_7
|
|
||||||
# depends_on:
|
|
||||||
# - photo_review_6
|
|
||||||
# command: [ 'photo_review.py' ]
|
|
||||||
@@ -1,13 +1,15 @@
|
|||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
# 项目根目录
|
||||||
|
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
# 获取主机名,方便区分容器
|
# 获取主机名,方便区分容器
|
||||||
HOSTNAME = socket.gethostname()
|
HOSTNAME = socket.gethostname()
|
||||||
# 检测日志文件的路径是否存在,不存在则创建
|
# 检测日志文件的路径是否存在,不存在则创建
|
||||||
LOG_PATHS = [
|
LOG_PATHS = [
|
||||||
f"log/{HOSTNAME}/ucloud",
|
os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'ucloud'),
|
||||||
f"log/{HOSTNAME}/error",
|
os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'error'),
|
||||||
f"log/{HOSTNAME}/qr",
|
os.path.join(PROJECT_ROOT, 'log', HOSTNAME, 'qr'),
|
||||||
]
|
]
|
||||||
for path in LOG_PATHS:
|
for path in LOG_PATHS:
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
|
|||||||
@@ -8,15 +8,13 @@ MAX_WAIT_TIME = 3
|
|||||||
# 程序异常短信配置
|
# 程序异常短信配置
|
||||||
ERROR_EMAIL_CONFIG = {
|
ERROR_EMAIL_CONFIG = {
|
||||||
# SMTP服务器地址
|
# SMTP服务器地址
|
||||||
"smtp_server": "smtp.163.com",
|
'smtp_server': 'smtp.163.com',
|
||||||
# 连接SMTP的端口
|
# 连接SMTP的端口
|
||||||
"port": 994,
|
'port': 994,
|
||||||
# 发件人邮箱地址,请确保开启了SMTP邮件服务!
|
# 发件人邮箱地址,请确保开启了SMTP邮件服务!
|
||||||
"sender": "EchoLiu618@163.com",
|
'sender': 'EchoLiu618@163.com',
|
||||||
# 授权码--用于登录第三方邮件客户端的专用密码,不是邮箱密码
|
# 授权码--用于登录第三方邮件客户端的专用密码,不是邮箱密码
|
||||||
"authorization_code": "OKPQLIIVLVGRZYVH",
|
'authorization_code': 'OKPQLIIVLVGRZYVH',
|
||||||
# 收件人邮箱地址
|
# 收件人邮箱地址
|
||||||
"receivers": ["1515783401@qq.com"],
|
'receivers': ['1515783401@qq.com'],
|
||||||
# 尝试次数
|
|
||||||
"retry_times": 3,
|
|
||||||
}
|
}
|
||||||
@@ -5,18 +5,18 @@ from email.mime.text import MIMEText
|
|||||||
|
|
||||||
from tenacity import retry, stop_after_attempt, wait_random
|
from tenacity import retry, stop_after_attempt, wait_random
|
||||||
|
|
||||||
from auto_email import ERROR_EMAIL_CONFIG, TRY_TIMES, MIN_WAIT_TIME, MAX_WAIT_TIME
|
|
||||||
from log import HOSTNAME
|
from log import HOSTNAME
|
||||||
|
from my_email import ERROR_EMAIL_CONFIG, TRY_TIMES, MIN_WAIT_TIME, MAX_WAIT_TIME
|
||||||
|
|
||||||
|
|
||||||
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), reraise=True,
|
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), reraise=True,
|
||||||
after=lambda x: logging.warning("发送邮件失败!"))
|
after=lambda x: logging.warning('发送邮件失败!'))
|
||||||
def send_email(email_config, massage):
|
def send_email(email_config, massage):
|
||||||
smtp_server = email_config["smtp_server"]
|
smtp_server = email_config['smtp_server']
|
||||||
port = email_config["port"]
|
port = email_config['port']
|
||||||
sender = email_config["sender"]
|
sender = email_config['sender']
|
||||||
authorization_code = email_config["authorization_code"]
|
authorization_code = email_config['authorization_code']
|
||||||
receivers = email_config["receivers"]
|
receivers = email_config['receivers']
|
||||||
mail = smtplib.SMTP_SSL(smtp_server, port) # 连接SMTP服务
|
mail = smtplib.SMTP_SSL(smtp_server, port) # 连接SMTP服务
|
||||||
mail.login(sender, authorization_code) # 登录到SMTP服务
|
mail.login(sender, authorization_code) # 登录到SMTP服务
|
||||||
mail.sendmail(sender, receivers, massage.as_string()) # 发送邮件
|
mail.sendmail(sender, receivers, massage.as_string()) # 发送邮件
|
||||||
@@ -34,13 +34,13 @@ def send_error_email(program_name, error_name, error_detail):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# SMTP 服务器配置
|
# SMTP 服务器配置
|
||||||
sender = ERROR_EMAIL_CONFIG["sender"]
|
sender = ERROR_EMAIL_CONFIG['sender']
|
||||||
receivers = ERROR_EMAIL_CONFIG["receivers"]
|
receivers = ERROR_EMAIL_CONFIG['receivers']
|
||||||
|
|
||||||
# 获取程序出错的时间
|
# 获取程序出错的时间
|
||||||
error_time = datetime.datetime.strftime(datetime.datetime.today(), "%Y-%m-%d %H:%M:%S:%f")
|
error_time = datetime.datetime.strftime(datetime.datetime.today(), '%Y-%m-%d %H:%M:%S:%f')
|
||||||
# 邮件内容
|
# 邮件内容
|
||||||
subject = f"【程序异常提醒】{program_name}({HOSTNAME}) {error_time}" # 邮件的标题
|
subject = f'【程序异常提醒】{program_name}({HOSTNAME}) {error_time}' # 邮件的标题
|
||||||
content = f'''<div class="emailcontent" style="width:100%;max-width:720px;text-align:left;margin:0 auto;padding-top:80px;padding-bottom:20px">
|
content = f'''<div class="emailcontent" style="width:100%;max-width:720px;text-align:left;margin:0 auto;padding-top:80px;padding-bottom:20px">
|
||||||
<div class="emailtitle">
|
<div class="emailtitle">
|
||||||
<h1 style="color:#fff;background:#51a0e3;line-height:70px;font-size:24px;font-weight:400;padding-left:40px;margin:0">程序运行异常通知</h1>
|
<h1 style="color:#fff;background:#51a0e3;line-height:70px;font-size:24px;font-weight:400;padding-left:40px;margin:0">程序运行异常通知</h1>
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
from onnxruntime import InferenceSession
|
|
||||||
|
|
||||||
PADDLE_DET = InferenceSession("model/object_det_model/ppyoloe_plus_crn_l_80e_coco_w_nms.onnx",
|
|
||||||
providers=["CPUExecutionProvider"], provider_options=[{"device_id": 0}])
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_cn.md
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
README_en.md
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
import base64
|
|
||||||
import logging
|
|
||||||
import tempfile
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
import cv2
|
|
||||||
import numpy as np
|
|
||||||
import requests
|
|
||||||
from tenacity import retry, stop_after_attempt, wait_random
|
|
||||||
|
|
||||||
from paddle_detection import PADDLE_DET
|
|
||||||
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
|
|
||||||
from paddle_detection.deploy.third_engine.onnx.preprocess import Compose
|
|
||||||
from util import image_util, util
|
|
||||||
|
|
||||||
|
|
||||||
def predict_image(infer_config, predictor, img_path):
|
|
||||||
# load preprocess transforms
|
|
||||||
transforms = Compose(infer_config.preprocess_infos)
|
|
||||||
# predict image
|
|
||||||
inputs = transforms(img_path)
|
|
||||||
inputs["image"] = np.array(inputs["image"]).astype('float32')
|
|
||||||
inputs_name = [var.name for var in predictor.get_inputs()]
|
|
||||||
inputs = {k: inputs[k][None,] for k in inputs_name}
|
|
||||||
|
|
||||||
outputs = predictor.run(output_names=None, input_feed=inputs)
|
|
||||||
|
|
||||||
bboxes = np.array(outputs[0])
|
|
||||||
result = defaultdict(list)
|
|
||||||
for bbox in bboxes:
|
|
||||||
if bbox[0] > -1 and bbox[1] > infer_config.draw_threshold:
|
|
||||||
result[bbox[0]].append({"score": bbox[1], "box": bbox[2:]})
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def detect_image(img_path):
|
|
||||||
infer_cfg = "model/object_det_model/infer_cfg.yml"
|
|
||||||
# load infer config
|
|
||||||
infer_config = PredictConfig(infer_cfg)
|
|
||||||
|
|
||||||
return predict_image(infer_config, PADDLE_DET, img_path)
|
|
||||||
|
|
||||||
|
|
||||||
def get_book_areas(image):
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
|
||||||
cv2.imwrite(temp_file.name, image)
|
|
||||||
detect_result = detect_image(temp_file.name)
|
|
||||||
util.delete_temp_file(temp_file.name)
|
|
||||||
book_areas = detect_result[73]
|
|
||||||
result = []
|
|
||||||
for book_area in book_areas:
|
|
||||||
result.append(image_util.capture(image, book_area["box"]))
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
|
||||||
after=lambda x: logging.warning("获取文档区域失败!"))
|
|
||||||
def request_book_areas(image):
|
|
||||||
url = "http://det_api:5000/det/detect_books"
|
|
||||||
_, encoded_image = cv2.imencode('.jpg', image)
|
|
||||||
byte_stream = encoded_image.tobytes()
|
|
||||||
files = {"image": ("image.jpg", byte_stream)}
|
|
||||||
response = requests.post(url, files=files)
|
|
||||||
if response.status_code == 200:
|
|
||||||
img_str_list = response.json()
|
|
||||||
result = []
|
|
||||||
for img_str in img_str_list:
|
|
||||||
img_data = base64.b64decode(img_str)
|
|
||||||
np_array = np.frombuffer(img_data, np.uint8)
|
|
||||||
img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
|
|
||||||
height, width = img.shape[:2]
|
|
||||||
if max(height, width) / min(height, width) <= 6.5:
|
|
||||||
result.append(img) # 过滤异常结果
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
return []
|
|
||||||
@@ -5,35 +5,36 @@ from time import sleep
|
|||||||
|
|
||||||
from sqlalchemy import update
|
from sqlalchemy import update
|
||||||
|
|
||||||
from auto_email.error_email import send_error_email
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxPhhd
|
from db.mysql import ZxPhhd
|
||||||
from log import LOGGING_CONFIG
|
from log import LOGGING_CONFIG
|
||||||
|
from my_email.error_email import send_error_email
|
||||||
from photo_mask import auto_photo_mask, SEND_ERROR_EMAIL
|
from photo_mask import auto_photo_mask, SEND_ERROR_EMAIL
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
program_name = "照片审核自动涂抹脚本"
|
program_name = '照片审核自动涂抹脚本'
|
||||||
logging.config.dictConfig(LOGGING_CONFIG)
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
|
||||||
|
logging.info('等待接口服务启动...')
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--clean", default=False, type=bool, help="是否将涂抹中的案子改为待涂抹状态")
|
parser.add_argument('--clean', default=False, type=bool, help='是否将涂抹中的案子改为待涂抹状态')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.clean:
|
if args.clean:
|
||||||
# 主要用于启动时,清除仍在涂抹中的案子
|
# 主要用于启动时,清除仍在涂抹中的案子
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_flag = (update(ZxPhhd).where(ZxPhhd.paint_flag == "2").values(paint_flag="1"))
|
update_flag = (update(ZxPhhd).where(ZxPhhd.paint_flag == '2').values(paint_flag='1'))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
logging.info("已释放残余的涂抹案子!")
|
logging.info('已释放残余的涂抹案子!')
|
||||||
else:
|
|
||||||
sleep(5)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.info(f"【{program_name}】开始运行")
|
logging.info(f'【{program_name}】开始运行')
|
||||||
auto_photo_mask.main()
|
auto_photo_mask.main()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_logger = logging.getLogger("error")
|
error_logger = logging.getLogger('error')
|
||||||
error_logger.error(traceback.format_exc())
|
error_logger.error(traceback.format_exc())
|
||||||
if SEND_ERROR_EMAIL:
|
if SEND_ERROR_EMAIL:
|
||||||
send_error_email(program_name, repr(e), traceback.format_exc())
|
send_error_email(program_name, repr(e), traceback.format_exc())
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
from paddleocr import PaddleOCR
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
项目配置
|
项目配置
|
||||||
"""
|
"""
|
||||||
@@ -40,13 +38,3 @@ SIMILAR_CHAR = {
|
|||||||
"侯": ["候"],
|
"侯": ["候"],
|
||||||
"宇": ["字"],
|
"宇": ["字"],
|
||||||
}
|
}
|
||||||
|
|
||||||
# 如果不希望识别出空格,可以设置use_space_char=False。做此项设置一定要测试,2.7.3版本此项设置有bug,会导致识别失败
|
|
||||||
OCR = PaddleOCR(
|
|
||||||
gpu_id=0,
|
|
||||||
show_log=False,
|
|
||||||
det_db_thresh=0.1,
|
|
||||||
det_db_box_thresh=0.3,
|
|
||||||
det_limit_side_len=1248,
|
|
||||||
drop_score=0.3
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
import logging.config
|
import logging.config
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import shutil
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
@@ -10,9 +12,10 @@ from sqlalchemy import update, and_
|
|||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxPhrec, ZxPhhd
|
from db.mysql import ZxPhrec, ZxPhhd
|
||||||
from log import HOSTNAME
|
from log import HOSTNAME
|
||||||
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
|
from photo_mask import PHHD_BATCH_SIZE, SLEEP_MINUTES, NAME_KEYS, ID_CARD_NUM_KEYS, SIMILAR_CHAR
|
||||||
|
from photo_review import set_batch_id
|
||||||
from ucloud import BUCKET, ufile
|
from ucloud import BUCKET, ufile
|
||||||
from util import image_util, util
|
from util import image_util, common_util, model_util
|
||||||
|
|
||||||
|
|
||||||
def find_boxes(content, layout, offset=0, length=None, improve=False, image_path=None, extra_content=None):
|
def find_boxes(content, layout, offset=0, length=None, improve=False, image_path=None, extra_content=None):
|
||||||
@@ -55,14 +58,15 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
|
|||||||
if improve:
|
if improve:
|
||||||
# 再次识别,提高精度
|
# 再次识别,提高精度
|
||||||
image = cv2.imread(image_path)
|
image = cv2.imread(image_path)
|
||||||
|
img_name, img_ext = common_util.parse_save_path(image_path)
|
||||||
# 截图时偏大一点
|
# 截图时偏大一点
|
||||||
capture_box = util.zoom_rectangle(box, 0.2)
|
capture_box = common_util.zoom_rectangle(box, 0.2)
|
||||||
captured_image = image_util.capture(image, capture_box)
|
captured_image = image_util.capture(image, capture_box)
|
||||||
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
|
captured_image_path = common_util.get_processed_img_path(f'{img_name}.capture.{img_ext}')
|
||||||
captured_image, offset_x, offset_y = image_util.expand_to_a4_size(captured_image)
|
cv2.imwrite(captured_image_path, captured_image)
|
||||||
cv2.imwrite(temp_file.name, captured_image)
|
captured_a4_img_path, offset_x, offset_y = image_util.expand_to_a4_size(captured_image_path)
|
||||||
try:
|
try:
|
||||||
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
layouts = common_util.ocr_result_to_layout(model_util.ocr(captured_a4_img_path))
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
layouts = []
|
layouts = []
|
||||||
@@ -86,22 +90,17 @@ def find_boxes(content, layout, offset=0, length=None, improve=False, image_path
|
|||||||
temp_box[3] + capture_box[1] - offset_y,
|
temp_box[3] + capture_box[1] - offset_y,
|
||||||
])
|
])
|
||||||
break
|
break
|
||||||
util.delete_temp_file(temp_file.name)
|
|
||||||
|
|
||||||
if not boxes:
|
if not boxes:
|
||||||
boxes.append(box)
|
boxes.append(box)
|
||||||
return boxes
|
return boxes
|
||||||
|
|
||||||
|
|
||||||
def get_mask_layout(image, name, id_card_num):
|
def get_mask_layout(img_path, name, id_card_num):
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
|
||||||
cv2.imwrite(temp_file.name, image)
|
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
layouts = util.get_ocr_layout(OCR, temp_file.name)
|
layouts = common_util.ocr_result_to_layout(model_util.ocr(img_path))
|
||||||
# layouts = OCR.parse({"doc": temp_file.name})["layout"]
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# 如果是类型错误,大概率是没识别到文字
|
# 如果是类型错误,大概率是没识别到文字
|
||||||
layouts = []
|
layouts = []
|
||||||
@@ -135,12 +134,12 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
find_id_card_num_by_key = True
|
find_id_card_num_by_key = True
|
||||||
matches = re.findall(r, layout[1])
|
matches = re.findall(r, layout[1])
|
||||||
for match in matches:
|
for match in matches:
|
||||||
result += find_boxes(match, layout, improve=True, image_path=temp_file.name, extra_content=r)
|
result += find_boxes(match, layout, improve=True, image_path=img_path, extra_content=r)
|
||||||
find_name_by_key = False
|
find_name_by_key = False
|
||||||
break
|
break
|
||||||
|
|
||||||
if id_card_num in layout[1]:
|
if id_card_num in layout[1]:
|
||||||
result += find_boxes(id_card_num, layout, improve=True, image_path=temp_file.name)
|
result += find_boxes(id_card_num, layout, improve=True, image_path=img_path)
|
||||||
find_id_card_num_by_key = False
|
find_id_card_num_by_key = False
|
||||||
|
|
||||||
def _find_boxes_by_keys(keys):
|
def _find_boxes_by_keys(keys):
|
||||||
@@ -163,8 +162,6 @@ def get_mask_layout(image, name, id_card_num):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("涂抹时出错!", exc_info=e)
|
logging.error("涂抹时出错!", exc_info=e)
|
||||||
return result
|
return result
|
||||||
finally:
|
|
||||||
util.delete_temp_file(temp_file.name)
|
|
||||||
|
|
||||||
|
|
||||||
def handle_image_for_mask(split_result):
|
def handle_image_for_mask(split_result):
|
||||||
@@ -174,11 +171,15 @@ def handle_image_for_mask(split_result):
|
|||||||
return expand_img, split_result["x_offset"], split_result["y_offset"]
|
return expand_img, split_result["x_offset"], split_result["y_offset"]
|
||||||
|
|
||||||
|
|
||||||
def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
|
def mask_photo(img_path, name, id_card_num, color=(255, 255, 255)):
|
||||||
def _mask(i, n, icn, c):
|
def _mask(ip, n, icn, c):
|
||||||
|
i = cv2.imread(ip)
|
||||||
|
img_name, img_ext = common_util.parse_save_path(ip)
|
||||||
do_mask = False
|
do_mask = False
|
||||||
split_results = image_util.split(i)
|
split_results = image_util.split(ip)
|
||||||
for split_result in split_results:
|
for split_result in split_results:
|
||||||
|
if not split_result['img']:
|
||||||
|
continue
|
||||||
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)
|
to_mask_img, x_offset, y_offset = handle_image_for_mask(split_result)
|
||||||
results = get_mask_layout(to_mask_img, n, icn)
|
results = get_mask_layout(to_mask_img, n, icn)
|
||||||
|
|
||||||
@@ -193,27 +194,27 @@ def mask_photo(img_url, name, id_card_num, color=(255, 255, 255)):
|
|||||||
result[3] + y_offset,
|
result[3] + y_offset,
|
||||||
)
|
)
|
||||||
cv2.rectangle(i, (int(result[0]), int(result[1])), (int(result[2]), int(result[3])), c, -1, 0)
|
cv2.rectangle(i, (int(result[0]), int(result[1])), (int(result[2]), int(result[3])), c, -1, 0)
|
||||||
return do_mask, i
|
masked_path = common_util.get_processed_img_path(f'{img_name}.mask.{img_ext}')
|
||||||
|
cv2.imwrite(masked_path, i)
|
||||||
|
return do_mask, masked_path
|
||||||
|
|
||||||
# 打开图片
|
original_image = img_path
|
||||||
image = image_util.read(img_url)
|
is_masked, img_path = _mask(img_path, name, id_card_num, color)
|
||||||
original_image = image
|
|
||||||
is_masked, image = _mask(image, name, id_card_num, color)
|
|
||||||
if not is_masked:
|
if not is_masked:
|
||||||
# 如果没有涂抹,可能是图片方向不对
|
# 如果没有涂抹,可能是图片方向不对
|
||||||
angles = image_util.parse_rotation_angles(image)
|
angles = model_util.clas_orientation(img_path)
|
||||||
angle = angles[0]
|
angle = angles[0]
|
||||||
if angle != "0":
|
if angle != "0":
|
||||||
image = image_util.rotate(image, int(angle))
|
img_path = image_util.rotate(img_path, int(angle))
|
||||||
is_masked, image = _mask(image, name, id_card_num, color)
|
is_masked, img_path = _mask(img_path, name, id_card_num, color)
|
||||||
if not is_masked:
|
if not is_masked:
|
||||||
# 如果旋转后也没有涂抹,恢复原来的方向
|
# 如果旋转后也没有涂抹,恢复原来的方向
|
||||||
image = original_image
|
img_path = original_image
|
||||||
else:
|
else:
|
||||||
# 如果旋转有效果,打一个日志
|
# 如果旋转有效果,打一个日志
|
||||||
logging.info(f"图片旋转了{angle}°")
|
logging.info(f"图片旋转了{angle}°")
|
||||||
|
|
||||||
return is_masked, image
|
return is_masked, img_path
|
||||||
|
|
||||||
|
|
||||||
def photo_mask(pk_phhd, name, id_card_num):
|
def photo_mask(pk_phhd, name, id_card_num):
|
||||||
@@ -223,32 +224,37 @@ def photo_mask(pk_phhd, name, id_card_num):
|
|||||||
ZxPhrec.cRectype.in_(["3", "4"])
|
ZxPhrec.cRectype.in_(["3", "4"])
|
||||||
)).all()
|
)).all()
|
||||||
session.close()
|
session.close()
|
||||||
|
# 同一批图的标识
|
||||||
|
set_batch_id(uuid.uuid4().hex)
|
||||||
|
processed_img_dir = common_util.get_processed_img_path('')
|
||||||
|
os.makedirs(processed_img_dir, exist_ok=True)
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
img_url = ufile.get_private_url(phrec.cfjaddress)
|
img_url = ufile.get_private_url(phrec.cfjaddress)
|
||||||
if not img_url:
|
if not img_url:
|
||||||
continue
|
continue
|
||||||
|
original_img_path = common_util.save_to_local(img_url)
|
||||||
is_masked, image = mask_photo(img_url, name, id_card_num)
|
img_path = common_util.get_processed_img_path(phrec.cfjaddress)
|
||||||
|
shutil.copy2(original_img_path, img_path)
|
||||||
|
is_masked, image = mask_photo(img_path, name, id_card_num)
|
||||||
|
|
||||||
# 如果涂抹了要备份以及更新
|
# 如果涂抹了要备份以及更新
|
||||||
if is_masked:
|
if is_masked:
|
||||||
ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
|
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
|
||||||
cv2.imwrite(temp_file.name, image)
|
|
||||||
try:
|
try:
|
||||||
ufile.upload_file(phrec.cfjaddress, temp_file.name)
|
ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
|
||||||
|
ufile.upload_file(phrec.cfjaddress, image)
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_flag = (update(ZxPhrec).where(ZxPhrec.pk_phrec == phrec.pk_phrec).values(
|
update_flag = (update(ZxPhrec).where(ZxPhrec.pk_phrec == phrec.pk_phrec).values(
|
||||||
paint_user=HOSTNAME,
|
paint_user=HOSTNAME,
|
||||||
paint_date=util.get_default_datetime()))
|
paint_date=common_util.get_default_datetime()))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("上传图片出错", exc_info=e)
|
logging.error("上传图片出错", exc_info=e)
|
||||||
finally:
|
|
||||||
util.delete_temp_file(temp_file.name)
|
# 删除多余图片
|
||||||
|
if os.path.exists(processed_img_dir) and os.path.isdir(processed_img_dir):
|
||||||
|
shutil.rmtree(processed_img_dir)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -278,7 +284,7 @@ def main():
|
|||||||
update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
|
update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
|
||||||
paint_flag="8",
|
paint_flag="8",
|
||||||
paint_user=HOSTNAME,
|
paint_user=HOSTNAME,
|
||||||
paint_date=util.get_default_datetime(),
|
paint_date=common_util.get_default_datetime(),
|
||||||
fZcfwfy=time.time() - start_time))
|
fZcfwfy=time.time() - start_time))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from db import MysqlSession
|
|||||||
from db.mysql import ZxIeOcrerror, ZxPhrec
|
from db.mysql import ZxIeOcrerror, ZxPhrec
|
||||||
from photo_mask.auto_photo_mask import mask_photo
|
from photo_mask.auto_photo_mask import mask_photo
|
||||||
from ucloud import ufile
|
from ucloud import ufile
|
||||||
from util import image_util, util
|
from util import image_util, common_util
|
||||||
|
|
||||||
|
|
||||||
def check_error(error_ocr):
|
def check_error(error_ocr):
|
||||||
@@ -91,7 +91,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
|
update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
|
||||||
checktime=util.get_default_datetime(), cfjaddress2=error_descript))
|
checktime=common_util.get_default_datetime(), cfjaddress2=error_descript))
|
||||||
session.execute(update_error)
|
session.execute(update_error)
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from sqlalchemy import update, and_
|
|||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxIeOcrerror
|
from db.mysql import ZxIeOcrerror
|
||||||
from photo_mask.photo_mask_error_check import auto_check_error
|
from photo_mask.photo_mask_error_check import auto_check_error
|
||||||
from util import util
|
from util import common_util
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
today = date.today()
|
today = date.today()
|
||||||
@@ -29,7 +29,7 @@ if __name__ == '__main__':
|
|||||||
if error_descript == "未知错误":
|
if error_descript == "未知错误":
|
||||||
check_time = None
|
check_time = None
|
||||||
else:
|
else:
|
||||||
check_time = util.get_default_datetime()
|
check_time = common_util.get_default_datetime()
|
||||||
|
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
|
update_error = (update(ZxIeOcrerror).where(ZxIeOcrerror.pk_phrec == ocr_error.pk_phrec).values(
|
||||||
@@ -41,5 +41,5 @@ if __name__ == '__main__':
|
|||||||
print(result)
|
print(result)
|
||||||
with open("photo_mask_error_report.txt", 'w', encoding='utf-8') as file:
|
with open("photo_mask_error_report.txt", 'w', encoding='utf-8') as file:
|
||||||
file.write(json.dumps(result, indent=4, ensure_ascii=False))
|
file.write(json.dumps(result, indent=4, ensure_ascii=False))
|
||||||
file.write(util.get_default_datetime())
|
file.write(common_util.get_default_datetime())
|
||||||
print("结果已保存。")
|
print("结果已保存。")
|
||||||
|
|||||||
@@ -5,36 +5,36 @@ from time import sleep
|
|||||||
|
|
||||||
from sqlalchemy import update
|
from sqlalchemy import update
|
||||||
|
|
||||||
from auto_email.error_email import send_error_email
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxPhhd
|
from db.mysql import ZxPhhd
|
||||||
from log import LOGGING_CONFIG
|
from log import LOGGING_CONFIG
|
||||||
|
from my_email.error_email import send_error_email
|
||||||
from photo_review import auto_photo_review, SEND_ERROR_EMAIL
|
from photo_review import auto_photo_review, SEND_ERROR_EMAIL
|
||||||
|
|
||||||
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
|
# 照片审核自动识别脚本入口
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
program_name = '照片审核自动识别脚本'
|
program_name = '照片审核自动识别脚本'
|
||||||
logging.config.dictConfig(LOGGING_CONFIG)
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
|
||||||
|
logging.info('等待接口服务启动...')
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
|
parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.clean:
|
if args.clean:
|
||||||
# 主要用于启动时,清除仍在涂抹中的案子
|
# 启动时清除仍在识别中的案子
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
|
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
logging.info("已释放残余的识别案子!")
|
logging.info('已释放残余的识别案子!')
|
||||||
else:
|
|
||||||
sleep(5)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.info(f"【{program_name}】开始运行")
|
logging.info(f'【{program_name}】开始运行')
|
||||||
auto_photo_review.main()
|
auto_photo_review.main()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_logger = logging.getLogger('error')
|
logging.getLogger('error').error(traceback.format_exc())
|
||||||
error_logger.error(traceback.format_exc())
|
|
||||||
if SEND_ERROR_EMAIL:
|
if SEND_ERROR_EMAIL:
|
||||||
send_error_email(program_name, repr(e), traceback.format_exc())
|
send_error_email(program_name, repr(e), traceback.format_exc())
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
import jieba
|
import jieba
|
||||||
from paddlenlp import Taskflow
|
|
||||||
from paddleocr import PaddleOCR
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
项目配置
|
项目配置
|
||||||
@@ -11,52 +9,8 @@ PHHD_BATCH_SIZE = 10
|
|||||||
SLEEP_MINUTES = 5
|
SLEEP_MINUTES = 5
|
||||||
# 是否发送报错邮件
|
# 是否发送报错邮件
|
||||||
SEND_ERROR_EMAIL = True
|
SEND_ERROR_EMAIL = True
|
||||||
# 是否开启布局分析
|
# 处理批号(这里仅起声明作用)
|
||||||
LAYOUT_ANALYSIS = False
|
BATCH_ID = ''
|
||||||
|
|
||||||
"""
|
|
||||||
信息抽取关键词配置
|
|
||||||
"""
|
|
||||||
# 患者姓名
|
|
||||||
PATIENT_NAME = ['患者姓名']
|
|
||||||
# 入院日期
|
|
||||||
ADMISSION_DATE = ['入院日期']
|
|
||||||
# 出院日期
|
|
||||||
DISCHARGE_DATE = ['出院日期']
|
|
||||||
# 发生医疗费
|
|
||||||
MEDICAL_EXPENSES = ['费用总额']
|
|
||||||
# 个人现金支付
|
|
||||||
PERSONAL_CASH_PAYMENT = ['个人现金支付']
|
|
||||||
# 个人账户支付
|
|
||||||
PERSONAL_ACCOUNT_PAYMENT = ['个人账户支付']
|
|
||||||
# 个人自费金额
|
|
||||||
PERSONAL_FUNDED_AMOUNT = ['自费金额', '个人自费']
|
|
||||||
# 医保类别
|
|
||||||
MEDICAL_INSURANCE_TYPE = ['医保类型']
|
|
||||||
# 就诊医院
|
|
||||||
HOSPITAL = ['医院']
|
|
||||||
# 就诊科室
|
|
||||||
DEPARTMENT = ['科室']
|
|
||||||
# 主治医生
|
|
||||||
DOCTOR = ['主治医生']
|
|
||||||
# 住院号
|
|
||||||
ADMISSION_ID = ['住院号']
|
|
||||||
# 医保结算单号码
|
|
||||||
SETTLEMENT_ID = ['医保结算单号码']
|
|
||||||
# 年龄
|
|
||||||
AGE = ['年龄']
|
|
||||||
# 大写总额
|
|
||||||
UPPERCASE_MEDICAL_EXPENSES = ['大写总额']
|
|
||||||
|
|
||||||
SETTLEMENT_LIST_SCHEMA = \
|
|
||||||
(PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES + PERSONAL_CASH_PAYMENT
|
|
||||||
+ PERSONAL_ACCOUNT_PAYMENT + PERSONAL_FUNDED_AMOUNT + MEDICAL_INSURANCE_TYPE + ADMISSION_ID + SETTLEMENT_ID
|
|
||||||
+ UPPERCASE_MEDICAL_EXPENSES)
|
|
||||||
|
|
||||||
DISCHARGE_RECORD_SCHEMA = \
|
|
||||||
HOSPITAL + DEPARTMENT + PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + DOCTOR + ADMISSION_ID + AGE
|
|
||||||
|
|
||||||
COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPENSES
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
别名配置
|
别名配置
|
||||||
@@ -92,13 +46,32 @@ jieba.suggest_freq(('胆', '道'), True)
|
|||||||
jieba.suggest_freq(('脾', '胃'), True)
|
jieba.suggest_freq(('脾', '胃'), True)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
模型配置
|
出院记录缺页判断关键词配置
|
||||||
'''
|
'''
|
||||||
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
|
DISCHARGE_KEY = {
|
||||||
task_path='model/settlement_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
'入院诊断': ['入院诊断'],
|
||||||
DISCHARGE_IE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
|
'入院情况': ['入院情况', '入院时情况', '入院时主要症状'],
|
||||||
task_path='model/discharge_record_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
'入院日期': ['入院日期', '入院时间'],
|
||||||
COST_IE = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base', device_id=1,
|
'诊疗经过': ['诊疗经过', '住院经过', '治疗经过'],
|
||||||
task_path='model/cost_list_model', layout_analysis=LAYOUT_ANALYSIS, precision='fp16')
|
'出院诊断': ['出院诊断'],
|
||||||
|
'出院情况': ['出院情况', '出院时情况'],
|
||||||
|
'出院日期': ['出院日期', '出院时间'],
|
||||||
|
'出院医嘱': ['出院医嘱', '出院医瞩']
|
||||||
|
}
|
||||||
|
|
||||||
OCR = PaddleOCR(use_angle_cls=False, show_log=False, gpu_id=1, det_db_box_thresh=0.3)
|
|
||||||
|
def get_batch_id():
|
||||||
|
"""
|
||||||
|
获取处理批号
|
||||||
|
:return: 处理批号
|
||||||
|
"""
|
||||||
|
return BATCH_ID
|
||||||
|
|
||||||
|
|
||||||
|
def set_batch_id(batch_id):
|
||||||
|
"""
|
||||||
|
修改处理批号哦
|
||||||
|
:param batch_id: 新批号
|
||||||
|
"""
|
||||||
|
global BATCH_ID
|
||||||
|
BATCH_ID = batch_id
|
||||||
|
|||||||
@@ -1,113 +1,76 @@
|
|||||||
|
import copy
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import re
|
||||||
|
import shutil
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import fitz
|
|
||||||
import jieba
|
import jieba
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import requests
|
|
||||||
import zxingcpp
|
import zxingcpp
|
||||||
from rapidfuzz import process, fuzz
|
from rapidfuzz import process, fuzz
|
||||||
from sqlalchemy import update
|
from sqlalchemy import update
|
||||||
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
|
from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
|
||||||
from log import HOSTNAME
|
from log import HOSTNAME
|
||||||
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
|
||||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
|
DEPARTMENT_FILTER, DISCHARGE_KEY, set_batch_id, get_batch_id
|
||||||
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
|
from services.paddle_services import IE_KEY
|
||||||
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
|
from ucloud import ufile, BUCKET
|
||||||
from ucloud import ufile
|
from util import image_util, common_util, html_util, model_util
|
||||||
from util import image_util, util, html_util
|
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, handle_insurance_type, \
|
||||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, parse_hospital, \
|
||||||
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
|
parse_page_num, handle_tiny_int
|
||||||
parse_hospital
|
|
||||||
|
|
||||||
|
|
||||||
# 合并信息抽取结果
|
def parse_qrcode(img_path, image_id):
|
||||||
def merge_result(result1, result2):
|
"""
|
||||||
for key in result2:
|
解析二维码,尝试从中获取高清图片
|
||||||
result1[key] = result1.get(key, []) + result2[key]
|
:param img_path: 待解析图片
|
||||||
return result1
|
:param image_id: 图片id
|
||||||
|
:return: 解析结果
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def ie_temp_image(ie, ocr, image):
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
|
||||||
cv2.imwrite(temp_file.name, image)
|
|
||||||
|
|
||||||
ie_result = []
|
|
||||||
try:
|
|
||||||
layout = util.get_ocr_layout(ocr, temp_file.name)
|
|
||||||
if not layout:
|
|
||||||
# 无识别结果
|
|
||||||
ie_result = []
|
|
||||||
else:
|
|
||||||
ie_result = ie({"doc": temp_file.name, "layout": layout})[0]
|
|
||||||
except Exception as e:
|
|
||||||
logging.error("信息抽取时出错", exc_info=e)
|
|
||||||
finally:
|
|
||||||
try:
|
|
||||||
os.remove(temp_file.name)
|
|
||||||
except Exception as e:
|
|
||||||
logging.info(f"删除临时文件 {temp_file.name} 时出错", exc_info=e)
|
|
||||||
return ie_result
|
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
|
||||||
def request_ie_result(task_enum, phrecs):
|
|
||||||
url = task_enum.request_url()
|
|
||||||
identity = int(time.time())
|
|
||||||
images = []
|
|
||||||
for phrec in phrecs:
|
|
||||||
images.append({"name": phrec.cfjaddress, "pk": phrec.pk_phrec})
|
|
||||||
payload = {"images": images, "schema": task_enum.schema(), "pk_phhd": phrecs[0].pk_phhd, "identity": identity}
|
|
||||||
response = requests.post(url, json=payload)
|
|
||||||
|
|
||||||
if response.status_code == 200:
|
|
||||||
return response.json()["data"]
|
|
||||||
else:
|
|
||||||
raise Exception(f"请求信息抽取结果失败,状态码:{response.status_code}")
|
|
||||||
|
|
||||||
|
|
||||||
# 尝试从二维码中获取高清图片
|
|
||||||
def get_better_image_from_qrcode(image, image_id, dpi=150):
|
|
||||||
def _parse_pdf_url(pdf_url_to_parse):
|
def _parse_pdf_url(pdf_url_to_parse):
|
||||||
pdf_file = None
|
|
||||||
local_pdf_path = None
|
local_pdf_path = None
|
||||||
|
img_name, img_ext = common_util.parse_save_path(img_path)
|
||||||
try:
|
try:
|
||||||
local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
|
local_pdf_path = html_util.download_pdf(pdf_url_to_parse)
|
||||||
# 打开PDF文件
|
pdf_imgs = image_util.pdf_to_imgs(local_pdf_path)
|
||||||
pdf_file = fitz.open(local_pdf_path)
|
# 结算单部分
|
||||||
# 选择第一页
|
better_settlement_path = common_util.get_processed_img_path(f'{img_name}.better_settlement.jpg')
|
||||||
page = pdf_file[0]
|
cv2.imwrite(better_settlement_path, pdf_imgs[0][0])
|
||||||
# 定义缩放系数(DPI)
|
# 费用清单部分
|
||||||
default_dpi = 72
|
better_cost_path = common_util.get_processed_img_path(f'{img_name}.better_cost.jpg')
|
||||||
zoom = dpi / default_dpi
|
total_height = sum([p[0].shape[0] for p in pdf_imgs[1:]])
|
||||||
# 设置矩阵变换参数
|
common_width = pdf_imgs[1][0].shape[1]
|
||||||
mat = fitz.Matrix(zoom, zoom)
|
better_cost_img = np.zeros((total_height, common_width, 3), dtype=np.uint8)
|
||||||
# 渲染页面
|
current_y = 0
|
||||||
pix = page.get_pixmap(matrix=mat)
|
for pdf in pdf_imgs[1:]:
|
||||||
# 将渲染结果转换为OpenCV兼容的格式
|
height = pdf[0].shape[0]
|
||||||
img = np.frombuffer(pix.samples, dtype=np.uint8).reshape((pix.height, pix.width, -1))
|
better_cost_img[current_y:current_y + height, :, :] = pdf[0]
|
||||||
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
current_y += height
|
||||||
return img, page.get_text()
|
# cost_text += pdf[1] # 费用清单文本暂时没用到
|
||||||
|
cv2.imwrite(better_cost_path, better_cost_img)
|
||||||
|
|
||||||
|
return better_settlement_path, pdf_imgs[0][1], better_cost_path
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
logging.getLogger('error').error('解析pdf失败!', exc_info=ex)
|
logging.getLogger('error').error('解析pdf失败!', exc_info=ex)
|
||||||
return None, None
|
return None, None, None
|
||||||
finally:
|
finally:
|
||||||
if pdf_file:
|
|
||||||
pdf_file.close()
|
|
||||||
if local_pdf_path:
|
if local_pdf_path:
|
||||||
util.delete_temp_file(local_pdf_path)
|
common_util.delete_temp_file(local_pdf_path)
|
||||||
|
|
||||||
jsczt_base_url = 'http://einvoice.jsczt.cn'
|
jsczt_base_url = 'http://einvoice.jsczt.cn'
|
||||||
try:
|
try:
|
||||||
results = zxingcpp.read_barcodes(image)
|
img = cv2.imread(img_path)
|
||||||
|
results = zxingcpp.read_barcodes(img, text_mode=zxingcpp.TextMode.HRI)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.getLogger('error').info('二维码识别失败', exc_info=e)
|
logging.getLogger('error').info('二维码识别失败', exc_info=e)
|
||||||
results = []
|
results = []
|
||||||
@@ -132,138 +95,122 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
|
|||||||
if not pdf_url:
|
if not pdf_url:
|
||||||
continue
|
continue
|
||||||
return _parse_pdf_url(pdf_url)
|
return _parse_pdf_url(pdf_url)
|
||||||
elif url.startswith('http://weixin.qq.com'):
|
elif (url.startswith('http://weixin.qq.com')
|
||||||
|
or url == 'https://ybj.jszwfw.gov.cn/hsa-app-panel/index.html'):
|
||||||
# 无效地址
|
# 无效地址
|
||||||
continue
|
continue
|
||||||
|
elif url.startswith('http://dzpj.ntzyy.com'):
|
||||||
|
# 南通市中医院
|
||||||
|
return _parse_pdf_url(url)
|
||||||
|
# elif url.startswith('https://apph5.ztejsapp.cn/nj/view/elecInvoiceForOther/QRCode2Invoice'):
|
||||||
|
# pdf_url = html_util.get_dtsrmyy_pdf_url(url)
|
||||||
|
# if not pdf_url:
|
||||||
|
# continue
|
||||||
|
# return _parse_pdf_url(pdf_url)
|
||||||
else:
|
else:
|
||||||
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
logging.getLogger('qr').info(f'[{image_id}]中有未知二维码内容:{url}')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
|
logging.getLogger('error').error('从二维码中获取高清图片时出错', exc_info=e)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return None, None
|
return None, None, None
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
def information_extraction(ie, phrecs, identity):
|
def information_extraction(phrec, pk_phhd):
|
||||||
result = {}
|
"""
|
||||||
for phrec in phrecs:
|
处理单张图片
|
||||||
img_path = ufile.get_private_url(phrec.cfjaddress)
|
:param phrec:图片信息
|
||||||
if not img_path:
|
:param pk_phhd:案子主键
|
||||||
continue
|
:return:记录类型,信息抽取结果
|
||||||
|
"""
|
||||||
image = image_util.read(img_path)
|
img_path = common_util.get_processed_img_path(phrec.cfjaddress)
|
||||||
|
if not os.path.exists(img_path):
|
||||||
# 尝试从二维码中获取高清图片
|
original_img_path = common_util.get_img_path(phrec.cfjaddress)
|
||||||
better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
|
if not original_img_path:
|
||||||
if phrec.cRectype != '1':
|
img_url = ufile.get_private_url(phrec.cfjaddress)
|
||||||
better_image = None # 非结算单暂时不进行替换
|
if not img_url:
|
||||||
zx_ie_results = []
|
return None, None, None
|
||||||
if better_image is not None:
|
original_img_path = common_util.save_to_local(img_url)
|
||||||
img_angle = '0'
|
shutil.copy2(original_img_path, img_path)
|
||||||
image = better_image
|
if image_util.is_photo(img_path):
|
||||||
if text:
|
book_img_path = model_util.det_book(img_path) # 识别文档区域并裁剪
|
||||||
info_extract = ie(text)[0]
|
dewarped_img_path = model_util.dewarp(book_img_path) # 去扭曲
|
||||||
else:
|
else: # todo:也可能是图片,后续添加细分逻辑
|
||||||
info_extract = ie_temp_image(ie, OCR, image)
|
dewarped_img_path = img_path
|
||||||
ie_result = {'result': info_extract, 'angle': '0'}
|
angles = model_util.clas_orientation(dewarped_img_path)
|
||||||
|
ocr_text = ''
|
||||||
now = util.get_default_datetime()
|
info_extract = []
|
||||||
if not ie_result['result']:
|
rec_type = None
|
||||||
|
for angle in angles:
|
||||||
|
ocr_result = []
|
||||||
|
rotated_img = image_util.rotate(dewarped_img_path, int(angle))
|
||||||
|
split_results = image_util.split(rotated_img)
|
||||||
|
for split_result in split_results:
|
||||||
|
if split_result['img'] is None:
|
||||||
continue
|
continue
|
||||||
|
a4_img = image_util.expand_to_a4_size(split_result['img'])
|
||||||
|
tmp_ocr_result = model_util.ocr(a4_img)
|
||||||
|
if tmp_ocr_result:
|
||||||
|
ocr_result += tmp_ocr_result
|
||||||
|
tmp_ocr_text = common_util.ocr_result_to_text(ocr_result)
|
||||||
|
|
||||||
result_json = json.dumps(ie_result['result'], ensure_ascii=False)
|
# if any(key in tmp_ocr_text for key in ['出院记录', '出院小结', '死亡记录']):
|
||||||
if len(result_json) > 5000:
|
# tmp_rec_type = '出院记录'
|
||||||
result_json = result_json[:5000]
|
# elif any(key in tmp_ocr_text for key in ['费用汇总清单', '费用清单', '费用明细', '结账清单', '费用小项统计']):
|
||||||
zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
|
# tmp_rec_type = '费用清单'
|
||||||
cfjaddress=phrec.cfjaddress, content=result_json,
|
# elif any(key in tmp_ocr_text for key in ['住院收费票据', '结算单', '财政部监制', '结算凭证']):
|
||||||
rotation_angle=int(ie_result['angle']),
|
# tmp_rec_type = '基本医保结算单'
|
||||||
x_offset=0, y_offset=0, create_time=now,
|
# else:
|
||||||
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
# tmp_rec_type = model_util.clas_text(tmp_ocr_text) if tmp_ocr_text else None
|
||||||
|
# if not tmp_rec_type:
|
||||||
result = merge_result(result, ie_result['result'])
|
rec_dict = {
|
||||||
|
'1': '基本医保结算单',
|
||||||
|
'3': '出院记录',
|
||||||
|
'4': '费用清单',
|
||||||
|
}
|
||||||
|
tmp_rec_type = rec_dict.get(phrec.cRectype)
|
||||||
|
if tmp_rec_type == '基本医保结算单':
|
||||||
|
tmp_info_extract = model_util.ie_settlement(rotated_img, common_util.ocr_result_to_layout(ocr_result))
|
||||||
|
elif tmp_rec_type == '出院记录':
|
||||||
|
tmp_info_extract = model_util.ie_discharge(rotated_img, common_util.ocr_result_to_layout(ocr_result))
|
||||||
|
elif tmp_rec_type == '费用清单':
|
||||||
|
tmp_info_extract = model_util.ie_cost(rotated_img, common_util.ocr_result_to_layout(ocr_result))
|
||||||
else:
|
else:
|
||||||
target_images = []
|
tmp_info_extract = []
|
||||||
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
|
|
||||||
if not target_images:
|
|
||||||
target_images.append(image) # 识别失败
|
|
||||||
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计
|
|
||||||
for target_image in target_images:
|
|
||||||
# dewarped_image = dewarp.dewarp_image(target_image) # 去扭曲
|
|
||||||
dewarped_image = target_image
|
|
||||||
angles = image_util.parse_rotation_angles(dewarped_image)
|
|
||||||
|
|
||||||
split_results = image_util.split(dewarped_image)
|
if tmp_info_extract and len(tmp_info_extract) > len(info_extract):
|
||||||
for split_result in split_results:
|
info_extract = tmp_info_extract
|
||||||
if split_result['img'] is None or split_result['img'].size == 0:
|
ocr_text = tmp_ocr_text
|
||||||
continue
|
rec_type = tmp_rec_type
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[0]))
|
|
||||||
ie_results = [{'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[0]}]
|
|
||||||
if not ie_results[0]['result'] or len(ie_results[0]['result']) < len(ie.kwargs.get('schema')):
|
|
||||||
rotated_img = image_util.rotate(split_result['img'], int(angles[1]))
|
|
||||||
ie_results.append({'result': ie_temp_image(ie, OCR, rotated_img), 'angle': angles[1]})
|
|
||||||
now = util.get_default_datetime()
|
|
||||||
best_angle = ['0', 0]
|
|
||||||
for ie_result in ie_results:
|
|
||||||
if not ie_result['result']:
|
|
||||||
continue
|
|
||||||
|
|
||||||
result_json = json.dumps(ie_result['result'], ensure_ascii=False)
|
if info_extract:
|
||||||
if len(result_json) > 5000:
|
result_json = json.dumps(info_extract, ensure_ascii=False)
|
||||||
result_json = result_json[:5000]
|
if len(result_json) > 5000:
|
||||||
zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
|
result_json = result_json[:5000]
|
||||||
cfjaddress=phrec.cfjaddress, content=result_json,
|
|
||||||
rotation_angle=int(ie_result['angle']),
|
|
||||||
x_offset=split_result['x_offset'],
|
|
||||||
y_offset=split_result['y_offset'], create_time=now,
|
|
||||||
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
|
||||||
|
|
||||||
result = merge_result(result, ie_result['result'])
|
|
||||||
|
|
||||||
if len(ie_result['result']) > best_angle[1]:
|
|
||||||
best_angle = [ie_result['angle'], len(ie_result['result'])]
|
|
||||||
|
|
||||||
angle_count[best_angle[0]] += 1
|
|
||||||
img_angle = max(angle_count, key=angle_count.get)
|
|
||||||
|
|
||||||
if img_angle != '0' or better_image is not None:
|
|
||||||
image = image_util.rotate(image, int(img_angle))
|
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
|
|
||||||
cv2.imwrite(temp_file.name, image)
|
|
||||||
try:
|
|
||||||
ufile.upload_file(phrec.cfjaddress, temp_file.name)
|
|
||||||
if img_angle != '0':
|
|
||||||
logging.info(f'旋转图片[{phrec.cfjaddress}]替换成功,已旋转{img_angle}度。')
|
|
||||||
# 修正旋转角度
|
|
||||||
for zx_ie_result in zx_ie_results:
|
|
||||||
zx_ie_result.rotation_angle -= int(img_angle)
|
|
||||||
else:
|
|
||||||
logging.info(f'高清图片[{phrec.cfjaddress}]替换成功!')
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f'上传图片({phrec.cfjaddress})失败', exc_info=e)
|
|
||||||
finally:
|
|
||||||
util.delete_temp_file(temp_file.name)
|
|
||||||
|
|
||||||
|
now = common_util.get_default_datetime()
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
session.add_all(zx_ie_results)
|
session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=get_batch_id(),
|
||||||
|
cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
|
||||||
|
creator=HOSTNAME, update_time=now, updater=HOSTNAME))
|
||||||
session.commit()
|
session.commit()
|
||||||
session.close()
|
session.close()
|
||||||
|
return rec_type, info_extract, ocr_text
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# 从keys中获取准确率最高的value
|
# 从keys中获取准确率最高的value
|
||||||
def get_best_value_in_keys(source, keys):
|
def get_best_value_of_key(source, key):
|
||||||
# 最终结果
|
# 最终结果
|
||||||
result = None
|
result = None
|
||||||
# 最大可能性
|
# 最大可能性
|
||||||
best_probability = 0
|
best_probability = 0
|
||||||
for key in keys:
|
values = source.get(key)
|
||||||
values = source.get(key)
|
if values:
|
||||||
if values:
|
for value in values:
|
||||||
for value in values:
|
for v in value:
|
||||||
text = value.get("text")
|
text = v.get("text")
|
||||||
probability = value.get("probability")
|
probability = v.get("probability")
|
||||||
if text and probability > best_probability:
|
if text and probability > best_probability:
|
||||||
result = text
|
result = text
|
||||||
best_probability = probability
|
best_probability = probability
|
||||||
@@ -271,11 +218,11 @@ def get_best_value_in_keys(source, keys):
|
|||||||
|
|
||||||
|
|
||||||
# 从keys中获取所有value组成list
|
# 从keys中获取所有value组成list
|
||||||
def get_values_of_keys(source, keys):
|
def get_values_of_key(source, key):
|
||||||
result = []
|
result = []
|
||||||
for key in keys:
|
values = source.get(key)
|
||||||
value = source.get(key)
|
if values:
|
||||||
if value:
|
for value in values:
|
||||||
for v in value:
|
for v in value:
|
||||||
v = v.get("text")
|
v = v.get("text")
|
||||||
if v:
|
if v:
|
||||||
@@ -289,7 +236,7 @@ def save_or_update_ie(table, pk_phhd, data):
|
|||||||
obj = table(**data)
|
obj = table(**data)
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none()
|
db_data = session.query(table).filter_by(pk_phhd=pk_phhd).one_or_none()
|
||||||
now = util.get_default_datetime()
|
now = common_util.get_default_datetime()
|
||||||
if db_data:
|
if db_data:
|
||||||
# 更新
|
# 更新
|
||||||
db_data.update_time = now
|
db_data.update_time = now
|
||||||
@@ -364,23 +311,24 @@ def search_department(department):
|
|||||||
return best_match
|
return best_match
|
||||||
|
|
||||||
|
|
||||||
def settlement_task(pk_phhd, settlement_list, identity):
|
def settlement_task(pk_phhd, settlement_list_ie_result):
|
||||||
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
|
||||||
settlement_data = {
|
settlement_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"name": handle_name(get_best_value_in_keys(settlement_list_ie_result, PATIENT_NAME)),
|
"name": handle_name(get_best_value_of_key(settlement_list_ie_result, IE_KEY['name'])),
|
||||||
"admission_date_str": handle_original_data(get_best_value_in_keys(settlement_list_ie_result, ADMISSION_DATE)),
|
"admission_date_str": handle_original_data(
|
||||||
"discharge_date_str": handle_original_data(get_best_value_in_keys(settlement_list_ie_result, DISCHARGE_DATE)),
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_date'])),
|
||||||
|
"discharge_date_str": handle_original_data(
|
||||||
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['discharge_date'])),
|
||||||
"personal_cash_payment_str": handle_original_data(
|
"personal_cash_payment_str": handle_original_data(
|
||||||
get_best_value_in_keys(settlement_list_ie_result, PERSONAL_CASH_PAYMENT)),
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_cash_payment'])),
|
||||||
"personal_account_payment_str": handle_original_data(
|
"personal_account_payment_str": handle_original_data(
|
||||||
get_best_value_in_keys(settlement_list_ie_result, PERSONAL_ACCOUNT_PAYMENT)),
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_account_payment'])),
|
||||||
"personal_funded_amount_str": handle_original_data(
|
"personal_funded_amount_str": handle_original_data(
|
||||||
get_best_value_in_keys(settlement_list_ie_result, PERSONAL_FUNDED_AMOUNT)),
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['personal_funded_amount'])),
|
||||||
"medical_insurance_type_str": handle_original_data(
|
"medical_insurance_type_str": handle_original_data(
|
||||||
get_best_value_in_keys(settlement_list_ie_result, MEDICAL_INSURANCE_TYPE)),
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_insurance_type'])),
|
||||||
"admission_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, ADMISSION_ID)),
|
"admission_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['admission_id'])),
|
||||||
"settlement_id": handle_id(get_best_value_in_keys(settlement_list_ie_result, SETTLEMENT_ID)),
|
"settlement_id": handle_id(get_best_value_of_key(settlement_list_ie_result, IE_KEY['settlement_id'])),
|
||||||
}
|
}
|
||||||
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
||||||
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
settlement_data["admission_date"] = handle_date(settlement_data["admission_date_str"])
|
||||||
@@ -390,27 +338,30 @@ def settlement_task(pk_phhd, settlement_list, identity):
|
|||||||
settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
|
settlement_data["personal_funded_amount"] = handle_decimal(settlement_data["personal_funded_amount_str"])
|
||||||
settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])
|
settlement_data["medical_insurance_type"] = handle_insurance_type(settlement_data["medical_insurance_type_str"])
|
||||||
|
|
||||||
parse_money_result = parse_money(get_best_value_in_keys(settlement_list_ie_result, UPPERCASE_MEDICAL_EXPENSES),
|
parse_money_result = parse_money(
|
||||||
get_best_value_in_keys(settlement_list_ie_result, MEDICAL_EXPENSES))
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['uppercase_medical_expenses']),
|
||||||
|
get_best_value_of_key(settlement_list_ie_result, IE_KEY['medical_expenses']))
|
||||||
settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
|
settlement_data["medical_expenses_str"] = handle_original_data(parse_money_result[0])
|
||||||
settlement_data["medical_expenses"] = parse_money_result[1]
|
settlement_data["medical_expenses"] = parse_money_result[1]
|
||||||
save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)
|
save_or_update_ie(ZxIeSettlement, pk_phhd, settlement_data)
|
||||||
|
return settlement_data
|
||||||
|
|
||||||
|
|
||||||
def discharge_task(pk_phhd, discharge_record, identity):
|
def discharge_task(pk_phhd, discharge_record_ie_result):
|
||||||
discharge_record_ie_result = information_extraction(DISCHARGE_IE, discharge_record, identity)
|
hospitals = get_values_of_key(discharge_record_ie_result, IE_KEY['hospital'])
|
||||||
hospitals = get_values_of_keys(discharge_record_ie_result, HOSPITAL)
|
departments = get_values_of_key(discharge_record_ie_result, IE_KEY['department'])
|
||||||
departments = get_values_of_keys(discharge_record_ie_result, DEPARTMENT)
|
|
||||||
discharge_data = {
|
discharge_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"hospital": handle_hospital(",".join(hospitals)),
|
"hospital": handle_hospital(",".join(hospitals)),
|
||||||
"department": handle_department(",".join(departments)),
|
"department": handle_department(",".join(departments)),
|
||||||
"name": handle_name(get_best_value_in_keys(discharge_record_ie_result, PATIENT_NAME)),
|
"name": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['name'])),
|
||||||
"admission_date_str": handle_original_data(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_DATE)),
|
"admission_date_str": handle_original_data(
|
||||||
"discharge_date_str": handle_original_data(get_best_value_in_keys(discharge_record_ie_result, DISCHARGE_DATE)),
|
get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_date'])),
|
||||||
"doctor": handle_name(get_best_value_in_keys(discharge_record_ie_result, DOCTOR)),
|
"discharge_date_str": handle_original_data(
|
||||||
"admission_id": handle_id(get_best_value_in_keys(discharge_record_ie_result, ADMISSION_ID)),
|
get_best_value_of_key(discharge_record_ie_result, IE_KEY['discharge_date'])),
|
||||||
"age": handle_age(get_best_value_in_keys(discharge_record_ie_result, AGE)),
|
"doctor": handle_name(get_best_value_of_key(discharge_record_ie_result, IE_KEY['doctor'])),
|
||||||
|
"admission_id": handle_id(get_best_value_of_key(discharge_record_ie_result, IE_KEY['admission_id'])),
|
||||||
|
"age": handle_age(get_best_value_of_key(discharge_record_ie_result, IE_KEY['age'])),
|
||||||
}
|
}
|
||||||
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
discharge_data["admission_date"] = handle_date(discharge_data["admission_date_str"])
|
||||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||||
@@ -467,52 +418,270 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
|||||||
if best_match:
|
if best_match:
|
||||||
discharge_data["pk_ylks"] = best_match[2]
|
discharge_data["pk_ylks"] = best_match[2]
|
||||||
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
|
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
|
||||||
|
return discharge_data
|
||||||
|
|
||||||
|
|
||||||
def cost_task(pk_phhd, cost_list, identity):
|
def cost_task(pk_phhd, cost_list_ie_result):
|
||||||
cost_list_ie_result = information_extraction(COST_IE, cost_list, identity)
|
|
||||||
cost_data = {
|
cost_data = {
|
||||||
"pk_phhd": pk_phhd,
|
"pk_phhd": pk_phhd,
|
||||||
"name": handle_name(get_best_value_in_keys(cost_list_ie_result, PATIENT_NAME)),
|
"name": handle_name(get_best_value_of_key(cost_list_ie_result, IE_KEY['name'])),
|
||||||
"admission_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, ADMISSION_DATE)),
|
"admission_date_str": handle_original_data(
|
||||||
"discharge_date_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, DISCHARGE_DATE)),
|
get_best_value_of_key(cost_list_ie_result, IE_KEY['admission_date'])),
|
||||||
"medical_expenses_str": handle_original_data(get_best_value_in_keys(cost_list_ie_result, MEDICAL_EXPENSES))
|
"discharge_date_str": handle_original_data(
|
||||||
|
get_best_value_of_key(cost_list_ie_result, IE_KEY['discharge_date'])),
|
||||||
|
"medical_expenses_str": handle_original_data(
|
||||||
|
get_best_value_of_key(cost_list_ie_result, IE_KEY['medical_expenses']))
|
||||||
}
|
}
|
||||||
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
cost_data["admission_date"] = handle_date(cost_data["admission_date_str"])
|
||||||
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
cost_data["discharge_date"] = handle_date(cost_data["discharge_date_str"])
|
||||||
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
cost_data["medical_expenses"] = handle_decimal(cost_data["medical_expenses_str"])
|
||||||
|
if cost_list_ie_result.get(IE_KEY['page']):
|
||||||
|
page_nums, page_count = parse_page_num(cost_list_ie_result[IE_KEY['page']])
|
||||||
|
if page_nums:
|
||||||
|
page_nums_str = [str(num) for num in page_nums]
|
||||||
|
cost_data['page_nums'] = handle_original_data(','.join(page_nums_str))
|
||||||
|
cost_data['page_count'] = handle_tiny_int(page_count)
|
||||||
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
save_or_update_ie(ZxIeCost, pk_phhd, cost_data)
|
||||||
|
return cost_data
|
||||||
|
|
||||||
|
|
||||||
def photo_review(pk_phhd):
|
def parse_pdf_text(settlement_text):
|
||||||
settlement_list = []
|
pattern = (r'(?:交款人:(.*?)\n|住院时间:(.*?)至(.*?)\n|\(小写\)(.*?)\n|个人现金支付:(.*?)\n|个人账户支付:(.*?)\n'
|
||||||
discharge_record = []
|
r'|个人自费:(.*?)\n|医保类型:(.*?)\n|住院科别:(.*?)\n|住院号:(.*?)\n|票据号码:(.*?)\n|)')
|
||||||
cost_list = []
|
# 查找所有匹配项
|
||||||
|
matches = re.findall(pattern, settlement_text)
|
||||||
|
results = {}
|
||||||
|
keys = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费', '医保类型',
|
||||||
|
'科室', '住院号', '医保结算单号码']
|
||||||
|
|
||||||
|
for match in matches:
|
||||||
|
for key, value in zip(keys, match):
|
||||||
|
if value:
|
||||||
|
results[key] = [[{'text': value, 'probability': 1}]]
|
||||||
|
settlement_key = ['患者姓名', '入院日期', '出院日期', '费用总额', '个人现金支付', '个人账户支付', '个人自费',
|
||||||
|
'医保类型', '住院号', '医保结算单号码']
|
||||||
|
discharge_key = ['科室', '患者姓名', '入院日期', '出院日期', '住院号']
|
||||||
|
cost_key = ['患者姓名', '入院日期', '出院日期', '费用总额']
|
||||||
|
settlement_result = {key: copy.copy(results[key]) for key in settlement_key if key in results}
|
||||||
|
discharge_result = {key: copy.copy(results[key]) for key in discharge_key if key in results}
|
||||||
|
cost_result = {key: copy.copy(results[key]) for key in cost_key if key in results}
|
||||||
|
return settlement_result, discharge_result, cost_result
|
||||||
|
|
||||||
|
|
||||||
|
def photo_review(pk_phhd, name):
|
||||||
|
"""
|
||||||
|
处理单个报销案子
|
||||||
|
:param pk_phhd: 报销单主键
|
||||||
|
:param name: 报销人姓名
|
||||||
|
"""
|
||||||
|
settlement_result = defaultdict(list)
|
||||||
|
discharge_result = defaultdict(list)
|
||||||
|
cost_result = defaultdict(list)
|
||||||
|
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.pk_phhd, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
|
phrecs = session.query(ZxPhrec.pk_phrec, ZxPhrec.cRectype, ZxPhrec.cfjaddress).filter(
|
||||||
ZxPhrec.pk_phhd == pk_phhd
|
ZxPhrec.pk_phhd == pk_phhd
|
||||||
).all()
|
).order_by(ZxPhrec.cRectype, ZxPhrec.rowno).all()
|
||||||
session.close()
|
session.close()
|
||||||
for phrec in phrecs:
|
|
||||||
if phrec.cRectype == "1":
|
|
||||||
settlement_list.append(phrec)
|
|
||||||
elif phrec.cRectype == "3":
|
|
||||||
discharge_record.append(phrec)
|
|
||||||
elif phrec.cRectype == "4":
|
|
||||||
cost_list.append(phrec)
|
|
||||||
|
|
||||||
# 同一批图的标识
|
# 同一批图的标识
|
||||||
identity = int(time.time())
|
set_batch_id(uuid.uuid4().hex)
|
||||||
settlement_task(pk_phhd, settlement_list, identity)
|
processed_img_dir = common_util.get_processed_img_path('')
|
||||||
discharge_task(pk_phhd, discharge_record, identity)
|
os.makedirs(processed_img_dir, exist_ok=True)
|
||||||
cost_task(pk_phhd, cost_list, identity)
|
|
||||||
|
has_pdf = False # 是否获取到了pdf,获取到可以直接利用pdf更快的获取信息
|
||||||
|
better_settlement_path = None
|
||||||
|
better_cost_path = None
|
||||||
|
settlement_text = ''
|
||||||
|
qrcode_img_id = None
|
||||||
|
for phrec in phrecs:
|
||||||
|
original_img_path = common_util.get_img_path(phrec.cfjaddress)
|
||||||
|
if not original_img_path:
|
||||||
|
img_url = ufile.get_private_url(phrec.cfjaddress)
|
||||||
|
if not img_url:
|
||||||
|
continue
|
||||||
|
original_img_path = common_util.save_to_local(img_url)
|
||||||
|
img_path = common_util.get_processed_img_path(phrec.cfjaddress)
|
||||||
|
shutil.copy2(original_img_path, img_path)
|
||||||
|
# 尝试从二维码中获取高清图片
|
||||||
|
better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress)
|
||||||
|
if better_settlement_path:
|
||||||
|
has_pdf = True
|
||||||
|
qrcode_img_id = phrec.cfjaddress
|
||||||
|
break
|
||||||
|
|
||||||
|
discharge_text = ''
|
||||||
|
if has_pdf:
|
||||||
|
settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text)
|
||||||
|
discharge_ie_result = defaultdict(list)
|
||||||
|
|
||||||
|
is_cost_updated = False
|
||||||
|
for phrec in phrecs:
|
||||||
|
if phrec.cRectype == '1':
|
||||||
|
if phrec.cfjaddress == qrcode_img_id:
|
||||||
|
try:
|
||||||
|
ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
|
||||||
|
ufile.upload_file(phrec.cfjaddress, better_settlement_path)
|
||||||
|
except Exception as e:
|
||||||
|
logging.error("更新结算单pdf图片出错", exc_info=e)
|
||||||
|
elif phrec.cRectype == '3':
|
||||||
|
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
|
||||||
|
if rec_type == '出院记录':
|
||||||
|
discharge_text += ocr_text
|
||||||
|
for key, value in ie_result.items():
|
||||||
|
discharge_ie_result[key].append(value)
|
||||||
|
# 暂不替换费用清单
|
||||||
|
# elif phrec.cRectype == '4':
|
||||||
|
# if not is_cost_updated:
|
||||||
|
# try:
|
||||||
|
# ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
|
||||||
|
# ufile.upload_file(phrec.cfjaddress, better_cost_path)
|
||||||
|
# except Exception as e:
|
||||||
|
# logging.error("更新费用清单pdf图片出错", exc_info=e)
|
||||||
|
# finally:
|
||||||
|
# is_cost_updated = True
|
||||||
|
|
||||||
|
# 合并出院记录
|
||||||
|
for key, value in discharge_ie_result.items():
|
||||||
|
ie_value = get_best_value_of_key(discharge_ie_result, key)
|
||||||
|
pdf_value = discharge_result.get(key)[0][0]['text'] if discharge_result.get(key) else ''
|
||||||
|
similarity_ratio = fuzz.ratio(ie_value, pdf_value)
|
||||||
|
if similarity_ratio < 60:
|
||||||
|
discharge_result[key] = [[{'text': ie_value, 'probability': 1}]]
|
||||||
|
else:
|
||||||
|
for phrec in phrecs:
|
||||||
|
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
|
||||||
|
if rec_type == '基本医保结算单':
|
||||||
|
rec_result = settlement_result
|
||||||
|
elif rec_type == '出院记录':
|
||||||
|
rec_result = discharge_result
|
||||||
|
discharge_text += ocr_text
|
||||||
|
elif rec_type == '费用清单':
|
||||||
|
rec_result = cost_result
|
||||||
|
else:
|
||||||
|
rec_result = None
|
||||||
|
if rec_result is not None:
|
||||||
|
for key, value in ie_result.items():
|
||||||
|
rec_result[key].append(value)
|
||||||
|
|
||||||
|
# 删除多余图片
|
||||||
|
if os.path.exists(processed_img_dir) and os.path.isdir(processed_img_dir):
|
||||||
|
shutil.rmtree(processed_img_dir)
|
||||||
|
|
||||||
|
settlement_data = settlement_task(pk_phhd, settlement_result)
|
||||||
|
discharge_data = discharge_task(pk_phhd, discharge_result)
|
||||||
|
cost_data = cost_task(pk_phhd, cost_result)
|
||||||
|
|
||||||
|
# 三项资料完整性判断
|
||||||
|
# 三项资料缺项判断
|
||||||
|
review_result = {
|
||||||
|
'pk_phhd': pk_phhd,
|
||||||
|
'has_settlement': bool(settlement_result),
|
||||||
|
'has_discharge': bool(discharge_result),
|
||||||
|
'has_cost': bool(cost_result),
|
||||||
|
}
|
||||||
|
if (review_result['has_settlement'] and settlement_data.get('personal_account_payment')
|
||||||
|
and settlement_data.get('personal_cash_payment') and settlement_data.get('medical_expenses')):
|
||||||
|
review_result['has_settlement'] &= (
|
||||||
|
float(settlement_data['personal_account_payment']) + float(settlement_data['personal_cash_payment'])
|
||||||
|
< float(settlement_data['medical_expenses'])
|
||||||
|
)
|
||||||
|
if has_pdf:
|
||||||
|
review_result['has_discharge'] &= bool(discharge_text)
|
||||||
|
|
||||||
|
# 三项资料缺页判断
|
||||||
|
page_description = []
|
||||||
|
if review_result['has_discharge']:
|
||||||
|
for discharge_item in DISCHARGE_KEY:
|
||||||
|
if not any(key in discharge_text for key in DISCHARGE_KEY[discharge_item]):
|
||||||
|
page_description.append(f"《出院记录》缺页")
|
||||||
|
break
|
||||||
|
|
||||||
|
if review_result['has_cost']:
|
||||||
|
cost_missing_page = {}
|
||||||
|
if cost_data.get('page_nums') and cost_data.get('page_count'):
|
||||||
|
page_nums = cost_data['page_nums'].split(',')
|
||||||
|
required_set = set(range(1, cost_data['page_count'] + 1))
|
||||||
|
page_set = set([int(num) for num in page_nums])
|
||||||
|
cost_missing_page = required_set - page_set
|
||||||
|
if cost_missing_page:
|
||||||
|
cost_missing_page = sorted(cost_missing_page)
|
||||||
|
cost_missing_page = [str(num) for num in cost_missing_page]
|
||||||
|
page_description.append(f"《住院费用清单》,缺第{','.join(cost_missing_page)}页")
|
||||||
|
|
||||||
|
if page_description:
|
||||||
|
review_result['full_page'] = False
|
||||||
|
review_result['page_description'] = ';'.join(page_description)
|
||||||
|
else:
|
||||||
|
review_result['full_page'] = True
|
||||||
|
|
||||||
|
review_result['integrity'] = (review_result['has_settlement'] and review_result['has_discharge']
|
||||||
|
and review_result['has_cost'] and review_result['full_page'])
|
||||||
|
|
||||||
|
# 三项资料一致性判断
|
||||||
|
# 姓名一致性
|
||||||
|
name_list = [settlement_data['name'], discharge_data['name'], cost_data['name']]
|
||||||
|
if sum(not bool(n) for n in name_list) > 1: # 有2个及以上空值直接认为都不一致
|
||||||
|
review_result['name_match'] = '0'
|
||||||
|
else:
|
||||||
|
unique_name = set(name_list)
|
||||||
|
if len(unique_name) == 1:
|
||||||
|
review_result['name_match'] = '1' if name == unique_name.pop() else '5'
|
||||||
|
elif len(unique_name) == 2:
|
||||||
|
if settlement_data['name'] != discharge_data['name'] and settlement_data['name'] != cost_data['name']:
|
||||||
|
review_result['name_match'] = '2'
|
||||||
|
elif discharge_data['name'] != settlement_data['name'] and discharge_data['name'] != cost_data['name']:
|
||||||
|
review_result['name_match'] = '3'
|
||||||
|
else:
|
||||||
|
review_result['name_match'] = '4'
|
||||||
|
else:
|
||||||
|
review_result['name_match'] = '0'
|
||||||
|
|
||||||
|
# 住院日期一致性
|
||||||
|
if (settlement_data['admission_date'] and discharge_data['admission_date']
|
||||||
|
and settlement_data['discharge_date'] and discharge_data['discharge_date']
|
||||||
|
and settlement_data['admission_date'] == discharge_data['admission_date']
|
||||||
|
and settlement_data['discharge_date'] == discharge_data['discharge_date']):
|
||||||
|
review_result['admission_date_match'] = '1'
|
||||||
|
else:
|
||||||
|
review_result['admission_date_match'] = '0'
|
||||||
|
|
||||||
|
# 出院日期一致性
|
||||||
|
discharge_date_list = [settlement_data['discharge_date'], discharge_data['discharge_date'],
|
||||||
|
cost_data['discharge_date']]
|
||||||
|
if sum(not bool(d) for d in discharge_date_list) > 1:
|
||||||
|
review_result['discharge_date_match'] = '0'
|
||||||
|
else:
|
||||||
|
unique_discharge_date = set(discharge_date_list)
|
||||||
|
if len(unique_discharge_date) == 1:
|
||||||
|
review_result['discharge_date_match'] = '1'
|
||||||
|
elif len(unique_discharge_date) == 2:
|
||||||
|
if (settlement_data['discharge_date'] != discharge_data['discharge_date']
|
||||||
|
and settlement_data['discharge_date'] != cost_data['discharge_date']):
|
||||||
|
review_result['discharge_date_match'] = '2'
|
||||||
|
elif (discharge_data['discharge_date'] != settlement_data['discharge_date']
|
||||||
|
and discharge_data['discharge_date'] != cost_data['discharge_date']):
|
||||||
|
review_result['discharge_date_match'] = '3'
|
||||||
|
else:
|
||||||
|
review_result['discharge_date_match'] = '4'
|
||||||
|
else:
|
||||||
|
review_result['discharge_date_match'] = '0'
|
||||||
|
|
||||||
|
review_result['consistency'] = (
|
||||||
|
review_result['name_match'] == '1' and review_result['admission_date_match'] == '1'
|
||||||
|
and review_result['discharge_date_match'] == '1')
|
||||||
|
|
||||||
|
review_result['success'] = review_result['integrity'] and review_result['consistency']
|
||||||
|
save_or_update_ie(ZxIeReview, pk_phhd, review_result)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
"""
|
||||||
|
照片审核批量控制
|
||||||
|
"""
|
||||||
while 1:
|
while 1:
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
phhds = (session.query(ZxPhhd.pk_phhd)
|
phhds = (session.query(ZxPhhd.pk_phhd, ZxPhhd.cXm)
|
||||||
.join(ZxPhrec, ZxPhhd.pk_phhd == ZxPhrec.pk_phhd, isouter=True)
|
.join(ZxPhrec, ZxPhhd.pk_phhd == ZxPhrec.pk_phhd, isouter=True)
|
||||||
.filter(ZxPhhd.exsuccess_flag == "1")
|
.filter(ZxPhhd.exsuccess_flag == "1")
|
||||||
.filter(ZxPhrec.pk_phrec.isnot(None))
|
.filter(ZxPhrec.pk_phrec.isnot(None))
|
||||||
@@ -529,14 +698,14 @@ def main():
|
|||||||
pk_phhd = phhd.pk_phhd
|
pk_phhd = phhd.pk_phhd
|
||||||
logging.info(f"开始识别:{pk_phhd}")
|
logging.info(f"开始识别:{pk_phhd}")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
photo_review(pk_phhd)
|
photo_review(pk_phhd, phhd.cXm)
|
||||||
|
|
||||||
# 识别完成更新标识
|
# 识别完成更新标识
|
||||||
session = MysqlSession()
|
session = MysqlSession()
|
||||||
update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
|
update_flag = (update(ZxPhhd).where(ZxPhhd.pk_phhd == pk_phhd).values(
|
||||||
exsuccess_flag="8",
|
exsuccess_flag="8",
|
||||||
ref_id1=HOSTNAME,
|
ref_id1=HOSTNAME,
|
||||||
checktime=util.get_default_datetime(),
|
checktime=common_util.get_default_datetime(),
|
||||||
fFSYLFY=time.time() - start_time))
|
fFSYLFY=time.time() - start_time))
|
||||||
session.execute(update_flag)
|
session.execute(update_flag)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from sqlalchemy.sql.functions import count
|
|||||||
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import ZxPhhd, ViewErrorReview
|
from db.mysql import ZxPhhd, ViewErrorReview
|
||||||
from util import util
|
from util import common_util
|
||||||
|
|
||||||
|
|
||||||
def handle_reason(reason):
|
def handle_reason(reason):
|
||||||
@@ -95,5 +95,5 @@ if __name__ == '__main__':
|
|||||||
print(result)
|
print(result)
|
||||||
with open("photo_review_error_report.txt", 'w', encoding='utf-8') as file:
|
with open("photo_review_error_report.txt", 'w', encoding='utf-8') as file:
|
||||||
file.write(json.dumps(result, indent=4, ensure_ascii=False))
|
file.write(json.dumps(result, indent=4, ensure_ascii=False))
|
||||||
file.write(util.get_default_datetime())
|
file.write(common_util.get_default_datetime())
|
||||||
print("结果已保存。")
|
print("结果已保存。")
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
|
beautifulsoup4==4.12.3 # 网页分析
|
||||||
|
jieba==0.42.1 # 中文分词
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
onnxconverter-common==1.14.0
|
OpenCC==1.1.9 # 中文繁简转换
|
||||||
OpenCC==1.1.6
|
|
||||||
opencv-python==4.6.0.66
|
opencv-python==4.6.0.66
|
||||||
paddle2onnx==1.2.3
|
opencv-python-headless==4.10.0.84
|
||||||
paddleclas==2.5.2
|
|
||||||
paddlenlp==2.6.1
|
|
||||||
paddleocr==2.7.3
|
|
||||||
pillow==10.4.0
|
pillow==10.4.0
|
||||||
|
PyMuPDF==1.24.9 # pdf处理
|
||||||
pymysql==1.1.1
|
pymysql==1.1.1
|
||||||
|
rapidfuzz==3.9.4 #文本相似度
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
sqlacodegen==2.3.0.post1
|
sqlacodegen==2.3.0.post1 # 实体类生成
|
||||||
sqlalchemy==1.4.52
|
sqlalchemy==1.4.52 # ORM框架
|
||||||
tenacity==8.5.0
|
tenacity==8.5.0 # 重试
|
||||||
ufile==3.2.9
|
ufile==3.2.9 # 云空间
|
||||||
zxing-cpp==2.2.0
|
zxing-cpp==2.2.0 # 二维码识别
|
||||||
245
services/paddle_services/.dockerignore
Normal file
245
services/paddle_services/.dockerignore
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
### PyCharm template
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff
|
||||||
|
.idea/**/workspace.xml
|
||||||
|
.idea/**/tasks.xml
|
||||||
|
.idea/**/usage.statistics.xml
|
||||||
|
.idea/**/dictionaries
|
||||||
|
.idea/**/shelf
|
||||||
|
|
||||||
|
# AWS User-specific
|
||||||
|
.idea/**/aws.xml
|
||||||
|
|
||||||
|
# Generated files
|
||||||
|
.idea/**/contentModel.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files
|
||||||
|
.idea/**/dataSources/
|
||||||
|
.idea/**/dataSources.ids
|
||||||
|
.idea/**/dataSources.local.xml
|
||||||
|
.idea/**/sqlDataSources.xml
|
||||||
|
.idea/**/dynamic.xml
|
||||||
|
.idea/**/uiDesigner.xml
|
||||||
|
.idea/**/dbnavigator.xml
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.idea/**/gradle.xml
|
||||||
|
.idea/**/libraries
|
||||||
|
|
||||||
|
# Gradle and Maven with auto-import
|
||||||
|
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||||
|
# since they will be recreated, and may cause churn. Uncomment if using
|
||||||
|
# auto-import.
|
||||||
|
# .idea/artifacts
|
||||||
|
# .idea/compiler.xml
|
||||||
|
# .idea/jarRepositories.xml
|
||||||
|
# .idea/modules.xml
|
||||||
|
# .idea/*.iml
|
||||||
|
# .idea/modules
|
||||||
|
# *.iml
|
||||||
|
# *.ipr
|
||||||
|
|
||||||
|
# CMake
|
||||||
|
cmake-build-*/
|
||||||
|
|
||||||
|
# Mongo Explorer plugin
|
||||||
|
.idea/**/mongoSettings.xml
|
||||||
|
|
||||||
|
# File-based project format
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Cursive Clojure plugin
|
||||||
|
.idea/replstate.xml
|
||||||
|
|
||||||
|
# SonarLint plugin
|
||||||
|
.idea/sonarlint/
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
|
||||||
|
# Editor-based Rest Client
|
||||||
|
.idea/httpRequests
|
||||||
|
|
||||||
|
# Android studio 3.1+ serialized cache file
|
||||||
|
.idea/caches/build_file_checksums.ser
|
||||||
|
|
||||||
|
### Python template
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
|
|
||||||
|
# 通过卷绑定挂载到容器中
|
||||||
|
/log
|
||||||
|
/model
|
||||||
|
# docker
|
||||||
|
Dockerfile
|
||||||
28
services/paddle_services/Dockerfile
Normal file
28
services/paddle_services/Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Use the official Paddle GPU image as the base
FROM registry.baidubce.com/paddlepaddle/paddle:2.6.1-gpu-cuda12.0-cudnn8.9-trt8.6

# Working directory
WORKDIR /app

# Environment variables
ENV PYTHONUNBUFFERED=1 \
# Container time zone
TZ=Asia/Shanghai \
# pip mirror to speed up installs
PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple

# Install dependencies
COPY requirements.txt /app/requirements.txt
# FIX: "$TZ" must be double-quoted (or unquoted) so the shell expands it;
# the original single-quoted '$TZ' wrote the literal text $TZ into /etc/timezone.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo "$TZ" > /etc/timezone \
    && pip install --no-cache-dir -r requirements.txt \
    && pip uninstall -y onnxruntime onnxruntime-gpu \
    && pip install onnxruntime-gpu==1.18.0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# Copy the project into the container's /app
COPY . /app

# Exposed port
# EXPOSE 8081

# Run the API; concrete gunicorn arguments are given on the command line or in docker-compose.yml
ENTRYPOINT ["gunicorn"]
|
||||||
21
services/paddle_services/__init__.py
Normal file
21
services/paddle_services/__init__.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
"""
|
||||||
|
信息抽取关键词配置
|
||||||
|
"""
|
||||||
|
IE_KEY = {
|
||||||
|
'name': '患者姓名',
|
||||||
|
'admission_date': '入院日期',
|
||||||
|
'discharge_date': '出院日期',
|
||||||
|
'medical_expenses': '费用总额',
|
||||||
|
'personal_cash_payment': '个人现金支付',
|
||||||
|
'personal_account_payment': '个人账户支付',
|
||||||
|
'personal_funded_amount': '自费金额',
|
||||||
|
'medical_insurance_type': '医保类型',
|
||||||
|
'hospital': '医院',
|
||||||
|
'department': '科室',
|
||||||
|
'doctor': '主治医生',
|
||||||
|
'admission_id': '住院号',
|
||||||
|
'settlement_id': '医保结算单号码',
|
||||||
|
'age': '年龄',
|
||||||
|
'uppercase_medical_expenses': '大写总额',
|
||||||
|
'page': '页码',
|
||||||
|
}
|
||||||
30
services/paddle_services/clas_orientation.py
Normal file
30
services/paddle_services/clas_orientation.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddleclas import PaddleClas
|
||||||
|
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
CLAS = PaddleClas(model_name='text_image_orientation')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route(rule='/', methods=['POST'])
@process_request
def main():
    """Predict the rotation angle of the posted image.

    Rotating the image counter-clockwise by the returned angle makes it
    upright. Possible angle values are ['0', '90', '180', '270'].

    :return: the two most likely angles
    """
    image_path = request.form.get('img_path')
    prediction = next(CLAS.predict(input_data=image_path))[0]
    # Top-1 confidence too low: fall back to the default candidate angles.
    if prediction['scores'][0] < 0.5:
        return ['0', '90']
    return prediction['label_names']
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5005)
|
||||||
31
services/paddle_services/clas_text.py
Normal file
31
services/paddle_services/clas_text.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddlenlp import Taskflow
|
||||||
|
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
schema = ['基本医保结算单', '出院记录', '费用清单']
|
||||||
|
CLAS = Taskflow('zero_shot_text_classification', model='utc-xbase', schema=schema,
|
||||||
|
task_path='model/text_classification', precision='fp32')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'])
@process_request
def main():
    """Zero-shot text classification endpoint.

    Classifies the posted ``text`` into one of the configured document
    categories.

    :return: the predicted label, or ``None`` when there is no prediction
        or the confidence is below 0.8
    """
    text = request.form.get('text')
    predictions = CLAS(text)[0].get('predictions')
    if not predictions:
        return None
    top = predictions[0]
    score = top.get('score')
    # Reject low-confidence results. The original truthiness test
    # (`if cls_result['score'] and ...`) wrongly skipped this guard when
    # the score was exactly 0; an explicit None check fixes that.
    if score is not None and float(score) < 0.8:
        logging.info(f"识别结果置信度{top['score']}过低!text: {text}")
        return None
    return top['label']
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5008)
|
||||||
31
services/paddle_services/det_book.py
Normal file
31
services/paddle_services/det_book.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import logging.config
|
||||||
|
import os.path
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
from flask import Flask, request
|
||||||
|
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from paddle_detection import detector
|
||||||
|
from utils import process_request, parse_img_path
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'])
@process_request
def main():
    """Detect book/document areas in an image and save each crop.

    Reads ``img_path`` from the form data, runs the object detector, and
    writes every detected area next to the source image as
    ``<name>.book_<i>.<ext>``.

    :return: list of saved crop paths (empty when nothing was detected)
    """
    img_path = request.form.get('img_path')
    areas = detector.get_book_areas(img_path)

    dirname, img_name, img_ext = parse_img_path(img_path)
    books_path = []
    # enumerate() instead of indexing via range(len(...)).
    for i, area in enumerate(areas):
        save_path = os.path.join(dirname, f'{img_name}.book_{i}.{img_ext}')
        cv2.imwrite(save_path, area)
        books_path.append(save_path)
    return books_path
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5006)
|
||||||
28
services/paddle_services/dewarp.py
Normal file
28
services/paddle_services/dewarp.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
import logging.config
|
||||||
|
import os
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
from flask import Flask, request
|
||||||
|
|
||||||
|
from doc_dewarp import dewarper
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request, parse_img_path
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'])
@process_request
def main():
    """Dewarp the image at ``img_path`` and save the corrected copy.

    The result is written next to the original as ``<name>.dewarped.<ext>``.

    :return: path of the saved dewarped image
    """
    source_path = request.form.get('img_path')
    corrected = dewarper.dewarp_image(cv2.imread(source_path))
    directory, img_name, img_ext = parse_img_path(source_path)
    target_path = os.path.join(directory, f'{img_name}.dewarped.{img_ext}')
    cv2.imwrite(target_path, corrected)
    return target_path
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5007)
|
||||||
7
services/paddle_services/doc_dewarp/__init__.py
Normal file
7
services/paddle_services/doc_dewarp/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
import os

from onnxruntime import InferenceSession

# Absolute path to the DocTr++ dewarping model, resolved relative to the
# package's parent directory so it works regardless of the CWD.
MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                          'model', 'dewarp_model', 'doc_tr_pp.onnx')
# Module-level ONNX session pinned to GPU 0; loaded once at import time.
DOC_TR = InferenceSession(MODEL_PATH, providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])
|
||||||
@@ -11,10 +11,10 @@ def dewarp_image(image):
|
|||||||
y = to_tensor(image)
|
y = to_tensor(image)
|
||||||
|
|
||||||
img = np.transpose(img, (2, 0, 1))
|
img = np.transpose(img, (2, 0, 1))
|
||||||
bm = DOC_TR.run(None, {"image": img[None,]})[0]
|
bm = DOC_TR.run(None, {'image': img[None,]})[0]
|
||||||
bm = paddle.to_tensor(bm)
|
bm = paddle.to_tensor(bm)
|
||||||
bm = paddle.nn.functional.interpolate(
|
bm = paddle.nn.functional.interpolate(
|
||||||
bm, y.shape[2:], mode="bilinear", align_corners=False
|
bm, y.shape[2:], mode='bilinear', align_corners=False
|
||||||
)
|
)
|
||||||
bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
|
bm_nhwc = np.transpose(bm, (0, 2, 3, 1))
|
||||||
out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)
|
out = paddle.nn.functional.grid_sample(y, (bm_nhwc / 288 - 0.5) * 2)
|
||||||
|
Before Width: | Height: | Size: 76 KiB After Width: | Height: | Size: 76 KiB |
36
services/paddle_services/ie_cost.py
Normal file
36
services/paddle_services/ie_cost.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import json
|
||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddlenlp import Taskflow
|
||||||
|
|
||||||
|
from __init__ import IE_KEY
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
COST_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
|
||||||
|
'name', 'admission_date', 'discharge_date', 'medical_expenses', 'page'
|
||||||
|
])
|
||||||
|
COST = Taskflow('information_extraction', schema=COST_LIST_SCHEMA, model='uie-x-base',
|
||||||
|
task_path='model/cost_list_model', layout_analysis=False, precision='fp16')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'], endpoint='cost')
@process_request
def main():
    """Extract cost-list fields from a document image.

    Expects ``img_path`` and a JSON-encoded OCR ``layout`` in the form data.
    """
    form = request.form
    parsed_layout = json.loads(form.get('layout'))
    return COST({'doc': form.get('img_path'), 'layout': parsed_layout})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/text', methods=['POST'])
@process_request
def text():
    """Extract cost-list fields from plain text instead of an image."""
    return COST(request.form.get('text'))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5004)
|
||||||
36
services/paddle_services/ie_discharge.py
Normal file
36
services/paddle_services/ie_discharge.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import json
|
||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddlenlp import Taskflow
|
||||||
|
|
||||||
|
from __init__ import IE_KEY
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
DISCHARGE_RECORD_SCHEMA = tuple(IE_KEY[key] for key in [
|
||||||
|
'hospital', 'department', 'name', 'admission_date', 'discharge_date', 'doctor', 'admission_id', 'age'
|
||||||
|
])
|
||||||
|
DISCHARGE = Taskflow('information_extraction', schema=DISCHARGE_RECORD_SCHEMA, model='uie-x-base',
|
||||||
|
task_path='model/discharge_record_model', layout_analysis=False, precision='fp16')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'], endpoint='discharge')
@process_request
def main():
    """Extract discharge-record fields from a document image.

    Expects ``img_path`` and a JSON-encoded OCR ``layout`` in the form data.
    """
    form = request.form
    parsed_layout = json.loads(form.get('layout'))
    return DISCHARGE({'doc': form.get('img_path'), 'layout': parsed_layout})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/text', methods=['POST'])
@process_request
def text():
    """Extract discharge-record fields from plain text instead of an image."""
    return DISCHARGE(request.form.get('text'))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5003)
|
||||||
38
services/paddle_services/ie_settlement.py
Normal file
38
services/paddle_services/ie_settlement.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import json
|
||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddlenlp import Taskflow
|
||||||
|
|
||||||
|
from __init__ import IE_KEY
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
SETTLEMENT_LIST_SCHEMA = tuple(IE_KEY[key] for key in [
|
||||||
|
'name', 'admission_date', 'discharge_date', 'medical_expenses', 'personal_cash_payment',
|
||||||
|
'personal_account_payment', 'personal_funded_amount', 'medical_insurance_type', 'admission_id', 'settlement_id',
|
||||||
|
'uppercase_medical_expenses'
|
||||||
|
])
|
||||||
|
SETTLEMENT_IE = Taskflow('information_extraction', schema=SETTLEMENT_LIST_SCHEMA, model='uie-x-base',
|
||||||
|
task_path='model/settlement_list_model', layout_analysis=False, precision='fp16')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'], endpoint='settlement')
@process_request
def main():
    """Extract settlement-sheet fields from a document image.

    Expects ``img_path`` and a JSON-encoded OCR ``layout`` in the form data.
    """
    form = request.form
    parsed_layout = json.loads(form.get('layout'))
    return SETTLEMENT_IE({'doc': form.get('img_path'), 'layout': parsed_layout})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/text', methods=['POST'])
@process_request
def text():
    """Extract settlement-sheet fields from plain text instead of an image."""
    return SETTLEMENT_IE(request.form.get('text'))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5002)
|
||||||
70
services/paddle_services/log/__init__.py
Normal file
70
services/paddle_services/log/__init__.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import os
import socket

# Hostname, used to tell containers apart in log paths.
HOSTNAME = socket.gethostname()
# Make sure the log directories exist; create them if missing.
LOG_PATHS = [
    f'log/{HOSTNAME}/error',
]
for path in LOG_PATHS:
    if not os.path.exists(path):
        os.makedirs(path)

# Configuration dict for logging.config.dictConfig().
LOGGING_CONFIG = {
    'version': 1,  # required: schema version of the config format
    'disable_existing_loggers': False,  # keep loggers created before configuration

    # formatters define the log line layouts
    'formatters': {
        'standard': {
            'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s',
            'datefmt': '%Y-%m-%d %H:%M:%S',
        },
    },

    # handlers define where log records go
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',  # console handler
            'level': 'DEBUG',
            'formatter': 'standard',
            'stream': 'ext://sys.stdout',  # stdout; encoding follows the system, usually UTF-8
        },
        'file': {
            'class': 'logging.handlers.TimedRotatingFileHandler',  # file handler with rotation
            'level': 'INFO',
            'formatter': 'standard',
            'filename': f'log/{HOSTNAME}/fcb_photo_review.log',  # log file path
            'when': 'midnight',
            'interval': 1,
            'backupCount': 14,  # number of rotated backups to keep
            'encoding': 'utf-8',  # explicit UTF-8 so Chinese text is written correctly
        },
        'error': {
            'class': 'logging.handlers.TimedRotatingFileHandler',
            'level': 'INFO',
            'formatter': 'standard',
            'filename': f'log/{HOSTNAME}/error/fcb_photo_review_error.log',
            'when': 'midnight',
            'interval': 1,
            'backupCount': 14,
            'encoding': 'utf-8',
        },
    },

    # loggers wire handlers to logger names
    'loggers': {
        '': {  # root logger
            'handlers': ['console', 'file'],  # attached handlers
            'level': 'DEBUG',  # root logger level
            'propagate': False,  # do not propagate to ancestor loggers
        },
        'error': {
            'handlers': ['console', 'file', 'error'],
            'level': 'DEBUG',
            'propagate': False,
        },
    },
}
|
||||||
1
services/paddle_services/model/cost_list_model/README.md
Normal file
1
services/paddle_services/model/cost_list_model/README.md
Normal file
@@ -0,0 +1 @@
|
|||||||
|
住院费用清单信息抽取微调模型存放目录
|
||||||
1
services/paddle_services/model/dewarp_model/README.md
Normal file
1
services/paddle_services/model/dewarp_model/README.md
Normal file
@@ -0,0 +1 @@
|
|||||||
|
图片扭曲矫正模型存放目录
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
出院记录信息抽取微调模型存放目录
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
文档检测模型存放目录
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
基本医保结算单信息抽取微调模型存放目录
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
文本分类模型存放目录
|
||||||
24
services/paddle_services/ocr.py
Normal file
24
services/paddle_services/ocr.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import logging.config
|
||||||
|
|
||||||
|
from flask import Flask, request
|
||||||
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
|
from log import LOGGING_CONFIG
|
||||||
|
from utils import process_request
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
# 如果不希望识别出空格,可以设置use_space_char=False。做此项设置一定要测试,2.7.3版本此项设置有bug,会导致识别失败
|
||||||
|
OCR = PaddleOCR(use_angle_cls=False, show_log=False, det_db_thresh=0.1, det_db_box_thresh=0.3, det_limit_side_len=1248,
|
||||||
|
drop_score=0.3)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['POST'])
@process_request
def main():
    """Run OCR on the image at ``img_path`` and return the raw result."""
    return OCR.ocr(request.form.get('img_path'), cls=False)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
logging.config.dictConfig(LOGGING_CONFIG)
|
||||||
|
app.run('0.0.0.0', 5001)
|
||||||
1
services/paddle_services/paddle_detection/README.md
Normal file
1
services/paddle_services/paddle_detection/README.md
Normal file
@@ -0,0 +1 @@
|
|||||||
|
README_cn.md
|
||||||
7
services/paddle_services/paddle_detection/__init__.py
Normal file
7
services/paddle_services/paddle_detection/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
import os

from onnxruntime import InferenceSession

# Directory holding the exported object-detection model, resolved relative
# to the package's parent directory so it works regardless of the CWD.
MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'model', 'object_det_model')
# Module-level ONNX session (PP-YOLOE+ with NMS baked in) pinned to GPU 0;
# loaded once at import time.
PADDLE_DET = InferenceSession(os.path.join(MODEL_DIR, 'ppyoloe_plus_crn_l_80e_coco_w_nms.onnx'),
                              providers=['CUDAExecutionProvider'], provider_options=[{'device_id': 0}])
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user