优化接口图片传输方式

This commit is contained in:
2024-09-23 14:45:03 +08:00
parent a2a82df21c
commit c091a82a91
8 changed files with 89 additions and 62 deletions

View File

@@ -1,8 +1,7 @@
import base64
import os.path
import cv2
import numpy as np
from flask import Flask, request, jsonify, Blueprint
from flask import Flask, request, Blueprint
from paddle_detection import detector
from util.common_util import process_request
@@ -12,25 +11,23 @@ det_bp = Blueprint('det_bp', __name__)
app.register_blueprint(det_bp, url_prefix='/det')
@det_bp.route('/books', methods=['POST'])
@process_request
def books():
    """
    Detect the book/document areas of an image and save every crop to disk.

    The image is read from the local path given in the ``img_path`` form
    field, and each detected area is written next to it as
    ``<name>_book_<index><ext>``.

    :return: list with the file path of every saved crop
        (NOTE(review): turning this list into an HTTP response and handling
        errors is presumably done by ``process_request`` - confirm)
    :raises FileNotFoundError: if the image at ``img_path`` cannot be read
    :raises IOError: if a crop cannot be written
    """
    img_path = request.form['img_path']
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None, not by raising
        raise FileNotFoundError(f'cannot read image: {img_path}')
    result = detector.get_book_areas(image)

    # splitext keeps the directory in the stem, so the crops land beside the source image
    stem, ext = os.path.splitext(img_path)
    if not ext:
        # cv2.imwrite picks the encoder from the extension, so one is required
        ext = '.jpg'

    books_path = []
    for i, book in enumerate(result):
        save_path = f'{stem}_book_{i}{ext}'
        # The crops are image arrays: they must be encoded, not dumped as raw bytes,
        # otherwise the consumer cannot load them again with cv2.imread
        if not cv2.imwrite(save_path, book):
            raise IOError(f'cannot write image: {save_path}')
        books_path.append(save_path)
    return books_path
if __name__ == '__main__':
app.run("0.0.0.0", 5000)
app.run('0.0.0.0', 5000)

View File

@@ -1,6 +1,8 @@
import os
import socket
# 项目根目录
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# 获取主机名,方便区分容器
HOSTNAME = socket.gethostname()
# 检测日志文件的路径是否存在,不存在则创建

View File

@@ -1,12 +1,8 @@
import base64
import logging
import tempfile
from collections import defaultdict
import cv2
import numpy as np
import requests
from tenacity import retry, stop_after_attempt, wait_random
from paddle_detection import PADDLE_DET
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -51,26 +47,3 @@ def get_book_areas(image):
for book_area in book_areas:
result.append(image_util.capture(image, book_area["box"]))
return result
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取文档区域失败!"))
def request_book_areas(image, timeout=60):
    """
    Send an image to the detection service and return the cropped book areas.

    The image is JPEG-encoded and uploaded as a multipart file; the service
    answers with a JSON list of base64-encoded images.

    :param image: image array to analyse
    :param timeout: seconds to wait for the detection service before the
        request is aborted (and retried by the decorator)
    :return: list of decoded crops whose long/short side ratio is at most 6.5;
        an empty list when the service does not answer with HTTP 200
    """
    url = "http://det_api:5000/det/books"
    _, encoded_image = cv2.imencode('.jpg', image)
    files = {"image": ("image.jpg", encoded_image.tobytes())}
    # Without a timeout a hung service would block forever and the retry policy would never trigger
    response = requests.post(url, files=files, timeout=timeout)
    if response.status_code != 200:
        return []

    result = []
    for img_str in response.json():
        np_array = np.frombuffer(base64.b64decode(img_str), np.uint8)
        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode returns None for data it cannot decode; skip instead of failing on .shape
            continue
        height, width = img.shape[:2]
        # Filter out abnormal results: extremely elongated crops are discarded
        if max(height, width) / min(height, width) <= 6.5:
            result.append(img)
    return result

View File

@@ -11,27 +11,26 @@ from db.mysql import ZxPhhd
from log import LOGGING_CONFIG
from photo_review import auto_photo_review, SEND_ERROR_EMAIL
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
if __name__ == '__main__':
program_name = '照片审核自动识别脚本'
logging.config.dictConfig(LOGGING_CONFIG)
parser = argparse.ArgumentParser()
parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态")
parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
args = parser.parse_args()
if args.clean:
# 主要用于启动时,清除仍在涂抹中的案子
session = MysqlSession()
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1"))
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
session.execute(update_flag)
session.commit()
session.close()
logging.info("已释放残余的识别案子!")
logging.info('已释放残余的识别案子!')
else:
sleep(5)
try:
logging.info(f"{program_name}】开始运行")
logging.info(f'{program_name}】开始运行')
auto_photo_review.main()
except Exception as e:
error_logger = logging.getLogger('error')

View File

@@ -23,7 +23,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
from ucloud import ufile
from util import image_util, common_util, html_util
from util import image_util, common_util, html_util, model_util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
parse_hospital
@@ -148,12 +148,12 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
def information_extraction(ie, phrecs, identity):
result = {}
for phrec in phrecs:
img_path = ufile.get_private_url(phrec.cfjaddress)
if not img_path:
img_url = ufile.get_private_url(phrec.cfjaddress)
if not img_url:
continue
image = image_util.read(img_path)
img_path = image_util.save_to_local(img_url)
image = cv2.imread(img_path)
# 尝试从二维码中获取高清图片
better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
if phrec.cRectype != '1':
@@ -166,12 +166,11 @@ def information_extraction(ie, phrecs, identity):
info_extract = ie(text)[0]
else:
info_extract = ie_temp_image(ie, OCR, image)
ie_result = {'result': info_extract, 'angle': '0'}
now = common_util.get_default_datetime()
if not ie_result['result']:
if not info_extract:
continue
ie_result = {'result': info_extract, 'angle': img_angle}
now = common_util.get_default_datetime()
result_json = json.dumps(ie_result['result'], ensure_ascii=False)
if len(result_json) > 5000:
result_json = result_json[:5000]
@@ -184,7 +183,7 @@ def information_extraction(ie, phrecs, identity):
result = merge_result(result, ie_result['result'])
else:
target_images = []
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪
target_images += model_util.request_book_areas(img_path) # 识别文档区域并裁剪
if not target_images:
target_images.append(image) # 识别失败
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计

1
tmp_img/README.md Normal file
View File

@@ -0,0 +1 @@
本文件夹用于保存临时图片,方便各个服务间调用。

View File

@@ -1,12 +1,16 @@
import logging
import math
import os
import urllib.request
import cv2
import numpy
import requests
from paddleclas import PaddleClas
from tenacity import retry, stop_after_attempt, wait_random
from log import PROJECT_ROOT
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning("获取图片失败!"))
@@ -247,3 +251,35 @@ def combined(img1, img2):
combined_img[:height1, :width1] = img1
combined_img[:height2, width1:width1 + width2] = img2
return combined_img
def parse_img_url(url):
    """
    Split an image URL into the file's base name and its extension.

    The query string and the fragment are removed before the last path
    component is examined.

    :param url: image URL
    :return: two-element list ``[name, ext]``; ``ext`` carries no leading dot
        and is an empty string when the file name contains no dot
    """
    path = url.split('?', 1)[0].split('#', 1)[0]
    file_name = os.path.basename(path)
    name, dot, ext = file_name.rpartition('.')
    if not dot:
        # Always hand back two items so callers can unpack the result safely
        return [file_name, '']
    return [name, ext]
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('保存图片失败!'))
def save_to_local(img_url, save_path=None, timeout=30):
    """
    Download an image and store it on the local disk.

    :param img_url: image URL
    :param save_path: local destination including the file name; when omitted
        the file is stored in ``<PROJECT_ROOT>/tmp_img`` under the file name
        taken from the URL
    :param timeout: seconds to wait for the server before the request is
        aborted (and retried by the decorator)
    :return: local path the image was written to
    :raises requests.RequestException: if the download fails or the server
        answers with an error status
    """
    # Without a timeout a stalled download would block forever and the retry policy would never trigger
    response = requests.get(img_url, timeout=timeout)
    response.raise_for_status()  # treat HTTP error statuses as failures

    if save_path is None:
        # Join only the non-empty parts so a URL without an extension still yields a valid name
        file_name = '.'.join(part for part in parse_img_url(img_url) if part)
        save_path = os.path.join(PROJECT_ROOT, 'tmp_img', file_name)

    target_dir = os.path.dirname(save_path)
    if target_dir:
        # Do not rely on the destination folder having been created beforehand
        os.makedirs(target_dir, exist_ok=True)

    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path

20
util/model_util.py Normal file
View File

@@ -0,0 +1,20 @@
import logging
import cv2
import requests
from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取文档区域失败!'))
def request_book_areas(img_path, timeout=60):
    """
    Ask the detection service for the book areas of a locally stored image.

    Only the image path is sent; the service answers with a JSON list of
    paths of the cropped images, which are then loaded from disk.
    NOTE(review): this presumably requires both services to see the same
    files under the same paths - confirm against the deployment.

    :param img_path: path of the image to analyse
    :param timeout: seconds to wait for the detection service before the
        request is aborted (and retried by the decorator)
    :return: list of loaded crops; an empty list when the service does not
        answer with HTTP 200
    """
    url = 'http://det_api:5000/det/books'
    # Without a timeout a hung service would block forever and the retry policy would never trigger
    response = requests.post(url, data={'img_path': img_path}, timeout=timeout)
    if response.status_code != 200:
        return []

    books = []
    for book_path in response.json():
        book = cv2.imread(book_path)
        if book is None:
            # cv2.imread returns None for a missing/unreadable file; keep None out of the result
            logging.warning('读取文档区域图片失败: %s', book_path)
            continue
        books.append(book)
    return books