优化接口图片传输方式

This commit is contained in:
2024-09-23 14:45:03 +08:00
parent a2a82df21c
commit c091a82a91
8 changed files with 89 additions and 62 deletions

View File

@@ -1,8 +1,7 @@
import base64 import os.path
import cv2 import cv2
import numpy as np from flask import Flask, request, Blueprint
from flask import Flask, request, jsonify, Blueprint
from paddle_detection import detector from paddle_detection import detector
from util.common_util import process_request from util.common_util import process_request
@@ -12,25 +11,23 @@ det_bp = Blueprint('det_bp', __name__)
app.register_blueprint(det_bp, url_prefix='/det') app.register_blueprint(det_bp, url_prefix='/det')
@det_bp.route('/books', methods=['POST'])
@process_request
def books():
    """
    Detect document ("book") areas in an image and save each crop to disk.

    The image location is taken from the ``img_path`` form field. Every area
    returned by ``detector.get_book_areas`` is written next to the source
    image as ``<name>_book_<index><ext>``.

    :return: list of the file paths of the saved crops
    :raises ValueError: if the image at ``img_path`` cannot be read
    :raises OSError: if a crop cannot be written
    """
    img_path = request.form['img_path']
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise ValueError('cannot read image: ' + img_path)
    result = detector.get_book_areas(image)

    dirname = os.path.dirname(img_path)
    img_name, ext = os.path.splitext(os.path.basename(img_path))
    if not ext:
        # cv2.imwrite picks the encoder from the extension, so one is needed.
        ext = '.jpg'

    books_path = []
    for index, book_image in enumerate(result):
        save_path = os.path.join(dirname, img_name + '_book_' + str(index) + ext)
        # The crops are image arrays; they must be encoded, not dumped as raw
        # bytes, otherwise the saved file cannot be read back as an image.
        if not cv2.imwrite(save_path, book_image):
            raise OSError('cannot write image: ' + save_path)
        books_path.append(save_path)
    return books_path
if __name__ == '__main__': if __name__ == '__main__':
app.run("0.0.0.0", 5000) app.run('0.0.0.0', 5000)

View File

@@ -1,6 +1,8 @@
import os import os
import socket import socket
# 项目根目录
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# 获取主机名,方便区分容器 # 获取主机名,方便区分容器
HOSTNAME = socket.gethostname() HOSTNAME = socket.gethostname()
# 检测日志文件的路径是否存在,不存在则创建 # 检测日志文件的路径是否存在,不存在则创建

View File

@@ -1,12 +1,8 @@
import base64
import logging
import tempfile import tempfile
from collections import defaultdict from collections import defaultdict
import cv2 import cv2
import numpy as np import numpy as np
import requests
from tenacity import retry, stop_after_attempt, wait_random
from paddle_detection import PADDLE_DET from paddle_detection import PADDLE_DET
from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig from paddle_detection.deploy.third_engine.onnx.infer import PredictConfig
@@ -51,26 +47,3 @@ def get_book_areas(image):
for book_area in book_areas: for book_area in book_areas:
result.append(image_util.capture(image, book_area["box"])) result.append(image_util.capture(image, book_area["box"]))
return result return result
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning("获取文档区域失败!"))
def request_book_areas(image, timeout=60):
    """
    Send an image to the detection service and return the cropped areas.

    The image is JPEG-encoded and posted as a multipart file; the service
    answers with a JSON list of base64-encoded images.

    :param image: image array to analyse
    :param timeout: seconds to wait for the service before giving up, so a
        stalled request raises and is retried instead of blocking forever
    :return: list of decoded images whose long/short side ratio is at most
        6.5; an empty list when the service does not answer with status 200
    :raises ValueError: if the image cannot be JPEG-encoded
    """
    url = "http://det_api:5000/det/books"
    encoded_ok, encoded_image = cv2.imencode('.jpg', image)
    if not encoded_ok:
        raise ValueError('cannot encode image as JPEG')
    files = {"image": ("image.jpg", encoded_image.tobytes())}
    response = requests.post(url, files=files, timeout=timeout)
    if response.status_code != 200:
        return []

    result = []
    for img_str in response.json():
        np_array = np.frombuffer(base64.b64decode(img_str), np.uint8)
        img = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if img is None:
            # Undecodable payload: skip it instead of failing on img.shape.
            continue
        height, width = img.shape[:2]
        short_side = min(height, width)
        # Filter out abnormal results (extremely elongated crops).
        if short_side > 0 and max(height, width) / short_side <= 6.5:
            result.append(img)
    return result

View File

@@ -11,27 +11,26 @@ from db.mysql import ZxPhhd
from log import LOGGING_CONFIG from log import LOGGING_CONFIG
from photo_review import auto_photo_review, SEND_ERROR_EMAIL from photo_review import auto_photo_review, SEND_ERROR_EMAIL
# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生
if __name__ == '__main__': if __name__ == '__main__':
program_name = '照片审核自动识别脚本' program_name = '照片审核自动识别脚本'
logging.config.dictConfig(LOGGING_CONFIG) logging.config.dictConfig(LOGGING_CONFIG)
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态") parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态')
args = parser.parse_args() args = parser.parse_args()
if args.clean: if args.clean:
# 主要用于启动时,清除仍在涂抹中的案子 # 主要用于启动时,清除仍在涂抹中的案子
session = MysqlSession() session = MysqlSession()
update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1")) update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1'))
session.execute(update_flag) session.execute(update_flag)
session.commit() session.commit()
session.close() session.close()
logging.info("已释放残余的识别案子!") logging.info('已释放残余的识别案子!')
else: else:
sleep(5) sleep(5)
try: try:
logging.info(f"{program_name}】开始运行") logging.info(f'{program_name}】开始运行')
auto_photo_review.main() auto_photo_review.main()
except Exception as e: except Exception as e:
error_logger = logging.getLogger('error') error_logger = logging.getLogger('error')

View File

@@ -23,7 +23,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
from ucloud import ufile from ucloud import ufile
from util import image_util, common_util, html_util from util import image_util, common_util, html_util, model_util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \ handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \
parse_hospital parse_hospital
@@ -148,12 +148,12 @@ def get_better_image_from_qrcode(image, image_id, dpi=150):
def information_extraction(ie, phrecs, identity): def information_extraction(ie, phrecs, identity):
result = {} result = {}
for phrec in phrecs: for phrec in phrecs:
img_path = ufile.get_private_url(phrec.cfjaddress) img_url = ufile.get_private_url(phrec.cfjaddress)
if not img_path: if not img_url:
continue continue
image = image_util.read(img_path) img_path = image_util.save_to_local(img_url)
image = cv2.imread(img_path)
# 尝试从二维码中获取高清图片 # 尝试从二维码中获取高清图片
better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress) better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress)
if phrec.cRectype != '1': if phrec.cRectype != '1':
@@ -166,12 +166,11 @@ def information_extraction(ie, phrecs, identity):
info_extract = ie(text)[0] info_extract = ie(text)[0]
else: else:
info_extract = ie_temp_image(ie, OCR, image) info_extract = ie_temp_image(ie, OCR, image)
ie_result = {'result': info_extract, 'angle': '0'} if not info_extract:
now = common_util.get_default_datetime()
if not ie_result['result']:
continue continue
ie_result = {'result': info_extract, 'angle': img_angle}
now = common_util.get_default_datetime()
result_json = json.dumps(ie_result['result'], ensure_ascii=False) result_json = json.dumps(ie_result['result'], ensure_ascii=False)
if len(result_json) > 5000: if len(result_json) > 5000:
result_json = result_json[:5000] result_json = result_json[:5000]
@@ -184,7 +183,7 @@ def information_extraction(ie, phrecs, identity):
result = merge_result(result, ie_result['result']) result = merge_result(result, ie_result['result'])
else: else:
target_images = [] target_images = []
# target_images += detector.request_book_areas(image) # 识别文档区域并裁剪 target_images += model_util.request_book_areas(img_path) # 识别文档区域并裁剪
if not target_images: if not target_images:
target_images.append(image) # 识别失败 target_images.append(image) # 识别失败
angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计 angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计

1
tmp_img/README.md Normal file
View File

@@ -0,0 +1 @@
本文件夹用于保存临时图片,方便各个服务间调用。

View File

@@ -1,12 +1,16 @@
import logging import logging
import math import math
import os
import urllib.request import urllib.request
import cv2 import cv2
import numpy import numpy
import requests
from paddleclas import PaddleClas from paddleclas import PaddleClas
from tenacity import retry, stop_after_attempt, wait_random from tenacity import retry, stop_after_attempt, wait_random
from log import PROJECT_ROOT
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
after=lambda x: logging.warning("获取图片失败!")) after=lambda x: logging.warning("获取图片失败!"))
@@ -247,3 +251,35 @@ def combined(img1, img2):
combined_img[:height1, :width1] = img1 combined_img[:height1, :width1] = img1
combined_img[:height2, width1:width1 + width2] = img2 combined_img[:height2, width1:width1 + width2] = img2
return combined_img return combined_img
def parse_img_url(url):
    """
    Split an image URL into the file's name and extension.

    The query string and fragment are dropped before the last path segment
    is examined.

    :param url: image URL
    :return: two-element list ``[name, extension]``; the extension carries no
        leading dot and is ``''`` when the file name contains no dot
    """
    path = url.split('?', 1)[0].split('#', 1)[0]
    file_name = os.path.basename(path)
    name, dot, ext = file_name.rpartition('.')
    if not dot:
        # No dot at all: always return two items so callers can unpack.
        return [file_name, '']
    return [name, ext]
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('保存图片失败!'))
def save_to_local(img_url, save_path=None, timeout=30):
    """
    Download an image and save it to a local file.

    :param img_url: image URL
    :param save_path: local destination including the file name; when omitted
        the file is stored in the ``tmp_img`` folder under ``PROJECT_ROOT``,
        named after the last path segment of the URL
    :param timeout: seconds to wait for the server, so a stalled download
        raises and is retried instead of blocking forever
    :return: the local path the image was written to
    """
    response = requests.get(img_url, timeout=timeout)
    response.raise_for_status()  # fail (and let the retry run) on HTTP errors
    if save_path is None:
        name_parts = parse_img_url(img_url)
        img_name = name_parts[0]
        # Tolerate URLs whose file name has no extension.
        img_ext = name_parts[1] if len(name_parts) > 1 else ''
        file_name = img_name + '.' + img_ext if img_ext else img_name
        save_path = os.path.join(PROJECT_ROOT, 'tmp_img', file_name)
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path

20
util/model_util.py Normal file
View File

@@ -0,0 +1,20 @@
import logging
import cv2
import requests
from tenacity import retry, stop_after_attempt, wait_random
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
       after=lambda x: logging.warning('获取文档区域失败!'))
def request_book_areas(img_path, timeout=60):
    """
    Ask the detection service to crop the document areas of an image.

    Only the image path is sent; the service answers with a JSON list of
    paths, each of which is then loaded here with ``cv2.imread``. The paths
    therefore have to be readable from this process as well.

    :param img_path: path of the image to analyse
    :param timeout: seconds to wait for the service, so a stalled request
        raises and is retried instead of blocking forever
    :return: list of loaded crop images; an empty list when the service does
        not answer with status 200
    """
    url = 'http://det_api:5000/det/books'
    response = requests.post(url, data={'img_path': img_path}, timeout=timeout)
    if response.status_code != 200:
        return []

    books = []
    for book_path in response.json():
        book = cv2.imread(book_path)
        if book is None:
            # cv2.imread returns None for a missing/unreadable file; keep
            # such entries out of the result so callers only get real images.
            continue
        books.append(book)
    return books