@det_bp.route('/books', methods=['POST'])
@process_request
def books():
    """Detect book/document regions in a local image and save each crop.

    Reads the ``img_path`` form field, loads that image with OpenCV, runs
    ``detector.get_book_areas`` on it and writes every returned crop next to
    the source image as ``<name>_book_<index><ext>``.

    :return: list of file paths of the crops that were written successfully
    :raises ValueError: if the image at ``img_path`` cannot be read
    """
    img_path = request.form['img_path']
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports a missing/undecodable file by returning None
        # instead of raising, so fail here with a clear message.
        raise ValueError(f'cannot read image: {img_path}')

    crops = detector.get_book_areas(image)

    dirname = os.path.dirname(img_path)
    img_name, ext = os.path.splitext(os.path.basename(img_path))
    if not ext:
        # cv2.imwrite picks the encoder from the extension; fall back to JPEG
        # when the source path has none.
        ext = '.jpg'

    books_path = []
    for index, crop in enumerate(crops):
        save_path = os.path.join(dirname, f'{img_name}_book_{index}{ext}')
        # The crops are image arrays (presumably numpy arrays produced by
        # image_util.capture), so they must be encoded with cv2.imwrite.
        # Writing the array through file.write() would dump raw pixel memory
        # that cv2.imread cannot decode afterwards.
        if cv2.imwrite(save_path, crop):
            books_path.append(save_path)
    return books_path
232376a..1866edb 100644 --- a/photo_review.py +++ b/photo_review.py @@ -11,27 +11,26 @@ from db.mysql import ZxPhhd from log import LOGGING_CONFIG from photo_review import auto_photo_review, SEND_ERROR_EMAIL -# 项目必须从此处启动,否则代码中的相对路径可能导致错误的发生 if __name__ == '__main__': program_name = '照片审核自动识别脚本' logging.config.dictConfig(LOGGING_CONFIG) parser = argparse.ArgumentParser() - parser.add_argument("--clean", default=False, type=bool, help="是否将识别中的案子改为待识别状态") + parser.add_argument('--clean', default=False, type=bool, help='是否将识别中的案子改为待识别状态') args = parser.parse_args() if args.clean: # 主要用于启动时,清除仍在涂抹中的案子 session = MysqlSession() - update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == "2").values(exsuccess_flag="1")) + update_flag = (update(ZxPhhd).where(ZxPhhd.exsuccess_flag == '2').values(exsuccess_flag='1')) session.execute(update_flag) session.commit() session.close() - logging.info("已释放残余的识别案子!") + logging.info('已释放残余的识别案子!') else: sleep(5) try: - logging.info(f"【{program_name}】开始运行") + logging.info(f'【{program_name}】开始运行') auto_photo_review.main() except Exception as e: error_logger = logging.getLogger('error') diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 67aa845..6664117 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -23,7 +23,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ UPPERCASE_MEDICAL_EXPENSES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER from ucloud import ufile -from util import image_util, common_util, html_util +from util import image_util, common_util, html_util, model_util from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money, \ parse_hospital 
@@ -148,12 +148,12 @@ def get_better_image_from_qrcode(image, image_id, dpi=150): def information_extraction(ie, phrecs, identity): result = {} for phrec in phrecs: - img_path = ufile.get_private_url(phrec.cfjaddress) - if not img_path: + img_url = ufile.get_private_url(phrec.cfjaddress) + if not img_url: continue - image = image_util.read(img_path) - + img_path = image_util.save_to_local(img_url) + image = cv2.imread(img_path) # 尝试从二维码中获取高清图片 better_image, text = get_better_image_from_qrcode(image, phrec.cfjaddress) if phrec.cRectype != '1': @@ -166,12 +166,11 @@ def information_extraction(ie, phrecs, identity): info_extract = ie(text)[0] else: info_extract = ie_temp_image(ie, OCR, image) - ie_result = {'result': info_extract, 'angle': '0'} - - now = common_util.get_default_datetime() - if not ie_result['result']: + if not info_extract: continue + ie_result = {'result': info_extract, 'angle': img_angle} + now = common_util.get_default_datetime() result_json = json.dumps(ie_result['result'], ensure_ascii=False) if len(result_json) > 5000: result_json = result_json[:5000] @@ -184,7 +183,7 @@ def information_extraction(ie, phrecs, identity): result = merge_result(result, ie_result['result']) else: target_images = [] - # target_images += detector.request_book_areas(image) # 识别文档区域并裁剪 + target_images += model_util.request_book_areas(img_path) # 识别文档区域并裁剪 if not target_images: target_images.append(image) # 识别失败 angle_count = defaultdict(int, {'0': 0}) # 分割后图片的最优角度统计 diff --git a/tmp_img/README.md b/tmp_img/README.md new file mode 100644 index 0000000..4b92b40 --- /dev/null +++ b/tmp_img/README.md @@ -0,0 +1 @@ +本文件夹用于保存临时图片,方便各个服务间调用。 \ No newline at end of file diff --git a/util/image_util.py b/util/image_util.py index 0b30cc0..5a9d4bb 100644 --- a/util/image_util.py +++ b/util/image_util.py @@ -1,12 +1,16 @@ import logging import math +import os import urllib.request import cv2 import numpy +import requests from paddleclas import PaddleClas from tenacity import retry, 
stop_after_attempt, wait_random +from log import PROJECT_ROOT + @retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, after=lambda x: logging.warning("获取图片失败!")) @@ -247,3 +251,35 @@ def combined(img1, img2): combined_img[:height1, :width1] = img1 combined_img[:height2, width1:width1 + width2] = img2 return combined_img + + +def parse_img_url(url): + """ + 解析图片url + :param url: 图片url + :return: 图片名称和图片后缀 + """ + url = url.split('?')[0] + return os.path.basename(url).rsplit('.', 1) + + +@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, + after=lambda x: logging.warning('保存图片失败!')) +def save_to_local(img_url, save_path=None): + """ + 保存图片到本地 + :param img_url: 图片url + :param save_path: 本地保存地址,精确到文件名 + :return: 本地保存地址 + """ + response = requests.get(img_url) + response.raise_for_status() # 检查响应状态码是否正常 + + if save_path is None: + img_name, img_ext = parse_img_url(img_url) + save_path = os.path.join(PROJECT_ROOT, 'tmp_img', img_name + '.' + img_ext) + + with open(save_path, 'wb') as file: + file.write(response.content) + + return save_path diff --git a/util/model_util.py b/util/model_util.py new file mode 100644 index 0000000..547f525 --- /dev/null +++ b/util/model_util.py @@ -0,0 +1,20 @@ +import logging + +import cv2 +import requests +from tenacity import retry, stop_after_attempt, wait_random + + +@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True, + after=lambda x: logging.warning('获取文档区域失败!')) +def request_book_areas(img_path): + url = 'http://det_api:5000/det/books' + response = requests.post(url, {'img_path': img_path}) + if response.status_code == 200: + response_data = response.json() + books = [] + for books_path in response_data: + books.append(cv2.imread(books_path)) + return books + else: + return []