From ee86bb4e74ec1c19fb25065f4b03797646cf3dfb Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Mon, 22 Jul 2024 17:31:32 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96ucloud=E7=9A=84=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E5=8F=8A=E9=87=8D=E8=AF=95=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- check_ie_result/check_ie_result.py | 6 +-- log/__init__.py | 41 ++++++++++++++--- photo_mask/photo_mask.py | 8 ++-- photo_review/photo_review.py | 4 +- requirements.txt | 1 + test_photo_mask_optimization.py | 8 ++-- ucloud/__init__.py | 11 ++++- ucloud/ucloud.py | 58 ------------------------ ucloud/ufile.py | 61 ++++++++++++++++++++++++++ visual_model_test/visual_model_test.py | 12 ++--- 10 files changed, 124 insertions(+), 86 deletions(-) delete mode 100644 ucloud/ucloud.py create mode 100644 ucloud/ufile.py diff --git a/check_ie_result/check_ie_result.py b/check_ie_result/check_ie_result.py index e8730b1..9751c66 100644 --- a/check_ie_result/check_ie_result.py +++ b/check_ie_result/check_ie_result.py @@ -1,7 +1,6 @@ import datetime import json import os -import sys from decimal import Decimal from io import BytesIO from itertools import groupby @@ -9,10 +8,9 @@ from itertools import groupby import requests from PIL import ImageDraw, Image, ImageFont -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from db import MysqlSession from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec -from ucloud import ucloud +from ucloud import ufile def check_ie_result(pk_phhd): @@ -78,7 +76,7 @@ def check_ie_result(pk_phhd): ZxPhrec.pk_phhd == pk_phhd).all() for phrec in phrecs: img_name = phrec.cfjaddress - img_path = ucloud.get_private_url(img_name) + img_path = ufile.get_private_url(img_name) response = requests.get(img_path) image = Image.open(BytesIO(response.content)).convert("RGB") diff --git a/log/__init__.py b/log/__init__.py index 3c25585..f3dcf97 100644 --- a/log/__init__.py +++ b/log/__init__.py @@ -1,3 +1,16 @@ +import os +import socket + +# 获取主机名,方便区分容器 +HOSTNAME = socket.gethostname() +# 检测日志文件的路径是否存在,不存在则创建 +LOG_PATHS = [ + f"log/{HOSTNAME}/ucloud", +] +for path in LOG_PATHS: + if not os.path.exists(path): + os.makedirs(path) + # 配置字典 LOGGING_CONFIG = { 'version': 1, # 必需,指定配置格式的版本 @@ -20,22 +33,38 @@ LOGGING_CONFIG = { 'stream': 'ext://sys.stdout', # 输出到标准输出,默认编码跟随系统,一般为UTF-8 }, 'file': { - 'class': 'logging.handlers.RotatingFileHandler', # 文件处理器,支持日志滚动 + 'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动 'level': 'INFO', 'formatter': 'standard', - 'filename': 'log/fcb_photo_review.log', # 日志文件路径 - 'maxBytes': 1024 * 1024 * 5, # 文件最大大小,这里为5MB - 'backupCount': 5, # 保留的备份文件数量 + 'filename': f'log/{HOSTNAME}/fcb_photo_review.log', # 日志文件路径 + 'when': 'midnight', + 'interval': 1, + 'backupCount': 14, # 保留的备份文件数量 + 'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文 + }, + 'ucloud': { + 'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动 + 'level': 'INFO', + 'formatter': 'standard', + 'filename': f'log/{HOSTNAME}/ucloud/fcb_photo_review_ucloud.log', # 日志文件路径 + 'when': 'midnight', + 'interval': 1, + 'backupCount': 14, # 保留的备份文件数量 'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文 }, }, - # loggers定义了日志记录器,这里是根记录器 + # loggers定义了日志记录器 'loggers': { '': { # 根记录器 'handlers': ['console', 'file'], # 关联的处理器 'level': 'DEBUG', # 根记录器的级别 'propagate': False, # 是否向上级传播日志信息 }, + 'ucloud': { + 'handlers': ['console', 'ucloud'], + 'level': 'DEBUG', + 'propagate': False, + } }, -} \ No newline at end of file +} diff --git a/photo_mask/photo_mask.py b/photo_mask/photo_mask.py index 89900cb..9ee735f 100644 --- a/photo_mask/photo_mask.py +++ b/photo_mask/photo_mask.py @@ -9,7 +9,7 @@ from db import MysqlSession from db.mysql import ZxPhrec, ZxPhhd from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \ ID_CARD_NUM_KEYS -from ucloud import BUCKET, ucloud +from ucloud import BUCKET, ufile from util import image_util, util @@ -150,7 +150,7 @@ def photo_mask(pk_phhd, name, id_card_num): )).all() session.close() for phrec in phrecs: - img_url = ucloud.get_private_url(phrec.cfjaddress) + img_url = ufile.get_private_url(phrec.cfjaddress) if not img_url: continue # 是否有涂抹 @@ -178,7 +178,7 @@ def photo_mask(pk_phhd, name, id_card_num): # 如果涂抹了要备份以及更新 if is_masked: for i in range(COPY_TRY_TIMES): - is_copy_success = ucloud.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress) + is_copy_success = ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress) if is_copy_success: break @@ -186,7 +186,7 @@ def photo_mask(pk_phhd, name, id_card_num): cv2.imwrite(temp_file.name, image) try: for i in range(UPLOAD_TRY_TIMES): - is_upload_success = ucloud.upload_file(phrec.cfjaddress, temp_file.name) + is_upload_success = ufile.upload_file(phrec.cfjaddress, temp_file.name) if is_upload_success: break except Exception as e: diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index a64c8a3..7e757fa 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -15,7 +15,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ UPPERCASE_MEDICAL_EXPENSES -from ucloud import ucloud +from ucloud import ufile from util import image_util, util from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money @@ -73,7 +73,7 @@ def information_extraction(ie, phrecs): # 同一批图的标识 identity = int(time.time()) for phrec in phrecs: - img_path = ucloud.get_private_url(phrec.cfjaddress) + img_path = ufile.get_private_url(phrec.cfjaddress) if not img_path: continue split_results = image_util.split(img_path) diff --git a/requirements.txt b/requirements.txt index e807d3f..e9f7c59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ pymysql==1.1.0 requests==2.22.0 sqlacodegen==2.3.0.post1 sqlalchemy==1.4.52 +tenacity==8.5.0 ufile==3.2.9 \ No newline at end of file diff --git a/test_photo_mask_optimization.py b/test_photo_mask_optimization.py index 632e0d7..a7edd8d 100644 --- a/test_photo_mask_optimization.py +++ b/test_photo_mask_optimization.py @@ -3,7 +3,7 @@ import cv2 from db import MysqlSession from db.mysql import ZxIeOcrerror from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout -from ucloud import ucloud +from ucloud import ufile from util import image_util if __name__ == '__main__': @@ -13,13 +13,13 @@ if __name__ == '__main__': session.close() for ocr_error in ocr_errors: - final_img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg100") + final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100") final_image = image_util.read(final_img_url) cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image) - img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg2015") + img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg2015") if not img_url: # 没有自动涂抹的图片 - img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg103") + img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg103") name = ocr_error.cXm id_card_num = ocr_error.cSfzh diff --git a/ucloud/__init__.py b/ucloud/__init__.py index 438f480..f3d0d28 100644 --- a/ucloud/__init__.py +++ b/ucloud/__init__.py @@ -1,6 +1,6 @@ -# 公钥 from ufile import config +# 公钥 PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq" # 私钥 PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG" @@ -13,5 +13,12 @@ DOWNLOAD_SUFFIX = ".cn-sh2.ufileos.com" # 私空间文件地址过期时间(秒) PRIVATE_EXPIRES = 3600 -# 设置默认请求超时时间 +# 设置默认请求超时时间(秒) config.set_default(connection_timeout=60) + +# 尝试次数 +TRY_TIMES = 5 +# 最小等待时间(秒) +MIN_WAIT_TIME = 1 +# 最大等待时间(秒) +MAX_WAIT_TIME = 3 diff --git a/ucloud/ucloud.py b/ucloud/ucloud.py deleted file mode 100644 index 1475dad..0000000 --- a/ucloud/ucloud.py +++ /dev/null @@ -1,58 +0,0 @@ -# https://github.com/ucloud/ufile-sdk-python -import logging -from time import sleep - -from ufile import filemanager - -from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES - -UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX) - - -def get_private_url(key, bucket=BUCKET): - for i in range(3): - # 判断文件是否存在 - _, resp = UFILE_HANDLER.head_file(bucket, key) - if resp.status_code == -1: - logging.warning(f"uCloud连接失败!即将重试...") - sleep(3) - continue - if resp.status_code != 200: - logging.warning(f"uCloud中未找到({key})! status: {resp.status_code} error: {resp.error}") - return None - - # 获取公有空间下载url - # url = get_ufile_handler.public_download_url(bucket, key) - - # 获取私有空间下载url, expires为下载链接有效期,单位为秒 - url = UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES) - return url - - -def copy_file(source_bucket, source_key, target_bucket, target_key): - for i in range(3): - # 拷贝文件 - ret, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key) - if resp.status_code == -1: - logging.warning(f"uCloud连接失败!即将重试...") - sleep(3) - continue - if resp.status_code != 200: - logging.warning( - f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}") - return False - return True - - -def upload_file(key, file_path, bucket=BUCKET): - for i in range(3): - # 普通上传文件至空间 - ret, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None) - if resp.status_code == -1: - logging.warning(f"uCloud连接失败!即将重试...") - sleep(3) - continue - if resp.status_code != 200: - logging.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}") - return False - return True diff --git a/ucloud/ufile.py b/ucloud/ufile.py new file mode 100644 index 0000000..95494dc --- /dev/null +++ b/ucloud/ufile.py @@ -0,0 +1,61 @@ +# https://github.com/ucloud/ufile-sdk-python +import logging + +from tenacity import retry, stop_after_attempt, wait_random, retry_if_exception_type +from ufile import filemanager + +from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES, TRY_TIMES, \ + MIN_WAIT_TIME, MAX_WAIT_TIME + +UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX) +UCLOUD_LOGGER = logging.getLogger('ucloud') + + +@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), + retry=retry_if_exception_type(ConnectionError), reraise=True) +def get_private_url(key, bucket=BUCKET): + # 判断文件是否存在 + _, resp = UFILE_HANDLER.head_file(bucket, key) + if resp.status_code == -1: + UCLOUD_LOGGER.warning(f"查询{key}时uCloud连接失败!") + raise ConnectionError("uCloud连接失败") + if resp.status_code != 200: + UCLOUD_LOGGER.warning(f"({bucket})中未找到({key})! status: {resp.status_code} error: {resp.error}") + return None + + # 获取公有空间下载url + # url = get_ufile_handler.public_download_url(bucket, key) + + # 获取私有空间下载url, expires为下载链接有效期,单位为秒 + url = UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES) + return url + + +@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), + retry=retry_if_exception_type(ConnectionError), reraise=True) +def copy_file(source_bucket, source_key, target_bucket, target_key): + # 复制文件 + _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key) + if resp.status_code == -1: + UCLOUD_LOGGER.warning(f"复制{source_key}时uCloud连接失败!") + raise ConnectionError("uCloud连接失败") + if resp.status_code != 200: + UCLOUD_LOGGER.warning( + f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}" + ) + return False + return True + + +@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME), + retry=retry_if_exception_type(ConnectionError), reraise=True) +def upload_file(key, file_path, bucket=BUCKET): + # 普通上传文件至云空间 + _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None) + if resp.status_code == -1: + UCLOUD_LOGGER.warning(f"上传{key}时uCloud连接失败!即将重试...") + raise ConnectionError("uCloud连接失败") + if resp.status_code != 200: + UCLOUD_LOGGER.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}") + return False + return True diff --git a/visual_model_test/visual_model_test.py b/visual_model_test/visual_model_test.py index 501a9f0..5dad92f 100644 --- a/visual_model_test/visual_model_test.py +++ b/visual_model_test/visual_model_test.py @@ -10,7 +10,7 @@ from paddlenlp import Taskflow from paddlenlp.utils.doc_parser import DocParser from paddleocr import PaddleOCR -from ucloud import ucloud +from ucloud import ufile from util import image_util, util @@ -118,24 +118,24 @@ def main(model_type, pic_name=None): if model_type == "ocr": task_path = None - test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg" + test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg" schema = None elif model_type == "settlement": task_path = "../model/settlement_list_model" - test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg" + test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg" schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额", "医保类型", "住院号", "医保结算单号码", "大写总额"] elif model_type == "discharge": task_path = "../model/discharge_record_model" - test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg" + test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg" schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"] elif model_type == "cost": task_path = "../model/cost_list_model" - test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" + test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" schema = ["患者姓名", "入院日期", "出院日期", "费用总额"] elif model_type == "cost_detail": task_path = "../model/cost_list_detail_model" - test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" + test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]} else: print("请输入正确的类型!")