优化ucloud的日志及重试机制

2024-07-22 17:31:32 +08:00
parent c06796d0a3
commit ee86bb4e74
10 changed files with 124 additions and 86 deletions
--- a/check_ie_result/check_ie_result.py
+++ b/check_ie_result/check_ie_result.py
@@ -1,7 +1,6 @@
 import datetime
 import json
 import os
-import sys
 from decimal import Decimal
 from io import BytesIO
 from itertools import groupby
@@ -9,10 +8,9 @@ from itertools import groupby
 import requests
 from PIL import ImageDraw, Image, ImageFont

-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from db import MysqlSession
 from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec
-from ucloud import ucloud
+from ucloud import ufile


 def check_ie_result(pk_phhd):
@@ -78,7 +76,7 @@ def check_ie_result(pk_phhd):
        ZxPhrec.pk_phhd == pk_phhd).all()
    for phrec in phrecs:
        img_name = phrec.cfjaddress
-        img_path = ucloud.get_private_url(img_name)
+        img_path = ufile.get_private_url(img_name)

        response = requests.get(img_path)
        image = Image.open(BytesIO(response.content)).convert("RGB")
--- a/log/init.py
+++ b/log/init.py
@@ -1,3 +1,16 @@
+import os
+import socket
+
+# Host name, used in the log paths so each container's logs stay separate.
+HOSTNAME = socket.gethostname()
+# Log directories that must exist before the file handlers are configured.
+LOG_PATHS = [
+    f"log/{HOSTNAME}/ucloud",
+]
+for path in LOG_PATHS:
+    # exist_ok=True avoids the race between an exists() check and makedirs().
+    os.makedirs(path, exist_ok=True)
+
 # 配置字典
 LOGGING_CONFIG = {
    'version': 1,  # 必需，指定配置格式的版本
@@ -20,22 +33,38 @@ LOGGING_CONFIG = {
            'stream': 'ext://sys.stdout',  # 输出到标准输出，默认编码跟随系统，一般为UTF-8
        },
        'file': {
-            'class': 'logging.handlers.RotatingFileHandler',  # 文件处理器，支持日志滚动
+            'class': 'logging.handlers.TimedRotatingFileHandler',  # 文件处理器，支持日志滚动
            'level': 'INFO',
            'formatter': 'standard',
-            'filename': 'log/fcb_photo_review.log',  # 日志文件路径
-            'maxBytes': 1024 * 1024 * 5,  # 文件最大大小，这里为5MB
-            'backupCount': 5,  # 保留的备份文件数量
+            'filename': f'log/{HOSTNAME}/fcb_photo_review.log',  # 日志文件路径
+            'when': 'midnight',
+            'interval': 1,
+            'backupCount': 14,  # 保留的备份文件数量
+            'encoding': 'utf-8',  # 显式指定文件编码为UTF-8以支持中文
+        },
+        'ucloud': {
+            'class': 'logging.handlers.TimedRotatingFileHandler',  # 文件处理器，支持日志滚动
+            'level': 'INFO',
+            'formatter': 'standard',
+            'filename': f'log/{HOSTNAME}/ucloud/fcb_photo_review_ucloud.log',  # 日志文件路径
+            'when': 'midnight',
+            'interval': 1,
+            'backupCount': 14,  # 保留的备份文件数量
            'encoding': 'utf-8',  # 显式指定文件编码为UTF-8以支持中文
        },
    },

-    # loggers定义了日志记录器，这里是根记录器
+    # loggers定义了日志记录器
    'loggers': {
        '': {  # 根记录器
            'handlers': ['console', 'file'],  # 关联的处理器
            'level': 'DEBUG',  # 根记录器的级别
            'propagate': False,  # 是否向上级传播日志信息
        },
+        'ucloud': {
+            'handlers': ['console', 'ucloud'],
+            'level': 'DEBUG',
+            'propagate': False,
+        }
    },
 }
--- a/photo_mask/photo_mask.py
+++ b/photo_mask/photo_mask.py
@@ -9,7 +9,7 @@ from db import MysqlSession
 from db.mysql import ZxPhrec, ZxPhhd
 from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \
    ID_CARD_NUM_KEYS
-from ucloud import BUCKET, ucloud
+from ucloud import BUCKET, ufile
 from util import image_util, util


@@ -150,7 +150,7 @@ def photo_mask(pk_phhd, name, id_card_num):
    )).all()
    session.close()
    for phrec in phrecs:
-        img_url = ucloud.get_private_url(phrec.cfjaddress)
+        img_url = ufile.get_private_url(phrec.cfjaddress)
        if not img_url:
            continue
        # 是否有涂抹
@@ -178,7 +178,7 @@ def photo_mask(pk_phhd, name, id_card_num):
        # 如果涂抹了要备份以及更新
        if is_masked:
            for i in range(COPY_TRY_TIMES):
-                is_copy_success = ucloud.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
+                is_copy_success = ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
                if is_copy_success:
                    break

@@ -186,7 +186,7 @@ def photo_mask(pk_phhd, name, id_card_num):
                cv2.imwrite(temp_file.name, image)
            try:
                for i in range(UPLOAD_TRY_TIMES):
-                    is_upload_success = ucloud.upload_file(phrec.cfjaddress, temp_file.name)
+                    is_upload_success = ufile.upload_file(phrec.cfjaddress, temp_file.name)
                    if is_upload_success:
                        break
            except Exception as e:
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -15,7 +15,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
    PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
    ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
    UPPERCASE_MEDICAL_EXPENSES
-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util, util
 from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
    handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money
@@ -73,7 +73,7 @@ def information_extraction(ie, phrecs):
    # 同一批图的标识
    identity = int(time.time())
    for phrec in phrecs:
-        img_path = ucloud.get_private_url(phrec.cfjaddress)
+        img_path = ufile.get_private_url(phrec.cfjaddress)
        if not img_path:
            continue
        split_results = image_util.split(img_path)
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ pymysql==1.1.0
 requests==2.22.0
 sqlacodegen==2.3.0.post1
 sqlalchemy==1.4.52
+tenacity==8.5.0
 ufile==3.2.9
--- a/test_photo_mask_optimization.py
+++ b/test_photo_mask_optimization.py
@@ -3,7 +3,7 @@ import cv2
 from db import MysqlSession
 from db.mysql import ZxIeOcrerror
 from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout
-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util

 if __name__ == '__main__':
@@ -13,13 +13,13 @@ if __name__ == '__main__':
    session.close()

    for ocr_error in ocr_errors:
-        final_img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg100")
+        final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100")
        final_image = image_util.read(final_img_url)
        cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image)
-        img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg2015")
+        img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg2015")
        if not img_url:
            # 没有自动涂抹的图片
-            img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg103")
+            img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg103")
        name = ocr_error.cXm
        id_card_num = ocr_error.cSfzh

--- a/ucloud/init.py
+++ b/ucloud/init.py
@@ -1,6 +1,6 @@
-# 公钥
 from ufile import config

+# 公钥
 PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq"
 # 私钥
 PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG"
@@ -13,5 +13,12 @@ DOWNLOAD_SUFFIX = ".cn-sh2.ufileos.com"
 # 私空间文件地址过期时间(秒)
 PRIVATE_EXPIRES = 3600

-# 设置默认请求超时时间
+# Default request timeout for the ufile SDK (seconds)
 config.set_default(connection_timeout=60)
+
+# Maximum number of attempts per uCloud call (passed to stop_after_attempt)
+TRY_TIMES = 5
+# Lower bound of the random wait between attempts (seconds)
+MIN_WAIT_TIME = 1
+# Upper bound of the random wait between attempts (seconds)
+MAX_WAIT_TIME = 3
--- a/ucloud/ucloud.py
+++ b/ucloud/ucloud.py
@@ -1,58 +0,0 @@
-# https://github.com/ucloud/ufile-sdk-python
-import logging
-from time import sleep
-
-from ufile import filemanager
-
-from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES
-
-UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
-
-
-def get_private_url(key, bucket=BUCKET):
-    for i in range(3):
-        # 判断文件是否存在
-        _, resp = UFILE_HANDLER.head_file(bucket, key)
-        if resp.status_code == -1:
-            logging.warning(f"uCloud连接失败!即将重试...")
-            sleep(3)
-            continue
-        if resp.status_code != 200:
-            logging.warning(f"uCloud中未找到({key})! status: {resp.status_code} error: {resp.error}")
-            return None
-
-        # 获取公有空间下载url
-        # url = get_ufile_handler.public_download_url(bucket, key)
-
-        # 获取私有空间下载url, expires为下载链接有效期，单位为秒
-        url = UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
-        return url
-
-
-def copy_file(source_bucket, source_key, target_bucket, target_key):
-    for i in range(3):
-        # 拷贝文件
-        ret, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
-        if resp.status_code == -1:
-            logging.warning(f"uCloud连接失败!即将重试...")
-            sleep(3)
-            continue
-        if resp.status_code != 200:
-            logging.warning(
-                f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}")
-            return False
-        return True
-
-
-def upload_file(key, file_path, bucket=BUCKET):
-    for i in range(3):
-        # 普通上传文件至空间
-        ret, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
-        if resp.status_code == -1:
-            logging.warning(f"uCloud连接失败!即将重试...")
-            sleep(3)
-            continue
-        if resp.status_code != 200:
-            logging.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
-            return False
-        return True
--- a/ucloud/ufile.py
+++ b/ucloud/ufile.py
@@ -0,0 +1,100 @@
+# https://github.com/ucloud/ufile-sdk-python
+"""Wrappers around the UCloud UFile SDK that retry on connection failure."""
+import logging
+
+from tenacity import retry, stop_after_attempt, wait_random, retry_if_exception_type
+from ufile import filemanager
+
+from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES, TRY_TIMES, \
+    MIN_WAIT_TIME, MAX_WAIT_TIME
+
+UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
+UCLOUD_LOGGER = logging.getLogger('ucloud')
+
+# Single retry policy shared by every wrapper below: only ConnectionError is
+# retried, for at most TRY_TIMES attempts, with a random pause of
+# MIN_WAIT_TIME..MAX_WAIT_TIME seconds in between. reraise=True hands the
+# caller the last ConnectionError itself instead of tenacity's RetryError.
+_retry_on_connection_error = retry(
+    stop=stop_after_attempt(TRY_TIMES),
+    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
+    retry=retry_if_exception_type(ConnectionError),
+    reraise=True,
+)
+
+
+@_retry_on_connection_error
+def get_private_url(key, bucket=BUCKET):
+    """Return a private download URL for ``key``, or None if it is not found.
+
+    Args:
+        key: object key inside the bucket.
+        bucket: bucket to look in; defaults to BUCKET.
+
+    Returns:
+        The private download URL (valid for PRIVATE_EXPIRES seconds), or None
+        when the existence check answers with a status other than 200.
+
+    Raises:
+        ConnectionError: every attempt ended with status -1, which this
+            module treats as a failed connection.
+    """
+    # Check that the object exists before building a URL for it.
+    _, resp = UFILE_HANDLER.head_file(bucket, key)
+    if resp.status_code == -1:
+        UCLOUD_LOGGER.warning(f"查询{key}时uCloud连接失败!")
+        raise ConnectionError("uCloud连接失败")
+    if resp.status_code != 200:
+        UCLOUD_LOGGER.warning(f"({bucket})中未找到({key})! status: {resp.status_code} error: {resp.error}")
+        return None
+
+    # Private-bucket URL; expires is the link lifetime in seconds.
+    # (A public bucket would use public_download_url(bucket, key) instead.)
+    return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
+
+
+@_retry_on_connection_error
+def copy_file(source_bucket, source_key, target_bucket, target_key):
+    """Copy an object from one bucket to another.
+
+    Returns:
+        True on status 200, False (after a warning) on any other status
+        except -1.
+
+    Raises:
+        ConnectionError: every attempt ended with status -1.
+    """
+    # Arguments are passed destination first, then source.
+    _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
+    if resp.status_code == -1:
+        UCLOUD_LOGGER.warning(f"复制{source_key}时uCloud连接失败!")
+        raise ConnectionError("uCloud连接失败")
+    if resp.status_code != 200:
+        UCLOUD_LOGGER.warning(
+            f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}"
+        )
+        return False
+    return True
+
+
+@_retry_on_connection_error
+def upload_file(key, file_path, bucket=BUCKET):
+    """Upload the local file at ``file_path`` to ``bucket`` under ``key``.
+
+    Returns:
+        True on status 200, False (after a warning) on any other status
+        except -1.
+
+    Raises:
+        ConnectionError: every attempt ended with status -1.
+    """
+    # Ordinary upload of the file to the bucket.
+    _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
+    if resp.status_code == -1:
+        # No "retrying" promise in the message: the final attempt is not retried.
+        UCLOUD_LOGGER.warning(f"上传{key}时uCloud连接失败!")
+        raise ConnectionError("uCloud连接失败")
+    if resp.status_code != 200:
+        UCLOUD_LOGGER.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
+        return False
+    return True
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,7 +10,7 @@ from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from paddleocr import PaddleOCR

-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util, util


@@ -118,24 +118,24 @@ def main(model_type, pic_name=None):

    if model_type == "ocr":
        task_path = None
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
        schema = None
    elif model_type == "settlement":
        task_path = "../model/settlement_list_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
        schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额",
                  "医保类型", "住院号", "医保结算单号码", "大写总额"]
    elif model_type == "discharge":
        task_path = "../model/discharge_record_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
        schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"]
    elif model_type == "cost":
        task_path = "../model/cost_list_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
        schema = ["患者姓名", "入院日期", "出院日期", "费用总额"]
    elif model_type == "cost_detail":
        task_path = "../model/cost_list_detail_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
        schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]}
    else:
        print("请输入正确的类型！")