优化ucloud的日志及重试机制

2024-07-22 17:31:32 +08:00
parent c06796d0a3
commit ee86bb4e74
10 changed files with 124 additions and 86 deletions
--- a/check_ie_result/check_ie_result.py
+++ b/check_ie_result/check_ie_result.py
@@ -1,7 +1,6 @@
 import datetime
 import json
 import os
 import sys
 from decimal import Decimal
 from io import BytesIO
 from itertools import groupby
@@ -9,10 +8,9 @@ from itertools import groupby
 import requests
 from PIL import ImageDraw, Image, ImageFont
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from db import MysqlSession
 from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec
-from ucloud import ucloud
+from ucloud import ufile
 def check_ie_result(pk_phhd):
@@ -78,7 +76,7 @@ def check_ie_result(pk_phhd):
        ZxPhrec.pk_phhd == pk_phhd).all()
    for phrec in phrecs:
        img_name = phrec.cfjaddress
-        img_path = ucloud.get_private_url(img_name)
+        img_path = ufile.get_private_url(img_name)
        response = requests.get(img_path)
        image = Image.open(BytesIO(response.content)).convert("RGB")
--- a/log/init.py
+++ b/log/init.py
@@ -1,3 +1,16 @@
 import os
 import socket
 # Host name of this machine; used so that each container writes to its own log directory
 HOSTNAME = socket.gethostname()
 # Log directories that must exist before the file handlers open their log files
 LOG_PATHS = [
     f"log/{HOSTNAME}/ucloud",
 ]
 for path in LOG_PATHS:
     # exist_ok=True replaces the exists()-then-makedirs() pair, which could fail
     # if the directory appeared between the check and the creation
     os.makedirs(path, exist_ok=True)
 # 配置字典
 LOGGING_CONFIG = {
    'version': 1,  # 必需，指定配置格式的版本
@@ -20,22 +33,38 @@ LOGGING_CONFIG = {
            'stream': 'ext://sys.stdout',  # 输出到标准输出，默认编码跟随系统，一般为UTF-8
        },
        'file': {
-            'class': 'logging.handlers.RotatingFileHandler',  # 文件处理器，支持日志滚动
+            'class': 'logging.handlers.TimedRotatingFileHandler',  # 文件处理器，支持日志滚动
            'level': 'INFO',
            'formatter': 'standard',
-            'filename': 'log/fcb_photo_review.log',  # 日志文件路径
+            'filename': f'log/{HOSTNAME}/fcb_photo_review.log',  # 日志文件路径
-            'maxBytes': 1024 * 1024 * 5,  # 文件最大大小，这里为5MB
+            'when': 'midnight',
-            'backupCount': 5,  # 保留的备份文件数量
+            'interval': 1,
            'backupCount': 14,  # 保留的备份文件数量
            'encoding': 'utf-8',  # 显式指定文件编码为UTF-8以支持中文
        },
        'ucloud': {
            'class': 'logging.handlers.TimedRotatingFileHandler',  # 文件处理器，支持日志滚动
            'level': 'INFO',
            'formatter': 'standard',
            'filename': f'log/{HOSTNAME}/ucloud/fcb_photo_review_ucloud.log',  # 日志文件路径
            'when': 'midnight',
            'interval': 1,
            'backupCount': 14,  # 保留的备份文件数量
            'encoding': 'utf-8',  # 显式指定文件编码为UTF-8以支持中文
        },
    },
-    # loggers定义了日志记录器，这里是根记录器
+    # loggers定义了日志记录器
    'loggers': {
        '': {  # 根记录器
            'handlers': ['console', 'file'],  # 关联的处理器
            'level': 'DEBUG',  # 根记录器的级别
            'propagate': False,  # 是否向上级传播日志信息
        },
        'ucloud': {
            'handlers': ['console', 'ucloud'],
            'level': 'DEBUG',
            'propagate': False,
        }
    },
-}
+}
--- a/photo_mask/photo_mask.py
+++ b/photo_mask/photo_mask.py
@@ -9,7 +9,7 @@ from db import MysqlSession
 from db.mysql import ZxPhrec, ZxPhhd
 from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \
    ID_CARD_NUM_KEYS
-from ucloud import BUCKET, ucloud
+from ucloud import BUCKET, ufile
 from util import image_util, util
@@ -150,7 +150,7 @@ def photo_mask(pk_phhd, name, id_card_num):
    )).all()
    session.close()
    for phrec in phrecs:
-        img_url = ucloud.get_private_url(phrec.cfjaddress)
+        img_url = ufile.get_private_url(phrec.cfjaddress)
        if not img_url:
            continue
        # 是否有涂抹
@@ -178,7 +178,7 @@ def photo_mask(pk_phhd, name, id_card_num):
        # 如果涂抹了要备份以及更新
        if is_masked:
            for i in range(COPY_TRY_TIMES):
-                is_copy_success = ucloud.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
+                is_copy_success = ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
                if is_copy_success:
                    break
@@ -186,7 +186,7 @@ def photo_mask(pk_phhd, name, id_card_num):
                cv2.imwrite(temp_file.name, image)
            try:
                for i in range(UPLOAD_TRY_TIMES):
-                    is_upload_success = ucloud.upload_file(phrec.cfjaddress, temp_file.name)
+                    is_upload_success = ufile.upload_file(phrec.cfjaddress, temp_file.name)
                    if is_upload_success:
                        break
            except Exception as e:
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -15,7 +15,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
    PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
    ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
    UPPERCASE_MEDICAL_EXPENSES
-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util, util
 from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
    handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money
@@ -73,7 +73,7 @@ def information_extraction(ie, phrecs):
    # 同一批图的标识
    identity = int(time.time())
    for phrec in phrecs:
-        img_path = ucloud.get_private_url(phrec.cfjaddress)
+        img_path = ufile.get_private_url(phrec.cfjaddress)
        if not img_path:
            continue
        split_results = image_util.split(img_path)
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ pymysql==1.1.0
 requests==2.22.0
 sqlacodegen==2.3.0.post1
 sqlalchemy==1.4.52
 tenacity==8.5.0
 ufile==3.2.9
--- a/test_photo_mask_optimization.py
+++ b/test_photo_mask_optimization.py
@@ -3,7 +3,7 @@ import cv2
 from db import MysqlSession
 from db.mysql import ZxIeOcrerror
 from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout
-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util
 if __name__ == '__main__':
@@ -13,13 +13,13 @@ if __name__ == '__main__':
    session.close()
    for ocr_error in ocr_errors:
-        final_img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg100")
+        final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100")
        final_image = image_util.read(final_img_url)
        cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image)
-        img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg2015")
+        img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg2015")
        if not img_url:
            # 没有自动涂抹的图片
-            img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg103")
+            img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg103")
        name = ocr_error.cXm
        id_card_num = ocr_error.cSfzh
--- a/ucloud/init.py
+++ b/ucloud/init.py
@@ -1,6 +1,6 @@
 # 公钥
 from ufile import config
 # Public key
 # NOTE(review): this credential is hard-coded in source control; consider loading it
 # from the environment or a secrets store instead.
 PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq"
 # Private key
 # NOTE(review): same concern as above; a committed private key should normally be rotated.
 PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG"
@@ -13,5 +13,12 @@ DOWNLOAD_SUFFIX = ".cn-sh2.ufileos.com"
 # 私空间文件地址过期时间(秒)
 PRIVATE_EXPIRES = 3600
-# 设置默认请求超时时间
+# 设置默认请求超时时间(秒)
 # Default request timeout for the ufile SDK (seconds)
 config.set_default(connection_timeout=60)
 # Number of attempts
 TRY_TIMES = 5
 # Minimum wait time between attempts (seconds)
 MIN_WAIT_TIME = 1
 # Maximum wait time between attempts (seconds)
 MAX_WAIT_TIME = 3
--- a/ucloud/ucloud.py
+++ b/ucloud/ucloud.py
@@ -1,58 +0,0 @@
 # https://github.com/ucloud/ufile-sdk-python
 import logging
 from time import sleep
 from ufile import filemanager
 from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES
 # Module-level ufile FileManager built from the configured keys and upload/download suffixes;
 # every function below goes through this single instance.
 UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
 def get_private_url(key, bucket=BUCKET):
    """Return a private download URL for ``key``, or ``None`` if it cannot be obtained.

    Makes up to three attempts. An attempt whose response status is -1 is
    treated as a connection failure: a warning is logged, the function sleeps
    3 seconds and tries again. Any other non-200 status logs a warning and
    returns ``None`` immediately.

    Note: when all three attempts end with status -1 the loop simply finishes
    and the function returns ``None`` implicitly.

    :param key: object key to look up.
    :param bucket: bucket to query; defaults to ``BUCKET``.
    :return: URL from ``private_download_url`` (expires after
        ``PRIVATE_EXPIRES``), or ``None``.
    """
    for i in range(3):
        # Check whether the file exists
        _, resp = UFILE_HANDLER.head_file(bucket, key)
        if resp.status_code == -1:
            logging.warning(f"uCloud连接失败!即将重试...")
            sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"uCloud中未找到({key})! status: {resp.status_code} error: {resp.error}")
            return None
        # Download URL for a public bucket (kept for reference, not used)
        # url = get_ufile_handler.public_download_url(bucket, key)
        # Download URL for a private bucket; expires is the link lifetime in seconds
        url = UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
        return url
 def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket/key to another.

    Makes up to three attempts. A response status of -1 is treated as a
    connection failure: a warning is logged, the function sleeps 3 seconds
    and tries again.

    Note: when all three attempts end with status -1 the loop finishes and
    the function returns ``None`` implicitly (falsy, but not ``False``).

    :param source_bucket: bucket holding the object to copy.
    :param source_key: key of the object to copy.
    :param target_bucket: bucket that receives the copy.
    :param target_key: key of the copy.
    :return: ``True`` on status 200, ``False`` on any other non -1 status.
    """
    for i in range(3):
        # Copy the file (the SDK call takes the target first, then the source)
        ret, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
        if resp.status_code == -1:
            logging.warning(f"uCloud连接失败!即将重试...")
            sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(
                f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
 def upload_file(key, file_path, bucket=BUCKET):
    """Upload the local file at ``file_path`` to ``bucket`` under ``key``.

    Makes up to three attempts. A response status of -1 is treated as a
    connection failure: a warning is logged, the function sleeps 3 seconds
    and tries again.

    Note: when all three attempts end with status -1 the loop finishes and
    the function returns ``None`` implicitly (falsy, but not ``False``).

    :param key: object key to store the file under.
    :param file_path: path of the local file to upload.
    :param bucket: destination bucket; defaults to ``BUCKET``.
    :return: ``True`` on status 200, ``False`` on any other non -1 status.
    """
    for i in range(3):
        # Ordinary upload of the file to the bucket
        ret, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
        if resp.status_code == -1:
            logging.warning(f"uCloud连接失败!即将重试...")
            sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
--- a/ucloud/ufile.py
+++ b/ucloud/ufile.py
@@ -0,0 +1,61 @@
 # https://github.com/ucloud/ufile-sdk-python
 import logging
 from tenacity import retry, stop_after_attempt, wait_random, retry_if_exception_type
 from ufile import filemanager
 from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES, TRY_TIMES, \
    MIN_WAIT_TIME, MAX_WAIT_TIME
 # Module-level ufile FileManager built from the configured keys and upload/download suffixes
 UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
 # Dedicated logger named 'ucloud', so UCloud messages can be routed separately from the root logger
 UCLOUD_LOGGER = logging.getLogger('ucloud')
@retry(
    stop=stop_after_attempt(TRY_TIMES),
    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
    retry=retry_if_exception_type(ConnectionError),
    reraise=True,
)
def get_private_url(key, bucket=BUCKET):
    """Return a private download URL for ``key``, or ``None`` if it is not found.

    The object is first looked up with ``head_file``; only a 200 response
    leads to a URL being generated.

    :param key: object key to look up.
    :param bucket: bucket to query; defaults to ``BUCKET``.
    :return: URL from ``private_download_url`` (lifetime ``PRIVATE_EXPIRES``
        seconds), or ``None`` when the lookup status is neither 200 nor -1.
    :raises ConnectionError: when the lookup status is -1. The call is retried
        for up to ``TRY_TIMES`` attempts with a random wait between
        ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME``; the last error is re-raised
        once the attempts are used up.
    """
    _, resp = UFILE_HANDLER.head_file(bucket, key)
    if resp.status_code == 200:
        # Object exists: build the private-bucket link. (A public bucket would use
        # public_download_url instead.)
        return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
    if resp.status_code == -1:
        # Raising hands control to the retry decorator
        UCLOUD_LOGGER.warning(f"查询{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")
    UCLOUD_LOGGER.warning(f"({bucket})中未找到({key})! status: {resp.status_code} error: {resp.error}")
    return None
@retry(
    stop=stop_after_attempt(TRY_TIMES),
    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
    retry=retry_if_exception_type(ConnectionError),
    reraise=True,
)
def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket/key to another.

    :param source_bucket: bucket holding the object to copy.
    :param source_key: key of the object to copy.
    :param target_bucket: bucket that receives the copy.
    :param target_key: key of the copy.
    :return: ``True`` when the copy responds with status 200, otherwise ``False``.
    :raises ConnectionError: when the response status is -1. The call is
        retried for up to ``TRY_TIMES`` attempts with a random wait between
        ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME``; the last error is re-raised
        once the attempts are used up.
    """
    # The SDK call takes the target first, then the source
    _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
    if resp.status_code == -1:
        # Raising hands control to the retry decorator
        UCLOUD_LOGGER.warning(f"复制{source_key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")
    copied = resp.status_code == 200
    if not copied:
        UCLOUD_LOGGER.warning(
            f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}"
        )
    return copied
@retry(
    stop=stop_after_attempt(TRY_TIMES),
    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
    retry=retry_if_exception_type(ConnectionError),
    reraise=True,
)
def upload_file(key, file_path, bucket=BUCKET):
    """Upload the local file at ``file_path`` to ``bucket`` under ``key``.

    :param key: object key to store the file under.
    :param file_path: path of the local file to upload.
    :param bucket: destination bucket; defaults to ``BUCKET``.
    :return: ``True`` when the upload responds with status 200, otherwise ``False``.
    :raises ConnectionError: when the response status is -1. The call is
        retried for up to ``TRY_TIMES`` attempts with a random wait between
        ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME``; the last error is re-raised
        once the attempts are used up.
    """
    # Ordinary upload through the SDK's putfile call
    _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
    if resp.status_code == -1:
        # The message does not promise a retry: this branch also runs on the final
        # attempt, after which the error is re-raised instead of retried. It now
        # matches the wording used by get_private_url and copy_file.
        UCLOUD_LOGGER.warning(f"上传{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")
    if resp.status_code != 200:
        UCLOUD_LOGGER.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
        return False
    return True
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,7 +10,7 @@ from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from paddleocr import PaddleOCR
-from ucloud import ucloud
+from ucloud import ufile
 from util import image_util, util
@@ -118,24 +118,24 @@ def main(model_type, pic_name=None):
    if model_type == "ocr":
        task_path = None
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
        schema = None
    elif model_type == "settlement":
        task_path = "../model/settlement_list_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
        schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额",
                  "医保类型", "住院号", "医保结算单号码", "大写总额"]
    elif model_type == "discharge":
        task_path = "../model/discharge_record_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
        schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"]
    elif model_type == "cost":
        task_path = "../model/cost_list_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
        schema = ["患者姓名", "入院日期", "出院日期", "费用总额"]
    elif model_type == "cost_detail":
        task_path = "../model/cost_list_detail_model"
-        test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
+        test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
        schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]}
    else:
        print("请输入正确的类型！")