优化ucloud的日志及重试机制

This commit is contained in:
2024-07-22 17:31:32 +08:00
parent c06796d0a3
commit ee86bb4e74
10 changed files with 124 additions and 86 deletions

View File

@@ -1,7 +1,6 @@
import datetime
import json
import os
import sys
from decimal import Decimal
from io import BytesIO
from itertools import groupby
@@ -9,10 +8,9 @@ from itertools import groupby
import requests
from PIL import ImageDraw, Image, ImageFont
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from db import MysqlSession
from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec
from ucloud import ucloud
from ucloud import ufile
def check_ie_result(pk_phhd):
@@ -78,7 +76,7 @@ def check_ie_result(pk_phhd):
ZxPhrec.pk_phhd == pk_phhd).all()
for phrec in phrecs:
img_name = phrec.cfjaddress
img_path = ucloud.get_private_url(img_name)
img_path = ufile.get_private_url(img_name)
response = requests.get(img_path)
image = Image.open(BytesIO(response.content)).convert("RGB")

View File

@@ -1,3 +1,16 @@
import os
import socket
# Host name of the current machine, used to tell containers apart.
HOSTNAME = socket.gethostname()
# Directories that will hold log files; each one is created below if missing.
LOG_PATHS = [
    f"log/{HOSTNAME}/ucloud",
]
for path in LOG_PATHS:
    # exist_ok=True replaces the former exists()-then-makedirs() pair, which
    # could raise FileExistsError if another process created the directory
    # between the check and the creation.
    os.makedirs(path, exist_ok=True)
# 配置字典
LOGGING_CONFIG = {
'version': 1, # 必需,指定配置格式的版本
@@ -20,22 +33,38 @@ LOGGING_CONFIG = {
'stream': 'ext://sys.stdout', # 输出到标准输出默认编码跟随系统一般为UTF-8
},
'file': {
'class': 'logging.handlers.RotatingFileHandler', # 文件处理器,支持日志滚动
'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动
'level': 'INFO',
'formatter': 'standard',
'filename': 'log/fcb_photo_review.log', # 日志文件路径
'maxBytes': 1024 * 1024 * 5, # 文件最大大小这里为5MB
'backupCount': 5, # 保留的备份文件数量
'filename': f'log/{HOSTNAME}/fcb_photo_review.log', # 日志文件路径
'when': 'midnight',
'interval': 1,
'backupCount': 14, # 保留的备份文件数量
'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文
},
'ucloud': {
'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动
'level': 'INFO',
'formatter': 'standard',
'filename': f'log/{HOSTNAME}/ucloud/fcb_photo_review_ucloud.log', # 日志文件路径
'when': 'midnight',
'interval': 1,
'backupCount': 14, # 保留的备份文件数量
'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文
},
},
# loggers定义了日志记录器,这里是根记录器
# loggers定义了日志记录器
'loggers': {
'': { # 根记录器
'handlers': ['console', 'file'], # 关联的处理器
'level': 'DEBUG', # 根记录器的级别
'propagate': False, # 是否向上级传播日志信息
},
'ucloud': {
'handlers': ['console', 'ucloud'],
'level': 'DEBUG',
'propagate': False,
}
},
}
}

View File

@@ -9,7 +9,7 @@ from db import MysqlSession
from db.mysql import ZxPhrec, ZxPhhd
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \
ID_CARD_NUM_KEYS
from ucloud import BUCKET, ucloud
from ucloud import BUCKET, ufile
from util import image_util, util
@@ -150,7 +150,7 @@ def photo_mask(pk_phhd, name, id_card_num):
)).all()
session.close()
for phrec in phrecs:
img_url = ucloud.get_private_url(phrec.cfjaddress)
img_url = ufile.get_private_url(phrec.cfjaddress)
if not img_url:
continue
# 是否有涂抹
@@ -178,7 +178,7 @@ def photo_mask(pk_phhd, name, id_card_num):
# 如果涂抹了要备份以及更新
if is_masked:
for i in range(COPY_TRY_TIMES):
is_copy_success = ucloud.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
is_copy_success = ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
if is_copy_success:
break
@@ -186,7 +186,7 @@ def photo_mask(pk_phhd, name, id_card_num):
cv2.imwrite(temp_file.name, image)
try:
for i in range(UPLOAD_TRY_TIMES):
is_upload_success = ucloud.upload_file(phrec.cfjaddress, temp_file.name)
is_upload_success = ufile.upload_file(phrec.cfjaddress, temp_file.name)
if is_upload_success:
break
except Exception as e:

View File

@@ -15,7 +15,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
UPPERCASE_MEDICAL_EXPENSES
from ucloud import ucloud
from ucloud import ufile
from util import image_util, util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money
@@ -73,7 +73,7 @@ def information_extraction(ie, phrecs):
# 同一批图的标识
identity = int(time.time())
for phrec in phrecs:
img_path = ucloud.get_private_url(phrec.cfjaddress)
img_path = ufile.get_private_url(phrec.cfjaddress)
if not img_path:
continue
split_results = image_util.split(img_path)

View File

@@ -9,4 +9,5 @@ pymysql==1.1.0
requests==2.22.0
sqlacodegen==2.3.0.post1
sqlalchemy==1.4.52
tenacity==8.5.0
ufile==3.2.9

View File

@@ -3,7 +3,7 @@ import cv2
from db import MysqlSession
from db.mysql import ZxIeOcrerror
from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout
from ucloud import ucloud
from ucloud import ufile
from util import image_util
if __name__ == '__main__':
@@ -13,13 +13,13 @@ if __name__ == '__main__':
session.close()
for ocr_error in ocr_errors:
final_img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg100")
final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100")
final_image = image_util.read(final_img_url)
cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image)
img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg2015")
img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg2015")
if not img_url:
# 没有自动涂抹的图片
img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg103")
img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg103")
name = ocr_error.cXm
id_card_num = ocr_error.cSfzh

View File

@@ -1,6 +1,6 @@
# 公钥
from ufile import config
# 公钥
PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq"
# 私钥
PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG"
@@ -13,5 +13,12 @@ DOWNLOAD_SUFFIX = ".cn-sh2.ufileos.com"
# 私空间文件地址过期时间(秒)
PRIVATE_EXPIRES = 3600
# 设置默认请求超时时间
# 设置默认请求超时时间(秒)
config.set_default(connection_timeout=60)
# Number of attempts
TRY_TIMES = 5
# Minimum wait time (seconds)
MIN_WAIT_TIME = 1
# Maximum wait time (seconds)
MAX_WAIT_TIME = 3

View File

@@ -1,58 +0,0 @@
# https://github.com/ucloud/ufile-sdk-python
import logging
from time import sleep
from ufile import filemanager
from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES
# Module-wide FileManager instance built from the keys and the upload/download
# suffixes imported from the ucloud package; every function below goes through it.
UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
def get_private_url(key, bucket=BUCKET):
    """Return a time-limited private download URL for ``key``, or ``None``.

    The object is first checked with ``head_file``. A status code of -1 (which
    this module treats as a connection failure) is retried, for at most three
    attempts with a 3-second pause between them.

    Args:
        key: Object key inside the bucket.
        bucket: Bucket name; defaults to the module-level ``BUCKET``.

    Returns:
        A private download URL that expires after ``PRIVATE_EXPIRES`` seconds,
        or ``None`` when the HEAD request answers with a non-200 status or when
        every attempt ends in a connection failure.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        # Check that the file exists before signing a URL for it.
        _, resp = UFILE_HANDLER.head_file(bucket, key)
        if resp.status_code == -1:
            # Only announce a retry (and wait) when another attempt follows.
            if attempt < max_attempts:
                logging.warning("uCloud连接失败!即将重试...")
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"uCloud中未找到({key})! status: {resp.status_code} error: {resp.error}")
            return None
        # Private-space download URL; ``expires`` is the validity in seconds.
        return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
    # All attempts failed to connect: say so and return an explicit result
    # instead of silently falling off the end of the function.
    logging.error(f"uCloud连接失败!已尝试{max_attempts}次,放弃查询({key})")
    return None
def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket/key to another.

    A status code of -1 (which this module treats as a connection failure) is
    retried, for at most three attempts with a 3-second pause between them.

    Args:
        source_bucket: Bucket that currently holds the object.
        source_key: Key of the object to copy.
        target_bucket: Bucket to copy into.
        target_key: Key to store the copy under.

    Returns:
        ``True`` when the copy request answers with status 200, ``False`` when
        it answers with any other status or when every attempt ends in a
        connection failure.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        # Note the SDK argument order: target first, then source.
        _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
        if resp.status_code == -1:
            # Only announce a retry (and wait) when another attempt follows.
            if attempt < max_attempts:
                logging.warning("uCloud连接失败!即将重试...")
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(
                f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
    # Previously this path returned None implicitly; return an explicit False
    # so the result is always a bool, and record the final failure.
    logging.error(f"uCloud连接失败!已尝试{max_attempts}次,放弃拷贝({source_key})")
    return False
def upload_file(key, file_path, bucket=BUCKET):
    """Upload a local file to the bucket with a plain (single-request) put.

    A status code of -1 (which this module treats as a connection failure) is
    retried, for at most three attempts with a 3-second pause between them.

    Args:
        key: Object key to store the file under.
        file_path: Path of the local file to upload.
        bucket: Bucket name; defaults to the module-level ``BUCKET``.

    Returns:
        ``True`` when the upload answers with status 200, ``False`` when it
        answers with any other status or when every attempt ends in a
        connection failure.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
        if resp.status_code == -1:
            # Only announce a retry (and wait) when another attempt follows.
            if attempt < max_attempts:
                logging.warning("uCloud连接失败!即将重试...")
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
    # Previously this path returned None implicitly; return an explicit False
    # so the result is always a bool, and record the final failure.
    logging.error(f"uCloud连接失败!已尝试{max_attempts}次,放弃上传({key})")
    return False

61
ucloud/ufile.py Normal file
View File

@@ -0,0 +1,61 @@
# https://github.com/ucloud/ufile-sdk-python
import logging
from tenacity import retry, stop_after_attempt, wait_random, retry_if_exception_type
from ufile import filemanager
from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES, TRY_TIMES, \
MIN_WAIT_TIME, MAX_WAIT_TIME
# Module-wide FileManager instance built from the keys and the upload/download
# suffixes imported from the ucloud package; every function below goes through it.
UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
# Named 'ucloud' logger used for all messages emitted by this module.
UCLOUD_LOGGER = logging.getLogger('ucloud')
@retry(
    retry=retry_if_exception_type(ConnectionError),
    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
    stop=stop_after_attempt(TRY_TIMES),
    reraise=True,
)
def get_private_url(key, bucket=BUCKET):
    """Return a time-limited private download URL for ``key``, or ``None``.

    The object is first checked with ``head_file``. A status code of -1 is
    logged and turned into ``ConnectionError``, which the ``retry`` decorator
    retries up to ``TRY_TIMES`` attempts with a random wait between
    ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME`` seconds.

    Args:
        key: Object key inside the bucket.
        bucket: Bucket name; defaults to the module-level ``BUCKET``.

    Returns:
        A private download URL that expires after ``PRIVATE_EXPIRES`` seconds,
        or ``None`` when the HEAD request answers with any other non-200 status.

    Raises:
        ConnectionError: when the last allowed attempt still reports -1
            (``reraise=True`` propagates the original exception).
    """
    # Make sure the object exists before signing a URL for it.
    _, head_resp = UFILE_HANDLER.head_file(bucket, key)
    status = head_resp.status_code

    if status == 200:
        # Signed URL for the private space; ``expires`` is in seconds.
        # (A public-space URL would come from public_download_url instead.)
        return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)

    if status == -1:
        UCLOUD_LOGGER.warning(f"查询{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")

    UCLOUD_LOGGER.warning(f"({bucket})中未找到({key})! status: {head_resp.status_code} error: {head_resp.error}")
    return None
@retry(
    retry=retry_if_exception_type(ConnectionError),
    wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
    stop=stop_after_attempt(TRY_TIMES),
    reraise=True,
)
def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket/key to another.

    A status code of -1 is logged and turned into ``ConnectionError``, which
    the ``retry`` decorator retries up to ``TRY_TIMES`` attempts with a random
    wait between ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME`` seconds.

    Args:
        source_bucket: Bucket that currently holds the object.
        source_key: Key of the object to copy.
        target_bucket: Bucket to copy into.
        target_key: Key to store the copy under.

    Returns:
        ``True`` when the copy answers with status 200, ``False`` for any
        other status except -1.

    Raises:
        ConnectionError: when the last allowed attempt still reports -1
            (``reraise=True`` propagates the original exception).
    """
    # Note the SDK argument order: target first, then source.
    _, copy_resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
    status = copy_resp.status_code

    if status == 200:
        return True

    if status == -1:
        UCLOUD_LOGGER.warning(f"复制{source_key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")

    UCLOUD_LOGGER.warning(
        f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {copy_resp.status_code} error: {copy_resp.error}"
    )
    return False
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
       retry=retry_if_exception_type(ConnectionError), reraise=True)
def upload_file(key, file_path, bucket=BUCKET):
    """Upload a local file to the bucket with a plain (single-request) put.

    A status code of -1 is logged and turned into ``ConnectionError``, which
    the ``retry`` decorator retries up to ``TRY_TIMES`` attempts with a random
    wait between ``MIN_WAIT_TIME`` and ``MAX_WAIT_TIME`` seconds.

    Args:
        key: Object key to store the file under.
        file_path: Path of the local file to upload.
        bucket: Bucket name; defaults to the module-level ``BUCKET``.

    Returns:
        ``True`` when the upload answers with status 200, ``False`` for any
        other status except -1.

    Raises:
        ConnectionError: when the last allowed attempt still reports -1
            (``reraise=True`` propagates the original exception).
    """
    # Plain upload of the file into the bucket.
    _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
    if resp.status_code == -1:
        # The message no longer promises a retry: this line is also logged on
        # the final attempt, after which the error is re-raised. It now
        # matches the wording used by get_private_url and copy_file.
        UCLOUD_LOGGER.warning(f"上传{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")
    if resp.status_code != 200:
        UCLOUD_LOGGER.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
        return False
    return True

View File

@@ -10,7 +10,7 @@ from paddlenlp import Taskflow
from paddlenlp.utils.doc_parser import DocParser
from paddleocr import PaddleOCR
from ucloud import ucloud
from ucloud import ufile
from util import image_util, util
@@ -118,24 +118,24 @@ def main(model_type, pic_name=None):
if model_type == "ocr":
task_path = None
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
schema = None
elif model_type == "settlement":
task_path = "../model/settlement_list_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额",
"医保类型", "住院号", "医保结算单号码", "大写总额"]
elif model_type == "discharge":
task_path = "../model/discharge_record_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"]
elif model_type == "cost":
task_path = "../model/cost_list_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
schema = ["患者姓名", "入院日期", "出院日期", "费用总额"]
elif model_type == "cost_detail":
task_path = "../model/cost_list_detail_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]}
else:
print("请输入正确的类型!")