优化ucloud的日志及重试机制

This commit is contained in:
2024-07-22 17:31:32 +08:00
parent c06796d0a3
commit ee86bb4e74
10 changed files with 124 additions and 86 deletions

View File

@@ -1,7 +1,6 @@
import datetime import datetime
import json import json
import os import os
import sys
from decimal import Decimal from decimal import Decimal
from io import BytesIO from io import BytesIO
from itertools import groupby from itertools import groupby
@@ -9,10 +8,9 @@ from itertools import groupby
import requests import requests
from PIL import ImageDraw, Image, ImageFont from PIL import ImageDraw, Image, ImageFont
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from db import MysqlSession from db import MysqlSession
from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec from db.mysql import ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxOcr, ZxPhrec
from ucloud import ucloud from ucloud import ufile
def check_ie_result(pk_phhd): def check_ie_result(pk_phhd):
@@ -78,7 +76,7 @@ def check_ie_result(pk_phhd):
ZxPhrec.pk_phhd == pk_phhd).all() ZxPhrec.pk_phhd == pk_phhd).all()
for phrec in phrecs: for phrec in phrecs:
img_name = phrec.cfjaddress img_name = phrec.cfjaddress
img_path = ucloud.get_private_url(img_name) img_path = ufile.get_private_url(img_name)
response = requests.get(img_path) response = requests.get(img_path)
image = Image.open(BytesIO(response.content)).convert("RGB") image = Image.open(BytesIO(response.content)).convert("RGB")

View File

@@ -1,3 +1,16 @@
import os
import socket
# Use the host name as a path component so that logs written by different
# hosts/containers end up in separate directories.
HOSTNAME = socket.gethostname()
# Directories that must exist before the file-based log handlers open their files.
LOG_PATHS = [
    f"log/{HOSTNAME}/ucloud",
]
for path in LOG_PATHS:
    # exist_ok=True avoids the check-then-create race of
    # "if not os.path.exists(path): os.makedirs(path)" when several
    # processes start at the same time and share the log directory.
    os.makedirs(path, exist_ok=True)
# 配置字典 # 配置字典
LOGGING_CONFIG = { LOGGING_CONFIG = {
'version': 1, # 必需,指定配置格式的版本 'version': 1, # 必需,指定配置格式的版本
@@ -20,22 +33,38 @@ LOGGING_CONFIG = {
'stream': 'ext://sys.stdout', # 输出到标准输出默认编码跟随系统一般为UTF-8 'stream': 'ext://sys.stdout', # 输出到标准输出默认编码跟随系统一般为UTF-8
}, },
'file': { 'file': {
'class': 'logging.handlers.RotatingFileHandler', # 文件处理器,支持日志滚动 'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动
'level': 'INFO', 'level': 'INFO',
'formatter': 'standard', 'formatter': 'standard',
'filename': 'log/fcb_photo_review.log', # 日志文件路径 'filename': f'log/{HOSTNAME}/fcb_photo_review.log', # 日志文件路径
'maxBytes': 1024 * 1024 * 5, # 文件最大大小这里为5MB 'when': 'midnight',
'backupCount': 5, # 保留的备份文件数量 'interval': 1,
'backupCount': 14, # 保留的备份文件数量
'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文
},
'ucloud': {
'class': 'logging.handlers.TimedRotatingFileHandler', # 文件处理器,支持日志滚动
'level': 'INFO',
'formatter': 'standard',
'filename': f'log/{HOSTNAME}/ucloud/fcb_photo_review_ucloud.log', # 日志文件路径
'when': 'midnight',
'interval': 1,
'backupCount': 14, # 保留的备份文件数量
'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文 'encoding': 'utf-8', # 显式指定文件编码为UTF-8以支持中文
}, },
}, },
# loggers定义了日志记录器,这里是根记录器 # loggers定义了日志记录器
'loggers': { 'loggers': {
'': { # 根记录器 '': { # 根记录器
'handlers': ['console', 'file'], # 关联的处理器 'handlers': ['console', 'file'], # 关联的处理器
'level': 'DEBUG', # 根记录器的级别 'level': 'DEBUG', # 根记录器的级别
'propagate': False, # 是否向上级传播日志信息 'propagate': False, # 是否向上级传播日志信息
}, },
'ucloud': {
'handlers': ['console', 'ucloud'],
'level': 'DEBUG',
'propagate': False,
}
}, },
} }

View File

@@ -9,7 +9,7 @@ from db import MysqlSession
from db.mysql import ZxPhrec, ZxPhhd from db.mysql import ZxPhrec, ZxPhhd
from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \ from photo_mask import OCR, PHHD_BATCH_SIZE, SLEEP_MINUTES, COPY_TRY_TIMES, UPLOAD_TRY_TIMES, NAME_KEYS, \
ID_CARD_NUM_KEYS ID_CARD_NUM_KEYS
from ucloud import BUCKET, ucloud from ucloud import BUCKET, ufile
from util import image_util, util from util import image_util, util
@@ -150,7 +150,7 @@ def photo_mask(pk_phhd, name, id_card_num):
)).all() )).all()
session.close() session.close()
for phrec in phrecs: for phrec in phrecs:
img_url = ucloud.get_private_url(phrec.cfjaddress) img_url = ufile.get_private_url(phrec.cfjaddress)
if not img_url: if not img_url:
continue continue
# 是否有涂抹 # 是否有涂抹
@@ -178,7 +178,7 @@ def photo_mask(pk_phhd, name, id_card_num):
# 如果涂抹了要备份以及更新 # 如果涂抹了要备份以及更新
if is_masked: if is_masked:
for i in range(COPY_TRY_TIMES): for i in range(COPY_TRY_TIMES):
is_copy_success = ucloud.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress) is_copy_success = ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
if is_copy_success: if is_copy_success:
break break
@@ -186,7 +186,7 @@ def photo_mask(pk_phhd, name, id_card_num):
cv2.imwrite(temp_file.name, image) cv2.imwrite(temp_file.name, image)
try: try:
for i in range(UPLOAD_TRY_TIMES): for i in range(UPLOAD_TRY_TIMES):
is_upload_success = ucloud.upload_file(phrec.cfjaddress, temp_file.name) is_upload_success = ufile.upload_file(phrec.cfjaddress, temp_file.name)
if is_upload_success: if is_upload_success:
break break
except Exception as e: except Exception as e:

View File

@@ -15,7 +15,7 @@ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_E
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
UPPERCASE_MEDICAL_EXPENSES UPPERCASE_MEDICAL_EXPENSES
from ucloud import ucloud from ucloud import ufile
from util import image_util, util from util import image_util, util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money handle_insurance_type, handle_original_data, handle_hospital, handle_department, handle_id, handle_age, parse_money
@@ -73,7 +73,7 @@ def information_extraction(ie, phrecs):
# 同一批图的标识 # 同一批图的标识
identity = int(time.time()) identity = int(time.time())
for phrec in phrecs: for phrec in phrecs:
img_path = ucloud.get_private_url(phrec.cfjaddress) img_path = ufile.get_private_url(phrec.cfjaddress)
if not img_path: if not img_path:
continue continue
split_results = image_util.split(img_path) split_results = image_util.split(img_path)

View File

@@ -9,4 +9,5 @@ pymysql==1.1.0
requests==2.22.0 requests==2.22.0
sqlacodegen==2.3.0.post1 sqlacodegen==2.3.0.post1
sqlalchemy==1.4.52 sqlalchemy==1.4.52
tenacity==8.5.0
ufile==3.2.9 ufile==3.2.9

View File

@@ -3,7 +3,7 @@ import cv2
from db import MysqlSession from db import MysqlSession
from db.mysql import ZxIeOcrerror from db.mysql import ZxIeOcrerror
from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout from photo_mask.photo_mask import handle_image_for_mask, get_mask_layout
from ucloud import ucloud from ucloud import ufile
from util import image_util from util import image_util
if __name__ == '__main__': if __name__ == '__main__':
@@ -13,13 +13,13 @@ if __name__ == '__main__':
session.close() session.close()
for ocr_error in ocr_errors: for ocr_error in ocr_errors:
final_img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg100") final_img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg100")
final_image = image_util.read(final_img_url) final_image = image_util.read(final_img_url)
cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image) cv2.imwrite(f"./mask_optimization_result/answer/{ocr_error.cfjaddress}.jpg", final_image)
img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg2015") img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg2015")
if not img_url: if not img_url:
# 没有自动涂抹的图片 # 没有自动涂抹的图片
img_url = ucloud.get_private_url(ocr_error.cfjaddress, "drg103") img_url = ufile.get_private_url(ocr_error.cfjaddress, "drg103")
name = ocr_error.cXm name = ocr_error.cXm
id_card_num = ocr_error.cSfzh id_card_num = ocr_error.cSfzh

View File

@@ -1,6 +1,6 @@
# 公钥
from ufile import config from ufile import config
# 公钥
PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq" PUBLIC_KEY = "4Z7QYI7qml36QRjcCjKrls7aHl1R6H6uq"
# 私钥 # 私钥
PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG" PRIVATE_KEY = "FIdW1Kev1Ge3K7GHXzSLyGG1wTnaG6LE9BxmIVubcCaG"
@@ -13,5 +13,12 @@ DOWNLOAD_SUFFIX = ".cn-sh2.ufileos.com"
# 私空间文件地址过期时间(秒) # 私空间文件地址过期时间(秒)
PRIVATE_EXPIRES = 3600 PRIVATE_EXPIRES = 3600
# 设置默认请求超时时间 # 设置默认请求超时时间(秒)
config.set_default(connection_timeout=60) config.set_default(connection_timeout=60)
# 尝试次数
TRY_TIMES = 5
# 最小等待时间(秒)
MIN_WAIT_TIME = 1
# 最大等待时间(秒)
MAX_WAIT_TIME = 3

View File

@@ -1,58 +0,0 @@
# https://github.com/ucloud/ufile-sdk-python
import logging
from time import sleep
from ufile import filemanager
from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES
# Module-level UFile SDK client shared by every helper in this module; built
# from the key pair and the upload/download domain suffixes imported above.
UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
def get_private_url(key, bucket=BUCKET):
    """Return a signed private download URL for ``key``, or ``None``.

    The object is first looked up with a HEAD request. A response status of
    -1 is treated as a connection failure and the lookup is attempted up to
    three times in total, waiting 3 seconds between attempts.

    :param key: object key inside the bucket.
    :param bucket: bucket to look in; defaults to ``BUCKET``.
    :return: the signed URL (valid for ``PRIVATE_EXPIRES`` seconds), or
        ``None`` when the object is not found (non-200 status) or every
        attempt ended in a connection failure.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        # Check that the object exists before signing a URL for it.
        _, resp = UFILE_HANDLER.head_file(bucket, key)
        if resp.status_code == -1:
            logging.warning("uCloud连接失败!即将重试...")
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts - 1:
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"uCloud中未找到({key})! status: {resp.status_code} error: {resp.error}")
            return None
        # Private-bucket download URL; ``expires`` is the link lifetime in seconds.
        return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)
    # All attempts were connection failures.
    return None
def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket to another.

    A response status of -1 is treated as a connection failure and the copy
    is attempted up to three times in total, waiting 3 seconds between
    attempts.

    :param source_bucket: bucket that currently holds the object.
    :param source_key: key of the object to copy.
    :param target_bucket: bucket to copy into.
    :param target_key: key to store the copy under.
    :return: ``True`` when the copy returned status 200, ``False`` when it
        returned any other status or every attempt was a connection failure.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        # Note the SDK argument order: destination first, then source.
        _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
        if resp.status_code == -1:
            logging.warning("uCloud连接失败!即将重试...")
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts - 1:
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(
                f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
    # All attempts were connection failures: report failure explicitly
    # instead of falling off the end and returning None.
    return False
def upload_file(key, file_path, bucket=BUCKET):
    """Upload a local file to a bucket with a plain PUT.

    A response status of -1 is treated as a connection failure and the
    upload is attempted up to three times in total, waiting 3 seconds
    between attempts.

    :param key: object key to store the file under.
    :param file_path: path of the local file to upload.
    :param bucket: destination bucket; defaults to ``BUCKET``.
    :return: ``True`` when the upload returned status 200, ``False`` when it
        returned any other status or every attempt was a connection failure.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
        if resp.status_code == -1:
            logging.warning("uCloud连接失败!即将重试...")
            # Only wait when another attempt will actually follow.
            if attempt < max_attempts - 1:
                sleep(3)
            continue
        if resp.status_code != 200:
            logging.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
            return False
        return True
    # All attempts were connection failures: report failure explicitly
    # instead of falling off the end and returning None.
    return False

61
ucloud/ufile.py Normal file
View File

@@ -0,0 +1,61 @@
# https://github.com/ucloud/ufile-sdk-python
import logging
from tenacity import retry, stop_after_attempt, wait_random, retry_if_exception_type
from ufile import filemanager
from ucloud import PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX, BUCKET, PRIVATE_EXPIRES, TRY_TIMES, \
MIN_WAIT_TIME, MAX_WAIT_TIME
# Module-level UFile SDK client shared by every helper in this module; built
# from the key pair and the upload/download domain suffixes imported above.
UFILE_HANDLER = filemanager.FileManager(PUBLIC_KEY, PRIVATE_KEY, UPLOAD_SUFFIX, DOWNLOAD_SUFFIX)
# Logger named 'ucloud', used for all uCloud warnings emitted by this module.
UCLOUD_LOGGER = logging.getLogger('ucloud')
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
       retry=retry_if_exception_type(ConnectionError), reraise=True)
def get_private_url(key, bucket=BUCKET):
    """Return a signed private download URL for ``key``, or ``None``.

    The object is looked up with a HEAD request first. A status of -1 is
    logged and turned into ``ConnectionError``, which is the only exception
    type the ``retry`` decorator is configured to retry on.

    :param key: object key inside the bucket.
    :param bucket: bucket to look in; defaults to ``BUCKET``.
    :return: the signed URL (valid for ``PRIVATE_EXPIRES`` seconds), or
        ``None`` when the HEAD request returns a status other than 200.
    :raises ConnectionError: when the HEAD request reports status -1.
    """
    _, resp = UFILE_HANDLER.head_file(bucket, key)
    status = resp.status_code

    if status == -1:
        UCLOUD_LOGGER.warning(f"查询{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")

    if status == 200:
        # Private-bucket download URL; ``expires`` is the link lifetime in seconds.
        return UFILE_HANDLER.private_download_url(bucket, key, expires=PRIVATE_EXPIRES)

    UCLOUD_LOGGER.warning(f"({bucket})中未找到({key})! status: {resp.status_code} error: {resp.error}")
    return None
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
       retry=retry_if_exception_type(ConnectionError), reraise=True)
def copy_file(source_bucket, source_key, target_bucket, target_key):
    """Copy an object from one bucket to another.

    A status of -1 is logged and turned into ``ConnectionError``, which is
    the only exception type the ``retry`` decorator is configured to retry on.

    :param source_bucket: bucket that currently holds the object.
    :param source_key: key of the object to copy.
    :param target_bucket: bucket to copy into.
    :param target_key: key to store the copy under.
    :return: ``True`` when the copy returned status 200, ``False`` for any
        other status.
    :raises ConnectionError: when the copy request reports status -1.
    """
    # Note the SDK argument order: destination first, then source.
    _, resp = UFILE_HANDLER.copy(target_bucket, target_key, source_bucket, source_key)
    status = resp.status_code

    if status == -1:
        UCLOUD_LOGGER.warning(f"复制{source_key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")

    if status == 200:
        return True

    UCLOUD_LOGGER.warning(
        f"将({source_key})从({source_bucket})拷贝到({target_bucket})失败! status: {resp.status_code} error: {resp.error}"
    )
    return False
@retry(stop=stop_after_attempt(TRY_TIMES), wait=wait_random(MIN_WAIT_TIME, MAX_WAIT_TIME),
       retry=retry_if_exception_type(ConnectionError), reraise=True)
def upload_file(key, file_path, bucket=BUCKET):
    """Upload a local file to a bucket with a plain PUT.

    A status of -1 is logged and turned into ``ConnectionError``, which is
    the only exception type the ``retry`` decorator is configured to retry on.

    :param key: object key to store the file under.
    :param file_path: path of the local file to upload.
    :param bucket: destination bucket; defaults to ``BUCKET``.
    :return: ``True`` when the upload returned status 200, ``False`` for any
        other status.
    :raises ConnectionError: when the upload request reports status -1.
    """
    _, resp = UFILE_HANDLER.putfile(bucket, key, file_path, header=None)
    if resp.status_code == -1:
        # The message no longer promises a retry: whether one follows is
        # decided by the decorator, and on the final attempt none does.
        # This also matches the wording used by the sibling helpers.
        UCLOUD_LOGGER.warning(f"上传{key}时uCloud连接失败!")
        raise ConnectionError("uCloud连接失败")
    if resp.status_code != 200:
        UCLOUD_LOGGER.warning(f"上传({key})失败! status: {resp.status_code} error: {resp.error}")
        return False
    return True

View File

@@ -10,7 +10,7 @@ from paddlenlp import Taskflow
from paddlenlp.utils.doc_parser import DocParser from paddlenlp.utils.doc_parser import DocParser
from paddleocr import PaddleOCR from paddleocr import PaddleOCR
from ucloud import ucloud from ucloud import ufile
from util import image_util, util from util import image_util, util
@@ -118,24 +118,24 @@ def main(model_type, pic_name=None):
if model_type == "ocr": if model_type == "ocr":
task_path = None task_path = None
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg" test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240428000832_1_093844_2.jpg"
schema = None schema = None
elif model_type == "settlement": elif model_type == "settlement":
task_path = "../model/settlement_list_model" task_path = "../model/settlement_list_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg" test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000638_1_094306_1.jpg"
schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额", schema = ["患者姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费金额",
"医保类型", "住院号", "医保结算单号码", "大写总额"] "医保类型", "住院号", "医保结算单号码", "大写总额"]
elif model_type == "discharge": elif model_type == "discharge":
task_path = "../model/discharge_record_model" task_path = "../model/discharge_record_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg" test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240401000003_3_001938_2.jpg"
schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"] schema = ["医院", "科室", "患者姓名", "入院日期", "出院日期", "主治医生", "住院号", "年龄"]
elif model_type == "cost": elif model_type == "cost":
task_path = "../model/cost_list_model" task_path = "../model/cost_list_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
schema = ["患者姓名", "入院日期", "出院日期", "费用总额"] schema = ["患者姓名", "入院日期", "出院日期", "费用总额"]
elif model_type == "cost_detail": elif model_type == "cost_detail":
task_path = "../model/cost_list_detail_model" task_path = "../model/cost_list_detail_model"
test_img_path = ucloud.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg" test_img_path = ufile.get_private_url(pic_name) if pic_name else "img/PH20240511000648_4_094542_2.jpg"
schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]} schema = {"名称": ["类别", "规格", "单价", "数量", "金额"]}
else: else:
print("请输入正确的类型!") print("请输入正确的类型!")