统一引号格式,优化架构排布
This commit is contained in:
@@ -62,9 +62,9 @@ def delete_temp_file(temp_files):
|
||||
for file in temp_files:
|
||||
try:
|
||||
os.remove(file)
|
||||
logging.info(f"临时文件 {file} 已删除")
|
||||
logging.info(f'临时文件 {file} 已删除')
|
||||
except Exception as e:
|
||||
logging.warning(f"删除临时文件 {file} 时出错: {e}")
|
||||
logging.warning(f'删除临时文件 {file} 时出错: {e}')
|
||||
|
||||
|
||||
def zoom_rectangle(rectangle, ratio):
|
||||
@@ -83,40 +83,40 @@ def zoom_rectangle(rectangle, ratio):
|
||||
|
||||
|
||||
def chinese_to_money_unit(chinese):
|
||||
if chinese in ["拾", "十"]:
|
||||
if chinese in ['拾', '十']:
|
||||
return 10, False
|
||||
elif chinese in ["佰", "百"]:
|
||||
elif chinese in ['佰', '百']:
|
||||
return 100, False
|
||||
elif chinese in ["仟", "千"]:
|
||||
elif chinese in ['仟', '千']:
|
||||
return 1000, False
|
||||
elif chinese == "万":
|
||||
elif chinese == '万':
|
||||
return 10000, True
|
||||
elif chinese == "亿":
|
||||
elif chinese == '亿':
|
||||
return 100000000, True
|
||||
else:
|
||||
return None, False
|
||||
|
||||
|
||||
def chinese_char_to_number(chinese):
|
||||
if chinese == "零":
|
||||
if chinese == '零':
|
||||
return 0
|
||||
elif chinese in ["一", "壹"]:
|
||||
elif chinese in ['一', '壹']:
|
||||
return 1
|
||||
elif chinese in ["二", "贰"]:
|
||||
elif chinese in ['二', '贰']:
|
||||
return 2
|
||||
elif chinese in ["三", "叁"]:
|
||||
elif chinese in ['三', '叁']:
|
||||
return 3
|
||||
elif chinese in ["四", "肆"]:
|
||||
elif chinese in ['四', '肆']:
|
||||
return 4
|
||||
elif chinese in ["五", "伍"]:
|
||||
elif chinese in ['五', '伍']:
|
||||
return 5
|
||||
elif chinese in ["六", "陆"]:
|
||||
elif chinese in ['六', '陆']:
|
||||
return 6
|
||||
elif chinese in ["七", "柒"]:
|
||||
elif chinese in ['七', '柒']:
|
||||
return 7
|
||||
elif chinese in ["八", "捌"]:
|
||||
elif chinese in ['八', '捌']:
|
||||
return 8
|
||||
elif chinese in ["九", "玖"]:
|
||||
elif chinese in ['九', '玖']:
|
||||
return 9
|
||||
else:
|
||||
return -1
|
||||
@@ -137,12 +137,12 @@ def chinese_to_number(chinese):
|
||||
section += number * (unit[0] / 10)
|
||||
unit = [None, False]
|
||||
elif number > 0:
|
||||
raise ValueError(f"{chinese} has bad number '{chinese[i - 1]}{c}' at: {i}")
|
||||
raise ValueError(f"'{chinese} has bad number '{chinese[i - 1]}{c}' at: {i}'")
|
||||
number = num
|
||||
else:
|
||||
unit = chinese_to_money_unit(c)
|
||||
if unit[0] is None:
|
||||
raise ValueError(f"{chinese} has unknown unit '{c}' at: {i}")
|
||||
raise ValueError(f"'{chinese} has unknown unit '{c}' at: {i}'")
|
||||
if unit[1]:
|
||||
section = (section + number) * unit[0]
|
||||
result += section
|
||||
@@ -163,14 +163,14 @@ def chinese_to_number(chinese):
|
||||
def chinese_money_to_number(chinese_money_amount):
|
||||
if string_util.blank(chinese_money_amount):
|
||||
return None
|
||||
yi = chinese_money_amount.find("元")
|
||||
yi = chinese_money_amount.find('元')
|
||||
if yi == -1:
|
||||
yi = chinese_money_amount.find("圆")
|
||||
ji = chinese_money_amount.find("角")
|
||||
fi = chinese_money_amount.find("分")
|
||||
yi = chinese_money_amount.find('圆')
|
||||
ji = chinese_money_amount.find('角')
|
||||
fi = chinese_money_amount.find('分')
|
||||
|
||||
if yi == -1 and ji == -1 and fi == -1:
|
||||
raise ValueError(f"无法解析: {chinese_money_amount}")
|
||||
raise ValueError(f'无法解析: {chinese_money_amount}')
|
||||
|
||||
y_str = None
|
||||
if yi > 0:
|
||||
|
||||
@@ -8,20 +8,20 @@ from util import common_util
|
||||
# 处理金额类数据
|
||||
def handle_decimal(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
string = re.sub(r'[^0-9.]', '', string)
|
||||
if not string:
|
||||
return ""
|
||||
if "." not in string:
|
||||
return ''
|
||||
if '.' not in string:
|
||||
if len(string) > 2:
|
||||
result = string[:-2] + "." + string[-2:]
|
||||
result = string[:-2] + '.' + string[-2:]
|
||||
else:
|
||||
result = string
|
||||
else:
|
||||
front, back = string.rsplit('.', 1)
|
||||
front = front.replace(".", "")
|
||||
front = front.replace('.', '')
|
||||
if back:
|
||||
back = "." + back[:2]
|
||||
back = '.' + back[:2]
|
||||
result = front + back
|
||||
return result[:16]
|
||||
|
||||
@@ -32,7 +32,7 @@ def parse_money(capital_num, num):
|
||||
money = common_util.chinese_money_to_number(capital_num)
|
||||
return capital_num, money
|
||||
except Exception as e:
|
||||
logging.warning("大写金额解析失败", exc_info=e)
|
||||
logging.warning('大写金额解析失败', exc_info=e)
|
||||
|
||||
return num, handle_decimal(num)
|
||||
|
||||
@@ -40,17 +40,17 @@ def parse_money(capital_num, num):
|
||||
# 处理日期类数据
|
||||
def handle_date(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
|
||||
string = string.replace("年", "-").replace("月", "-").replace("日", "").replace("/", "-").replace(".", "-")
|
||||
string = string.replace('年', '-').replace('月', '-').replace('日', '').replace('/', '-').replace('.', '-')
|
||||
string = re.sub(r'[^0-9-]', '', string)
|
||||
string = string.strip("-")
|
||||
if "-" in string:
|
||||
dash_count = string.count("-")
|
||||
string = string.strip('-')
|
||||
if '-' in string:
|
||||
dash_count = string.count('-')
|
||||
if dash_count > 2:
|
||||
third_dash_index = string.find("-", string.find("-", string.find("-") + 1) + 1)
|
||||
third_dash_index = string.find('-', string.find('-', string.find('-') + 1) + 1)
|
||||
string = string[:third_dash_index]
|
||||
day = string[string.rindex("-") + 1:]
|
||||
day = string[string.rindex('-') + 1:]
|
||||
if len(day) > 2:
|
||||
string = string[:2 - len(day)]
|
||||
else:
|
||||
@@ -58,7 +58,7 @@ def handle_date(string):
|
||||
string = string[:8]
|
||||
|
||||
if len(string) < 6:
|
||||
return ""
|
||||
return ''
|
||||
|
||||
# 定义可能的日期格式
|
||||
formats = [
|
||||
@@ -78,23 +78,23 @@ def handle_date(string):
|
||||
date = datetime.strptime(string, fmt)
|
||||
# 限定日期的年份范围
|
||||
if 2000 < date.year < 2100:
|
||||
return date.strftime("%Y-%m-%d")
|
||||
return date.strftime('%Y-%m-%d')
|
||||
continue
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
return ""
|
||||
return ''
|
||||
|
||||
|
||||
def handle_hospital(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
return string[:255]
|
||||
|
||||
|
||||
def handle_department(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
return string[:255]
|
||||
|
||||
|
||||
@@ -103,12 +103,12 @@ def parse_department(string):
|
||||
if not string:
|
||||
return result
|
||||
|
||||
string = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ") # 去除括号
|
||||
string = string.replace(')', '').replace(')', '').replace('(', ' ').replace('(', ' ') # 去除括号
|
||||
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
|
||||
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
|
||||
string = string.replace("病区", "").replace("病", "") # 去除常见的无意义词
|
||||
string = string.replace("科", " ") # 分离科室
|
||||
departments = string.strip().split(" ")
|
||||
string = string.replace('病区', '').replace('病', '') # 去除常见的无意义词
|
||||
string = string.replace('科', ' ') # 分离科室
|
||||
departments = string.strip().split(' ')
|
||||
for department in departments:
|
||||
if department:
|
||||
result.append(department)
|
||||
@@ -118,33 +118,33 @@ def parse_department(string):
|
||||
# 处理姓名类数据
|
||||
def handle_name(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
return re.sub(r'[^⺀-鿿·]', '', string)[:30]
|
||||
|
||||
|
||||
# 处理医保类型数据
|
||||
def handle_insurance_type(string):
|
||||
if not string:
|
||||
return ""
|
||||
worker_insurance_keys = ["社保", "城保", "职", "退休"]
|
||||
villager_insurance_keys = ["农保", "居民"]
|
||||
migrant_worker_insurance_keys = ["农民工"]
|
||||
no_insurance_keys = ["自费", "全费"]
|
||||
return ''
|
||||
worker_insurance_keys = ['社保', '城保', '职', '退休']
|
||||
villager_insurance_keys = ['农保', '居民']
|
||||
migrant_worker_insurance_keys = ['农民工']
|
||||
no_insurance_keys = ['自费', '全费']
|
||||
if any(key in string for key in worker_insurance_keys):
|
||||
return "职工医保"
|
||||
return '职工医保'
|
||||
if any(key in string for key in villager_insurance_keys):
|
||||
return "居民医保"
|
||||
return '居民医保'
|
||||
if any(key in string for key in migrant_worker_insurance_keys):
|
||||
return "农民工医保"
|
||||
return '农民工医保'
|
||||
if any(key in string for key in no_insurance_keys):
|
||||
return "无医保"
|
||||
return "其他"
|
||||
return '无医保'
|
||||
return '其他'
|
||||
|
||||
|
||||
# 处理原始数据
|
||||
def handle_original_data(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
# 防止过长存入数据库失败
|
||||
return string[:255]
|
||||
|
||||
@@ -152,7 +152,7 @@ def handle_original_data(string):
|
||||
# 处理id类数据
|
||||
def handle_id(string):
|
||||
if not string:
|
||||
return ""
|
||||
return ''
|
||||
# 防止过长存入数据库失败
|
||||
return string[:50]
|
||||
|
||||
@@ -160,8 +160,8 @@ def handle_id(string):
|
||||
# 处理年龄类数据
|
||||
def handle_age(string):
|
||||
if not string:
|
||||
return ""
|
||||
string = string.split("岁")[0]
|
||||
return ''
|
||||
string = string.split('岁')[0]
|
||||
num = re.sub(r'\D', '', string)
|
||||
return num[-3:]
|
||||
|
||||
@@ -173,8 +173,8 @@ def parse_hospital(string):
|
||||
return result
|
||||
|
||||
string = common_util.traditional_to_simple_chinese(string)
|
||||
string_without_brackets = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ")
|
||||
string_without_company = string_without_brackets.replace("有限公司", "")
|
||||
split_hospitals = string_without_company.replace("医院", "医院 ")
|
||||
result += split_hospitals.strip().split(" ")
|
||||
string_without_brackets = string.replace(')', '').replace(')', '').replace('(', ' ').replace('(', ' ')
|
||||
string_without_company = string_without_brackets.replace('有限公司', '')
|
||||
split_hospitals = string_without_company.replace('医院', '医院 ')
|
||||
result += split_hospitals.strip().split(' ')
|
||||
return result
|
||||
|
||||
@@ -15,7 +15,7 @@ def get_jsczt_id_base(url):
|
||||
if response.status_code != 200:
|
||||
raise Exception(f'请求江苏省财政票据失败!状态码: {response.status_code}')
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
hidden_input = soup.find('input', {'name': "idBase"})
|
||||
hidden_input = soup.find('input', {'name': 'idBase'})
|
||||
if hidden_input:
|
||||
# 获取隐藏字段的值
|
||||
value = hidden_input.get('value')
|
||||
|
||||
@@ -13,14 +13,14 @@ from log import PROJECT_ROOT
|
||||
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 3), reraise=True,
|
||||
after=lambda x: logging.warning("获取图片失败!"))
|
||||
after=lambda x: logging.warning('获取图片失败!'))
|
||||
def read(image_path):
|
||||
"""
|
||||
从网络或本地读取图片
|
||||
:param image_path: 网络或本地路径
|
||||
:return: NumPy数组形式的图片
|
||||
"""
|
||||
if image_path.startswith("http"):
|
||||
if image_path.startswith('http'):
|
||||
# 发送HTTP请求并获取图像数据
|
||||
resp = urllib.request.urlopen(image_path, timeout=60)
|
||||
# 将数据读取为字节流
|
||||
@@ -76,35 +76,35 @@ def split(image, ratio=1.414, overlap=0.05, x_compensation=3):
|
||||
for i in range(math.ceil(height / step)):
|
||||
offset = round(step * i)
|
||||
cropped_img = capture(image, [0, offset, width, offset + new_img_height])
|
||||
split_result.append({"img": cropped_img, "x_offset": 0, "y_offset": offset})
|
||||
split_result.append({'img': cropped_img, 'x_offset': 0, 'y_offset': offset})
|
||||
elif wh_ratio > ratio: # 横向过长
|
||||
new_img_width = height * ratio
|
||||
step = height * (ratio - overlap * x_compensation) # 一般文字是横向的,所以横向截取时增大重叠部分
|
||||
for i in range(math.ceil(width / step)):
|
||||
offset = round(step * i)
|
||||
cropped_img = capture(image, [offset, 0, offset + new_img_width, width])
|
||||
split_result.append({"img": cropped_img, "x_offset": offset, "y_offset": 0})
|
||||
split_result.append({'img': cropped_img, 'x_offset': offset, 'y_offset': 0})
|
||||
else:
|
||||
split_result.append({"img": image, "x_offset": 0, "y_offset": 0})
|
||||
split_result.append({'img': image, 'x_offset': 0, 'y_offset': 0})
|
||||
return split_result
|
||||
|
||||
|
||||
def parse_rotation_angles(image):
|
||||
"""
|
||||
判断图片旋转角度,逆时针旋转该角度后为正。可能值["0", "90", "180", "270"]
|
||||
判断图片旋转角度,逆时针旋转该角度后为正。可能值['0', '90', '180', '270']
|
||||
:param image: 图片NumPy数组或文件路径
|
||||
:return: 最有可能的两个角度
|
||||
"""
|
||||
angles = ['0', '90']
|
||||
model = PaddleClas(model_name="text_image_orientation")
|
||||
model = PaddleClas(model_name='text_image_orientation')
|
||||
clas_result = model.predict(input_data=image)
|
||||
try:
|
||||
clas_result = next(clas_result)[0]
|
||||
if clas_result["scores"][0] < 0.5:
|
||||
if clas_result['scores'][0] < 0.5:
|
||||
return angles
|
||||
angles = clas_result["label_names"]
|
||||
angles = clas_result['label_names']
|
||||
except Exception as e:
|
||||
logging.error("获取图片旋转角度失败", exc_info=e)
|
||||
logging.error('获取图片旋转角度失败', exc_info=e)
|
||||
return angles
|
||||
|
||||
|
||||
@@ -201,25 +201,25 @@ def expand_to_a4_size(image):
|
||||
if hw_ratio >= 1.42:
|
||||
exp_w = int(height / 1.414 - width)
|
||||
x_offset = int(exp_w / 2)
|
||||
exp_img = numpy.zeros((height, x_offset, 3), dtype="uint8")
|
||||
exp_img = numpy.zeros((height, x_offset, 3), dtype='uint8')
|
||||
exp_img.fill(255)
|
||||
image = numpy.hstack([exp_img, image, exp_img])
|
||||
elif 1 <= hw_ratio <= 1.40:
|
||||
exp_h = int(width * 1.414 - height)
|
||||
y_offset = int(exp_h / 2)
|
||||
exp_img = numpy.zeros((y_offset, width, 3), dtype="uint8")
|
||||
exp_img = numpy.zeros((y_offset, width, 3), dtype='uint8')
|
||||
exp_img.fill(255)
|
||||
image = numpy.vstack([exp_img, image, exp_img])
|
||||
elif 0.72 <= hw_ratio < 1:
|
||||
exp_w = int(height * 1.414 - width)
|
||||
x_offset = int(exp_w / 2)
|
||||
exp_img = numpy.zeros((height, x_offset, 3), dtype="uint8")
|
||||
exp_img = numpy.zeros((height, x_offset, 3), dtype='uint8')
|
||||
exp_img.fill(255)
|
||||
image = numpy.hstack([exp_img, image, exp_img])
|
||||
elif hw_ratio <= 0.7:
|
||||
exp_h = int(width / 1.414 - height)
|
||||
y_offset = int(exp_h / 2)
|
||||
exp_img = numpy.zeros((y_offset, width, 3), dtype="uint8")
|
||||
exp_img = numpy.zeros((y_offset, width, 3), dtype='uint8')
|
||||
exp_img.fill(255)
|
||||
image = numpy.vstack([exp_img, image, exp_img])
|
||||
return image, x_offset, y_offset
|
||||
|
||||
Reference in New Issue
Block a user