统一引号格式,优化架构排布

This commit is contained in:
2024-09-26 15:16:57 +08:00
parent ff9d612e67
commit c5a03ad16f
22 changed files with 143 additions and 302 deletions

View File

@@ -8,20 +8,20 @@ from util import common_util
# Normalize money-like values
def handle_decimal(string):
    """Normalize a raw amount string to digits with at most two decimals.

    Every character other than 0-9 and '.' is removed. If no decimal point
    remains and there are more than two digits, a point is inserted before
    the last two digits. If several points remain, only the last one is kept
    as the decimal separator and the fraction is cut to two digits.

    Args:
        string: Raw amount text; any falsy value yields ''.

    Returns:
        The normalized amount truncated to 16 characters, or '' when no
        digit or point is left after cleaning.
    """
    if not string:
        return ''
    cleaned = re.sub(r'[^0-9.]', '', string)
    if not cleaned:
        return ''
    if '.' not in cleaned:
        # No separator at all: the last two digits become the fraction
        if len(cleaned) > 2:
            result = cleaned[:-2] + '.' + cleaned[-2:]
        else:
            result = cleaned
    else:
        # Only the last point is the separator; earlier ones are dropped
        front, back = cleaned.rsplit('.', 1)
        front = front.replace('.', '')
        if back:
            back = '.' + back[:2]
        result = front + back
    # Cap the length of the returned value
    return result[:16]
@@ -32,7 +32,7 @@ def parse_money(capital_num, num):
money = common_util.chinese_money_to_number(capital_num)
return capital_num, money
except Exception as e:
logging.warning("大写金额解析失败", exc_info=e)
logging.warning('大写金额解析失败', exc_info=e)
return num, handle_decimal(num)
@@ -40,17 +40,17 @@ def parse_money(capital_num, num):
# 处理日期类数据
def handle_date(string):
if not string:
return ""
return ''
string = string.replace("", "-").replace("", "-").replace("", "").replace("/", "-").replace(".", "-")
string = string.replace('', '-').replace('', '-').replace('', '').replace('/', '-').replace('.', '-')
string = re.sub(r'[^0-9-]', '', string)
string = string.strip("-")
if "-" in string:
dash_count = string.count("-")
string = string.strip('-')
if '-' in string:
dash_count = string.count('-')
if dash_count > 2:
third_dash_index = string.find("-", string.find("-", string.find("-") + 1) + 1)
third_dash_index = string.find('-', string.find('-', string.find('-') + 1) + 1)
string = string[:third_dash_index]
day = string[string.rindex("-") + 1:]
day = string[string.rindex('-') + 1:]
if len(day) > 2:
string = string[:2 - len(day)]
else:
@@ -58,7 +58,7 @@ def handle_date(string):
string = string[:8]
if len(string) < 6:
return ""
return ''
# 定义可能的日期格式
formats = [
@@ -78,23 +78,23 @@ def handle_date(string):
date = datetime.strptime(string, fmt)
# 限定日期的年份范围
if 2000 < date.year < 2100:
return date.strftime("%Y-%m-%d")
return date.strftime('%Y-%m-%d')
continue
except ValueError:
continue
return ""
return ''
def handle_hospital(string):
    """Return the hospital value cut to at most 255 characters.

    Args:
        string: Raw hospital text; any falsy value yields ''.

    Returns:
        The first 255 characters of ``string``, or '' for falsy input.
    """
    if not string:
        return ''
    return string[:255]
def handle_department(string):
    """Return the department value cut to at most 255 characters.

    Args:
        string: Raw department text; any falsy value yields ''.

    Returns:
        The first 255 characters of ``string``, or '' for falsy input.
    """
    if not string:
        return ''
    return string[:255]
@@ -103,12 +103,12 @@ def parse_department(string):
if not string:
return result
string = string.replace(")", "").replace("", "").replace("(", " ").replace("", " ") # 去除括号
string = string.replace(')', '').replace('', '').replace('(', ' ').replace('', ' ') # 去除括号
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
string = string.replace("病区", "").replace("", "") # 去除常见的无意义词
string = string.replace("", " ") # 分离科室
departments = string.strip().split(" ")
string = string.replace('病区', '').replace('', '') # 去除常见的无意义词
string = string.replace('', ' ') # 分离科室
departments = string.strip().split(' ')
for department in departments:
if department:
result.append(department)
@@ -118,33 +118,33 @@ def parse_department(string):
# Normalize name-like values
def handle_name(string):
    """Strip a name down to the characters allowed by the pattern below.

    Characters outside the range U+2E80..U+9FFF are removed, except for the
    middle dot '·', which is kept.

    Args:
        string: Raw name text; any falsy value yields ''.

    Returns:
        The filtered name, cut to at most 30 characters.
    """
    if not string:
        return ''
    return re.sub(r'[^⺀-鿿·]', '', string)[:30]
# Classify insurance-type values
def handle_insurance_type(string):
    """Map free-text insurance information to one of five fixed labels.

    The categories are tried in a fixed order and the first one with a
    keyword contained in ``string`` wins.

    Args:
        string: Raw insurance-type text; any falsy value yields ''.

    Returns:
        '职工医保', '居民医保', '农民工医保' or '无医保' when a keyword of that
        category occurs in ``string``; '其他' when none matches; '' for falsy
        input.
    """
    if not string:
        return ''
    # NOTE(review): SOURCE shows an empty-string entry among the first
    # category's keywords. `'' in s` is always True, so that entry would send
    # every non-empty input to '职工医保'. It is left out here; if a real
    # keyword belongs in that slot, restore it.
    categories = (
        ('职工医保', ('社保', '城保', '退休')),
        ('居民医保', ('农保', '居民')),
        ('农民工医保', ('农民工',)),
        ('无医保', ('自费', '全费')),
    )
    for label, keys in categories:
        if any(key in string for key in keys):
            return label
    return '其他'
# Normalize raw (original) values
def handle_original_data(string):
    """Return the raw value cut to at most 255 characters.

    Args:
        string: Raw text; any falsy value yields ''.

    Returns:
        The first 255 characters of ``string``, or '' for falsy input.
    """
    if not string:
        return ''
    # Keep the value short enough that storing it in the database does not fail
    return string[:255]
@@ -152,7 +152,7 @@ def handle_original_data(string):
# Normalize id-like values
def handle_id(string):
    """Return the id value cut to at most 50 characters.

    Args:
        string: Raw id text; any falsy value yields ''.

    Returns:
        The first 50 characters of ``string``, or '' for falsy input.
    """
    if not string:
        return ''
    # Keep the value short enough that storing it in the database does not fail
    return string[:50]
@@ -160,8 +160,8 @@ def handle_id(string):
# Normalize age-like values
def handle_age(string):
    """Extract the digits of an age from free text.

    Only the text before the first age-unit marker is considered; all
    non-digit characters are removed from it and the last three digits kept.

    Args:
        string: Raw age text; any falsy value yields ''.

    Returns:
        Up to three digits as a string, or '' when the input is falsy or the
        considered text contains no digit.
    """
    if not string:
        return ''
    # NOTE(review): SOURCE shows an empty separator here, and str.split('')
    # raises ValueError. '岁' is assumed to be the intended marker - confirm.
    head = string.split('岁')[0]
    digits = re.sub(r'\D', '', head)
    return digits[-3:]
@@ -173,8 +173,8 @@ def parse_hospital(string):
return result
string = common_util.traditional_to_simple_chinese(string)
string_without_brackets = string.replace(")", "").replace("", "").replace("(", " ").replace("", " ")
string_without_company = string_without_brackets.replace("有限公司", "")
split_hospitals = string_without_company.replace("医院", "医院 ")
result += split_hospitals.strip().split(" ")
string_without_brackets = string.replace(')', '').replace('', '').replace('(', ' ').replace('', ' ')
string_without_company = string_without_brackets.replace('有限公司', '')
split_hospitals = string_without_company.replace('医院', '医院 ')
result += split_hospitals.strip().split(' ')
return result