Reapply "整理项目结构,优化配置项"

This reverts commit 2f650dd880.
This commit is contained in:
2024-07-15 15:20:52 +08:00
parent 2f650dd880
commit f4a2a38df0
33 changed files with 275 additions and 506 deletions

158
util/data_util.py Normal file
View File

@@ -0,0 +1,158 @@
import re
from datetime import datetime
# Normalize monetary-amount strings
def handle_decimal(string):
    """Reduce an amount string to digits with at most one decimal point.

    Every character other than ``0-9`` and ``.`` is discarded. When several
    dots are present, only the last one is kept as the decimal point. The
    integer part is cut down to its last 16 digits; the fractional part is
    kept as-is. A trailing dot with no fraction is dropped.

    Returns "" for empty/None input.
    """
    if not string:
        return ""
    cleaned = re.sub(r'[^0-9.]', '', string)
    integer_part, dot, fraction = cleaned.rpartition(".")
    if not dot:
        # rpartition leaves the whole text in the tail when no dot exists.
        integer_part, fraction = fraction, ""
    # Extra dots before the decimal point are treated as grouping noise.
    integer_part = integer_part.replace(".", "")[-16:]
    if fraction:
        return integer_part + "." + fraction
    return integer_part
# Normalize date strings
def handle_date(string):
    """Convert a loosely formatted date string to ``YYYY-MM-DD``.

    Accepted shapes (after cleanup): ``yyyy-MM-dd``, ``yy-MM-dd``,
    ``yyyyMMdd`` and ``yyMMdd``. Year/month markers, ``/`` and ``.`` are
    treated as separators; anything after the day (e.g. a time of day) is
    discarded.

    Returns "" for empty/None input, for text that cannot be parsed, and for
    dates whose year is not strictly between 2000 and 2100.
    """
    if not string:
        return ""
    # NOTE(review): the first three separators were empty strings in the
    # reviewed source (which makes str.replace insert a dash between every
    # character); restored as the year/month/day markers - confirm.
    for marker, replacement in (
        ("年", "-"),
        ("月", "-"),
        ("日", ""),
        ("/", "-"),
        (".", "-"),
    ):
        string = string.replace(marker, replacement)
    string = re.sub(r'[^0-9-]', '', string).strip("-")

    if "-" in string:
        # Keep at most year-month-day, and at most two digits of the last
        # field so trailing time digits glued to the day are dropped.
        fields = string.split("-")[:3]
        fields[-1] = fields[-1][:2]
        string = "-".join(fields)
    else:
        string = string[:8]
    if len(string) < 6:
        return ""

    if len(string) == 6 and "-" not in string:
        # strptime accepts unpadded month/day, so '%Y%m%d' would read
        # "200715" as 2007-01-05. Prefer the two-digit-year layout for
        # compact six-digit input and keep the other as a fallback.
        formats = ('%y%m%d', '%Y%m%d')
    else:
        formats = ('%Y-%m-%d', '%y-%m-%d', '%Y%m%d', '%y%m%d')

    for fmt in formats:
        try:
            date = datetime.strptime(string, fmt)
        except ValueError:
            continue
        # Only accept years inside the supported window.
        if 2000 < date.year < 2100:
            return date.strftime("%Y-%m-%d")
    return ""
def handle_hospital(string):
    """Return the hospital name capped at 255 characters ("" when empty/None)."""
    return string[:255] if string else ""
def handle_department(string):
    """Return the department name capped at 255 characters ("" when empty/None)."""
    if string:
        return string[:255]
    return ""
def parse_department(string):
    """Build a list of progressively simplified department-name candidates.

    Steps, each appended only when it differs from the previous step:
      1. The name with bracketed segments and all non-CJK characters removed
         (capped at 255 characters).
      2. Step 1 without digits / Chinese numerals.
      3. Step 2 cut after the first department suffix character (the suffix
         is appended when the text does not contain it).
      4. Step 3 without the inner/outer qualifier characters.

    Returns an empty list for empty/None input or when nothing remains after
    cleanup.
    """
    result = []
    if not string:
        return result

    # NOTE(review): the full-width parenthesis alternative, the split
    # separator and the two replaced characters were empty in the reviewed
    # source (str.split("") raises ValueError); restored from context -
    # confirm against the intended characters.
    string = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^\{\}]*\}|（[^（）]*）|[^⺀-鿿]',
        '',
        string,
    )[:255]
    if not string:
        return result
    result.append(string)

    string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
    # Stop when the name consisted of numerals only; there is nothing left
    # to derive further candidates from.
    if not string_without_num:
        return result
    if string_without_num != string:
        result.append(string_without_num)

    pure_string = string_without_num.split("科")[0] + "科"
    if pure_string != string_without_num:
        result.append(pure_string)

    pure_string_without_io = pure_string.replace("内", "").replace("外", "")
    if pure_string_without_io != pure_string:
        # Append the qualifier-free variant (not pure_string a second time).
        result.append(pure_string_without_io)
    return result
# Normalize person-name strings
def handle_name(string):
    """Keep only CJK characters and the middle dot, capped at 30 characters.

    Returns "" for empty/None input.
    """
    if string:
        name_chars = re.sub(r'[^⺀-鿿·]', '', string)
        return name_chars[:30]
    return ""
# Normalize medical-insurance type strings
def handle_insurance_type(string):
    """Map free-text insurance descriptions to one of the fixed categories.

    Keyword groups are checked in order and the first group with a keyword
    contained in ``string`` wins; text matching no group maps to the
    catch-all category. Returns "" for empty/None input.
    """
    if not string:
        return ""
    # NOTE(review): the third employee keyword was an empty string in the
    # reviewed source, which is a substring of every input and forced every
    # value into the first category. Replaced with the term used by the
    # returned label - confirm against the intended keyword.
    keyword_groups = (
        (("社保", "城保", "职工", "退休"), "职工医保"),
        (("农保", "居民"), "居民医保"),
        (("农民工",), "农民工医保"),
        (("自费", "全费"), "无医保"),
    )
    for keywords, category in keyword_groups:
        if any(key in string for key in keywords):
            return category
    return "其他"
# Normalize raw (unprocessed) data
def handle_original_data(string):
    """Return the raw value capped at 255 characters ("" when empty/None)."""
    # Over-long values would fail to be stored in the database.
    return string[:255] if string else ""
# Normalize id-like data
def handle_id(string):
    """Return the identifier capped at 50 characters ("" when empty/None)."""
    if string:
        # Over-long values would fail to be stored in the database.
        return string[:50]
    return ""
# Normalize age strings
def handle_age(string):
    """Extract the age in years as a digit string of at most three digits.

    Only the text before the first years marker is considered, so a trailing
    months part (e.g. "3岁6月") does not leak into the result. All non-digit
    characters are removed and the last three digits are kept.

    Returns "" for empty/None input or when no digits are found.
    """
    if not string:
        return ""
    # NOTE(review): the separator was an empty string in the reviewed source
    # (str.split("") raises ValueError); restored as the years marker -
    # confirm.
    years_part = string.split("岁")[0]
    digits = re.sub(r'\D', '', years_part)
    return digits[-3:]