102 lines
2.5 KiB
Python
102 lines
2.5 KiB
Python
import re
|
||
from datetime import datetime
|
||
|
||
|
||
# 处理金额类数据
|
||
def handle_decimal(string):
    """Normalize a monetary amount string to digits with at most one dot.

    Every character other than ASCII digits and ``.`` is discarded. When
    several dots are present, the last one is treated as the decimal
    separator and the earlier ones are removed. The integer part is limited
    to its trailing 16 digits.

    Args:
        string: Raw amount text; may be empty or ``None``.

    Returns:
        The cleaned amount, or ``""`` when the input is falsy. The decimal
        point is omitted when nothing follows it.
    """
    if not string:
        return ""

    cleaned = re.sub(r'[^0-9.]', '', string)

    # rpartition splits on the LAST dot; when there is no dot the separator
    # slot comes back empty and the whole text is the integer part.
    whole, dot, fraction = cleaned.rpartition('.')
    if not dot:
        whole, fraction = cleaned, ""

    # Drop any stray dots left in the integer part, then keep 16 digits.
    whole = whole.replace(".", "")[-16:]
    suffix = "." + fraction if fraction else ""
    return whole + suffix
|
||
|
||
|
||
# 处理日期类数据
|
||
def handle_date(string):
    """Normalize a loosely formatted date string to ``YYYY-MM-DD``.

    The Chinese markers 年/月 and the separators ``/`` and ``.`` are turned
    into ``-``, 日 is removed, and every remaining character that is not an
    ASCII digit or ``-`` is dropped. The result is cut to 10 characters when
    it contains a dash, otherwise to 8, and then parsed against several
    four-digit-year and two-digit-year layouts.

    Args:
        string: Raw date text; may be empty or ``None``.

    Returns:
        The date formatted as ``YYYY-MM-DD`` when a layout matches and the
        year lies strictly between 2000 and 2100; otherwise ``""``.
    """
    if not string:
        return ""

    # Unify all supported separators to "-".
    cleaned = string
    for old, new in (("年", "-"), ("月", "-"), ("日", ""), ("/", "-"), (".", "-")):
        cleaned = cleaned.replace(old, new)
    cleaned = re.sub(r'[^0-9-]', '', cleaned)

    # Dashed dates span at most 10 characters, compact ones at most 8;
    # anything beyond that (e.g. a trailing time) is cut off.
    max_len = 10 if "-" in cleaned else 8
    cleaned = cleaned[:max_len]

    if len(cleaned) < 6:
        return ""

    # Candidate layouts, tried in order:
    # yyyy-MM-dd, yy-MM-dd, yyyyMMdd, yyMMdd
    layouts = ('%Y-%m-%d', '%y-%m-%d', '%Y%m%d', '%y%m%d')
    for layout in layouts:
        try:
            parsed = datetime.strptime(cleaned, layout)
        except ValueError:
            continue
        # Only accept years inside the expected range (bounds exclusive).
        if 2000 < parsed.year < 2100:
            return parsed.strftime("%Y-%m-%d")

    return ""
|
||
|
||
|
||
def handle_department(string):
    """Build a list of progressively simplified variants of a department name.

    Starting from the raw name, each step below is applied to the output of
    the previous one, and its result is appended only when it differs from
    that previous output:

    1. remove digits and the Chinese numerals 一 through 十;
    2. remove bracketed segments: ``(...)``, ``[...]``, ``{...}`` and the
       full-width parenthesis pair (U+FF08 ... U+FF09);
    3. keep the text before the first "科" and append "科" (this also
       appends "科" to a name that contains no "科").

    Args:
        string: Raw department name; may be empty or ``None``.

    Returns:
        A list beginning with the original name followed by the distinct
        simplified variants, or an empty list when the input is falsy.
    """
    if not string:
        return []

    candidates = [string]

    without_num = re.sub(r'[\d一二三四五六七八九十]', '', string)
    if without_num != string:
        candidates.append(without_num)

    # The full-width parentheses are written as \uFF08 / \uFF09 escapes on
    # purpose: if they degrade to ASCII "(" and ")", the alternative becomes
    # a plain capture group that matches (and deletes) the entire name.
    without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^{}]*\}|\uFF08[^\uFF08\uFF09]*\uFF09',
        "",
        without_num,
    )
    if without_brackets != without_num:
        candidates.append(without_brackets)

    pure = without_brackets.split("科")[0] + "科"
    if pure != without_brackets:
        candidates.append(pure)

    return candidates
|
||
|
||
|
||
# 处理姓名类数据
|
||
def handle_name(string):
    """Strip a name down to CJK-range characters and the middle dot.

    Every character outside the range U+2E80 to U+9FFF is removed, except
    the middle dot "·", which is kept.

    Args:
        string: Raw name text; may be empty or ``None``.

    Returns:
        The filtered name, or ``""`` when the input is falsy.
    """
    if string:
        return re.sub(r'[^⺀-鿿·]', '', string)
    return ""
|
||
|
||
|
||
# 处理医保类型数据
|
||
def handle_insurance_type(string):
    """Remove colon characters from a medical-insurance type string.

    Both the ASCII colon ``:`` and the full-width colon (U+FF1A) are
    stripped; all other characters are returned unchanged.

    Args:
        string: Raw insurance-type text; may be empty or ``None``.

    Returns:
        The text without colons, or ``""`` when the input is falsy.
    """
    if not string:
        return ""
    # The full-width colon is spelled as an escape so it cannot silently
    # collapse into a second, redundant ASCII-colon replacement.
    return string.replace(":", "").replace("\uFF1A", "")
|
||
|
||
|
||
# 处理原始数据
|
||
def handle_original_data(string):
    """Cap raw text at 255 characters.

    The cap exists so that an over-long value does not fail when it is
    stored in the database.

    Args:
        string: Raw text; may be empty or ``None``.

    Returns:
        The first 255 characters of the input, or ``""`` when the input is
        falsy.
    """
    return string[:255] if string else ""
|