Files
fcb_photo_review/photo_review/util/data_util.py

83 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from datetime import datetime
# Normalise amount-like (decimal) text.
# Maximum number of integer digits kept in the result.
_MAX_INTEGER_DIGITS = 16


def handle_decimal(string):
    """Reduce free-form text to a plain decimal-number string.

    Every character other than an ASCII digit or ``.`` is discarded. The last
    ``.`` is taken as the decimal point; any earlier dots are dropped. Only
    the trailing 16 digits of the integer part are kept.

    Args:
        string: Raw text that may contain an amount.

    Returns:
        The cleaned number as a string, or ``""`` when ``string`` is empty or
        ``None``. A trailing dot with nothing after it is removed.
    """
    if not string:
        return ""

    digits_and_dots = re.sub(r'[^0-9.]', '', string)
    integer_part, dot, fraction = digits_and_dots.rpartition('.')

    if not dot:
        # No decimal point: the whole string is the integer part. Apply the
        # same digit cap as the decimal branch so both paths agree.
        return digits_and_dots[-_MAX_INTEGER_DIGITS:]

    # Earlier dots (e.g. from several amounts run together) are not decimal
    # points; remove them before capping the integer part.
    integer_part = integer_part.replace('.', '')[-_MAX_INTEGER_DIGITS:]
    if fraction:
        return integer_part + '.' + fraction
    return integer_part
# Normalise date-like text.
# Characters rewritten before everything except digits and "-" is stripped.
# NOTE(review): in the reviewed copy the first three search strings were
# empty (the characters had been lost), which made str.replace insert "-"
# between every character. The year/month/day markers below, including
# their look-alike code points, are a reconstruction - confirm against the
# repository.
_DATE_CHAR_TABLE = str.maketrans({
    "年": "-",
    "月": "-",
    "日": None,
    "\uf98e": "-",   # CJK compatibility form of 年
    "\u2f49": "-",   # Kangxi-radical form of 月
    "\u2f47": None,  # Kangxi-radical form of 日
    "/": "-",
    ".": "-",
    "\uff0f": "-",   # full-width solidus
    "\uff0e": "-",   # full-width full stop
})


def handle_date(string):
    """Extract a date string from free-form text.

    Year and month markers, ``/`` and ``.`` become ``-``; the day marker is
    removed; every remaining character that is not a digit or ``-`` is
    dropped. The result is cut to 10 characters when it contains ``-`` and
    to 8 characters otherwise, then checked with ``is_valid_date_format``.

    Args:
        string: Raw text that may contain a date.

    Returns:
        The normalised date string, or ``""`` when ``string`` is empty or
        the normalised text is not accepted by ``is_valid_date_format``.
    """
    if not string:
        return ""

    normalized = string.translate(_DATE_CHAR_TABLE)
    normalized = re.sub(r'[^0-9-]', '', normalized)

    # Slicing is a no-op for strings already within the limit.
    if "-" in normalized:
        normalized = normalized[:10]
    else:
        normalized = normalized[:8]

    return normalized if is_valid_date_format(normalized) else ""
# Check whether a string is in one of the accepted date layouts.
_DATE_FORMATS = (
    '%Y-%m-%d',  # yyyy-MM-dd
    '%y-%m-%d',  # yy-MM-dd
    '%Y%m%d',    # yyyyMMdd
    '%y%m%d',    # yyMMdd
)


def is_valid_date_format(date_str):
    """Return True if ``date_str`` parses under any accepted date layout.

    Args:
        date_str: Candidate date text.

    Returns:
        ``True`` when ``datetime.strptime`` accepts the text under at least
        one of the layouts in ``_DATE_FORMATS``; ``False`` otherwise,
        including for non-string input and for text shorter than 6
        characters.
    """
    # Non-string input (e.g. None or an int) cannot be a date string; report
    # it as invalid instead of letting len() raise TypeError.
    if not isinstance(date_str, str) or len(date_str) < 6:
        return False

    for fmt in _DATE_FORMATS:
        try:
            datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        return True
    return False
def handle_department(string, suffix="科"):
    """Build a list of progressively simplified spellings of a name.

    Starting from the input, each step below appends its result only when it
    differs from the previous step's result:

    1. remove digits and the Chinese numerals one to ten;
    2. remove bracketed segments - ``()``, ``[]``, ``{}`` and full-width
       parentheses - together with their contents;
    3. keep the text up to the first ``suffix`` and re-append ``suffix``.

    Args:
        string: Raw department name.
        suffix: Delimiter used by step 3. Pass ``""`` to skip that step.
            NOTE(review): the delimiter literal was empty in the reviewed
            copy (the character had been lost, and ``str.split("")`` raises
            ValueError); "科" is a reconstruction - confirm against the
            repository.

    Returns:
        A list of candidate names in the order produced, or an empty list
        when ``string`` is empty or ``None``.
    """
    result = []
    if not string:
        return result
    result.append(string)

    without_numbers = re.sub(r'[\d一二三四五六七八九十]', '', string)
    if without_numbers != string:
        result.append(without_numbers)

    # The full-width parentheses are written as \uXXXX escapes so the source
    # contains no look-alike characters that tools may flag or strip.
    without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^{}]*\}|\uff08[^\uff08\uff09]*\uff09',
        '',
        without_numbers,
    )
    if without_brackets != without_numbers:
        result.append(without_brackets)

    if suffix:
        pure_string = without_brackets.split(suffix)[0] + suffix
        if pure_string != without_brackets:
            result.append(pure_string)
    return result
if __name__ == '__main__':
    # Manual check: run handle_decimal on one long sample string that mixes
    # amounts with other text, and print what it returns.
    sample_text = "~202312167,214.83金额1,920.008,888.38740.00交三医专用章广东省医疗压院收票据电子政策性游其他支付0.00医保类型跨省异地预缴金额11564.15备注病历号2165996治疗费手术费护理费中成药费项目名称政电电子“东省结算方式个人账户支付0.00医保编号补缴金额0.00住院号216599612,971.56金额7,483.062,192.0211.96复核人小写68,214.38"
    cleaned = handle_decimal(sample_text)
    print(cleaned)