Files
fcb_photo_review/photo_review/util/data_util.py
2024-05-29 11:31:22 +08:00

78 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from datetime import datetime
# Normalize monetary amounts.
def handle_decimal(string):
    """Reduce a free-form amount string to digits with at most one decimal point.

    Every character other than ASCII digits and "." is removed. When several
    dots remain, the last one is treated as the decimal point and the earlier
    ones are dropped.

    Args:
        string: Raw text of the amount; may be empty or None.

    Returns:
        The cleaned numeric string, or "" when the input is empty or contains
        no digits or dots at all.
    """
    if not string:
        return ""
    cleaned = re.sub(r'[^0-9.]', '', string)
    # rpartition never raises: with no "." present the separator comes back
    # empty, whereas unpacking rsplit('.', 1) into two names would fail.
    front, sep, back = cleaned.rpartition('.')
    if not sep:
        # No decimal point at all: the cleaned digits are already the result.
        return cleaned
    front = front.replace(".", "")
    if back:
        back = "." + back
    # A trailing dot with nothing after it ("12.") is dropped entirely.
    return front + back
# Normalize date strings.
def handle_date(string):
    """Normalize a free-form date string to digits and "-" separators.

    The year/month markers and the "/" and "." separators are turned into "-",
    the day marker is removed, and every remaining character that is not an
    ASCII digit or "-" is stripped. Over-long results are truncated to 8
    characters (no dashes) or 10 characters (with dashes) before validation.

    Args:
        string: Raw text of the date; may be empty or None.

    Returns:
        The normalized date string when is_valid_date_format accepts it,
        otherwise "".
    """
    if not string:
        return ""
    # NOTE(review): in the reviewed copy the first three replace() targets were
    # empty strings, and replace("", "-") inserts a dash between every
    # character, so no input could ever validate. They are restored here as the
    # year/month/day markers, inferred from the "-", "-", "" replacement
    # pattern -- confirm against the repository copy of this file.
    string = (
        string.replace("年", "-")
        .replace("月", "-")
        .replace("日", "")
        .replace("/", "-")
        .replace(".", "-")
    )
    string = re.sub(r'[^0-9-]', '', string)
    length = len(string)
    # Drop trailing characters beyond a full date (e.g. a time component).
    if length > 8 and "-" not in string:
        string = string[:8]
    elif length > 10 and "-" in string:
        string = string[:10]
    return string if is_valid_date_format(string) else ""
# Check whether a string is a date in one of the accepted layouts.
def is_valid_date_format(date_str):
    """Return True if date_str parses under any accepted date layout.

    Accepted layouts are yyyy-MM-dd, yy-MM-dd, yyyyMMdd and yyMMdd. Strings
    shorter than six characters are rejected without attempting to parse.

    Args:
        date_str: Candidate date text.

    Returns:
        True when datetime.strptime accepts the text under at least one
        layout, False otherwise.
    """
    if len(date_str) >= 6:
        # Layouts are tried in order; the first successful parse wins.
        for pattern in ('%Y-%m-%d', '%y-%m-%d', '%Y%m%d', '%y%m%d'):
            try:
                datetime.strptime(date_str, pattern)
            except ValueError:
                continue
            return True
    return False
def handle_department(string):
    """Build progressively simplified variants of a department name.

    Starting from the raw name, each step derives a shorter candidate and
    appends it only when it differs from the previous step's text:

    1. the raw name itself;
    2. the name without digits and the Chinese numerals one to ten;
    3. that text without bracketed segments ((), [], {} and full-width
       parentheses);
    4. that text cut after its first "科" (with "科" appended when the text
       does not contain it).

    Args:
        string: Raw department name; may be empty or None.

    Returns:
        A list of candidate names, most specific first; empty when the input
        is empty.
    """
    result = []
    if not string:
        return result
    result.append(string)
    string_without_num = re.sub(r'[\d一二三四五六七八九十]', '', string)
    if string_without_num != string:
        result.append(string_without_num)
    # NOTE(review): in the reviewed copy the last alternative was "[^]*", which
    # is an unterminated character set and makes re.sub raise. It is restored
    # as the full-width parenthesis pair, mirroring the three ASCII bracket
    # alternatives; written as \uXXXX escapes so the confusable characters
    # cannot be lost or mistaken for ASCII parentheses again.
    string_without_brackets = re.sub(
        r'\([^()]*\)|\[[^\[\]]*\]|\{[^\{\}]*\}|\uFF08[^\uFF08\uFF09]*\uFF09',
        "",
        string_without_num,
    )
    if string_without_brackets != string_without_num:
        result.append(string_without_brackets)
    # NOTE(review): the separator literal was empty in the reviewed copy, and
    # str.split("") raises ValueError. "科" is an assumption based on the
    # split-then-reappend shape of the expression -- confirm against the
    # repository copy of this file.
    pure_string = string_without_brackets.split("科")[0] + "科"
    if pure_string != string_without_brackets:
        result.append(pure_string)
    return result