优化费用总额的解析
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
from util import util
|
||||
|
||||
|
||||
# 处理金额类数据
|
||||
def handle_decimal(string):
|
||||
@@ -8,21 +11,28 @@ def handle_decimal(string):
|
||||
if not string:
|
||||
return ""
|
||||
if "." not in string:
|
||||
front = string
|
||||
back = ""
|
||||
if len(string) > 2:
|
||||
result = string[:-2] + "." + string[-2:]
|
||||
else:
|
||||
result = string
|
||||
else:
|
||||
front, back = string.rsplit('.', 1)
|
||||
front = front.replace(".", "")
|
||||
if back:
|
||||
back = "." + back[:2]
|
||||
result = front + back
|
||||
return result[:16]
|
||||
|
||||
front = front[-16:]
|
||||
if back:
|
||||
back = "." + back
|
||||
result = float(front + back)
|
||||
# 金额较大的暂且交给人工确认
|
||||
if result > 100000:
|
||||
return ""
|
||||
else:
|
||||
return front + back
|
||||
|
||||
def parse_money(capital_num, num):
    """
    Choose the amount to use from a Chinese capital-numeral amount and a digit amount.

    The Chinese capital amount is tried first. If it is empty, cannot be
    parsed, or yields no value, the digit amount is used instead.

    :param capital_num: amount written in Chinese numerals (may be empty or None)
    :param num: amount written with digits, used as the fallback
    :return: ``(source_text, amount)`` where ``source_text`` is whichever input
        was used; ``amount`` is the parsed number for ``capital_num`` or the
        result of ``handle_decimal(num)`` for the fallback
    """
    if capital_num:
        try:
            money = util.chinese_money_to_number(capital_num)
        except Exception as e:
            # Best effort: a failed parse is logged and the digit amount is used.
            logging.warning("大写金额解析失败", exc_info=e)
        else:
            # The parser reports blank (e.g. whitespace-only) input as None;
            # treat that as "no usable capital amount" rather than returning
            # (capital_num, None) and ignoring a usable digit amount.
            if money is not None:
                return capital_num, money

    return num, handle_decimal(num)
|
||||
|
||||
|
||||
# 处理日期类数据
|
||||
@@ -161,7 +171,3 @@ def handle_age(string):
|
||||
string = string.split("岁")[0]
|
||||
num = re.sub(r'\D', '', string)
|
||||
return num[-3:]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(handle_decimal(" "))
|
||||
7
util/string_util.py
Normal file
7
util/string_util.py
Normal file
@@ -0,0 +1,7 @@
|
||||
def blank(string):
    """
    Tell whether a string is empty or made up solely of whitespace.

    :param string: the string to inspect; falsy values such as None or "" count as blank
    :return: True when the value is falsy or whitespace-only, otherwise False
    """
    if string:
        # Non-empty: blank only if every character is whitespace.
        return string.isspace()
    return True
|
||||
129
util/util.py
129
util/util.py
@@ -2,6 +2,8 @@ import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
from util import string_util
|
||||
|
||||
|
||||
# 获取yyyy-MM-dd HH:mm:ss格式的当前时间
|
||||
def get_default_datetime():
|
||||
@@ -76,3 +78,130 @@ def zoom_rectangle(rectangle, ratio):
|
||||
x2 = round(x2 + x2 * ratio)
|
||||
y2 = round(y2 + y2 * ratio)
|
||||
return [x1, y1, x2, y2]
|
||||
|
||||
|
||||
def chinese_to_money_unit(chinese):
    """
    Look up the multiplier represented by a Chinese magnitude character.

    :param chinese: a single character such as "拾", "百", "仟", "万" or "亿"
    :return: a ``(value, flag)`` tuple; ``flag`` is True only for "万" and "亿".
        Unrecognised input yields ``(None, False)``.
    """
    unit_table = (
        (("拾", "十"), (10, False)),
        (("佰", "百"), (100, False)),
        (("仟", "千"), (1000, False)),
        (("万",), (10000, True)),
        (("亿",), (100000000, True)),
    )
    for symbols, unit in unit_table:
        if chinese in symbols:
            return unit
    return None, False
|
||||
|
||||
|
||||
def chinese_char_to_number(chinese):
    """
    Map a single Chinese digit character to its integer value.

    Both the everyday form (e.g. "一") and the capital form (e.g. "壹") are
    accepted for 1-9; zero is only recognised as "零".

    :param chinese: a single character
    :return: the digit 0-9, or -1 when the character is not a known digit
    """
    # The position in this table is the numeric value of the listed forms.
    digit_forms = (
        ("零",),
        ("一", "壹"),
        ("二", "贰"),
        ("三", "叁"),
        ("四", "肆"),
        ("五", "伍"),
        ("六", "陆"),
        ("七", "柒"),
        ("八", "捌"),
        ("九", "玖"),
    )
    for value, forms in enumerate(digit_forms):
        if chinese in forms:
            return value
    return -1
|
||||
|
||||
|
||||
def chinese_to_number(chinese):
    """
    Convert a Chinese integer numeral such as "一百二十" or "壹万贰仟" to an int.

    Characters are classified with ``chinese_char_to_number`` (digits) and
    ``chinese_to_money_unit`` (units). A trailing digit without its own unit
    takes one tenth of the preceding unit, so "一百二" is read as 120.

    :param chinese: the numeral text, containing only digit and unit characters
    :return: the integer value
    :raises ValueError: when two non-zero digits are adjacent, or a character
        is neither a known digit nor a known unit
    """
    result = 0    # sum of sections already closed by a flagged unit (万 / 亿)
    section = 0   # running value of the section currently being read
    number = 0    # most recent digit still waiting for its unit
    unit = (None, False)

    for i, c in enumerate(chinese):
        num = chinese_char_to_number(c)
        if num >= 0:
            if num == 0:
                # A zero ends the implied-unit context: a pending digit takes
                # one tenth of the last unit, then that unit is forgotten.
                if number > 0 and unit[0] is not None:
                    # Integer division keeps the result an int; every unit
                    # value is a multiple of 10, so nothing is lost.
                    section += number * (unit[0] // 10)
                unit = (None, False)
            elif number > 0:
                # Two non-zero digits in a row are not a valid numeral.
                raise ValueError(f"Bad number '{chinese[i - 1]}{c}' at: {i}")
            number = num
        else:
            unit = chinese_to_money_unit(c)
            if unit[0] is None:
                raise ValueError(f"Unknown unit '{c}' at: {i}")
            if unit[1]:
                # Flagged unit: scale the whole section and bank it.
                section = (section + number) * unit[0]
                result += section
                section = 0
            else:
                unit_number = number
                if number == 0 and i == 0:
                    # A leading unit implies a one, e.g. "十二" means "一十二".
                    unit_number = 1
                section += unit_number * unit[0]
            number = 0

    # A trailing digit with no unit of its own takes one tenth of the last unit.
    if number > 0 and unit[0] is not None:
        number *= unit[0] // 10
    return result + section + number
|
||||
|
||||
|
||||
def chinese_money_to_number(chinese_money_amount):
    """
    Convert a Chinese money amount (元/圆, 角, 分) to a number.

    The text before "元" (or "圆" when "元" is absent) is the whole-unit part,
    the text between that and "角" is the tenths part, and the text before "分"
    is the hundredths part. Each part is converted with ``chinese_to_number``;
    parts that are missing or blank count as zero. Anything after the last
    marker is ignored.

    :param chinese_money_amount: the amount text
    :return: None when the input is blank; otherwise the amount rounded to
        two decimal places
    """
    if string_util.blank(chinese_money_amount):
        return None

    text = chinese_money_amount
    yuan_pos = text.find("元")
    if yuan_pos == -1:
        yuan_pos = text.find("圆")
    jiao_pos = text.find("角")
    fen_pos = text.find("分")

    # Whole-unit part: everything before the yuan marker.
    yuan_text = text[0:yuan_pos] if yuan_pos > 0 else None

    # Tenths part: starts right after the yuan marker when there is one,
    # otherwise at the beginning of the text.
    jiao_text = None
    if jiao_pos > 0:
        jiao_anchor = yuan_pos if yuan_pos >= 0 else -1
        if jiao_pos > jiao_anchor:
            jiao_text = text[jiao_anchor + 1:jiao_pos]

    # Hundredths part: starts after the jiao marker if present, else after
    # the yuan marker, else at the beginning of the text.
    fen_text = None
    if fen_pos > 0:
        if jiao_pos >= 0:
            fen_anchor = jiao_pos
        elif yuan_pos > 0:
            fen_anchor = yuan_pos
        else:
            fen_anchor = -1
        if fen_pos > fen_anchor:
            fen_text = text[fen_anchor + 1:fen_pos]

    values = []
    for segment in (yuan_text, jiao_text, fen_text):
        if string_util.blank(segment):
            values.append(0)
        else:
            values.append(chinese_to_number(segment))
    yuan, jiao, fen = values

    return round(yuan + jiao / 10 + fen / 100, 2)
|
||||
|
||||
Reference in New Issue
Block a user