优化费用总额的解析

This commit is contained in:
2024-07-20 15:52:16 +08:00
parent a71e64c24f
commit d7ad7380a5
5 changed files with 170 additions and 22 deletions

View File

@@ -1,6 +1,9 @@
import logging
import re
from datetime import datetime
from util import util
# 处理金额类数据
def handle_decimal(string):
@@ -8,21 +11,28 @@ def handle_decimal(string):
if not string:
return ""
if "." not in string:
front = string
back = ""
if len(string) > 2:
result = string[:-2] + "." + string[-2:]
else:
result = string
else:
front, back = string.rsplit('.', 1)
front = front.replace(".", "")
if back:
back = "." + back[:2]
result = front + back
return result[:16]
front = front[-16:]
if back:
back = "." + back
result = float(front + back)
# 金额较大的暂且交给人工确认
if result > 100000:
return ""
else:
return front + back
def parse_money(capital_num, num):
    """Pick the source text for a money field and return it with its parsed value.

    The capital-numeral text is preferred: when ``capital_num`` is truthy it is
    handed to ``util.chinese_money_to_number`` and the pair
    ``(capital_num, parsed_value)`` is returned. If ``capital_num`` is falsy, or
    parsing raises, the numeric text is used instead.

    :param capital_num: amount text to parse with ``util.chinese_money_to_number``
    :param num: fallback amount text, normalised with ``handle_decimal``
    :return: tuple of (source text used, parsed amount)
    """
    if not capital_num:
        return num, handle_decimal(num)

    try:
        parsed = util.chinese_money_to_number(capital_num)
        return capital_num, parsed
    except Exception as e:
        # Best-effort: a failed capital-numeral parse is logged, not propagated.
        logging.warning("大写金额解析失败", exc_info=e)

    return num, handle_decimal(num)
# 处理日期类数据
@@ -161,7 +171,3 @@ def handle_age(string):
string = string.split("")[0]
num = re.sub(r'\D', '', string)
return num[-3:]
if __name__ == '__main__':
print(handle_decimal(" "))

7
util/string_util.py Normal file
View File

@@ -0,0 +1,7 @@
def blank(string):
    """
    Tell whether a string is empty or consists only of whitespace.

    :param string: the string to check
    :return: True when the value is falsy or contains only whitespace
    """
    if not string:
        # Covers None and the empty string.
        return True
    return string.isspace()

View File

@@ -2,6 +2,8 @@ import logging
import os
from datetime import datetime
from util import string_util
# 获取yyyy-MM-dd HH:mm:ss格式的当前时间
def get_default_datetime():
@@ -76,3 +78,130 @@ def zoom_rectangle(rectangle, ratio):
x2 = round(x2 + x2 * ratio)
y2 = round(y2 + y2 * ratio)
return [x1, y1, x2, y2]
# NOTE(review): the ten/hundred/thousand/ten-thousand literals were empty
# strings in the reviewed copy (apparently lost in extraction); they are
# restored here with the standard plain and financial unit characters.
# Confirm against the repository.
_CHINESE_MONEY_UNITS = {
    "十": (10, False),
    "拾": (10, False),
    "百": (100, False),
    "佰": (100, False),
    "千": (1000, False),
    "仟": (1000, False),
    "万": (10000, True),
    "亿": (100000000, True),
}


def chinese_to_money_unit(chinese):
    """Map a Chinese magnitude character to its multiplier.

    :param chinese: a single unit character such as "十" or "万"
    :return: tuple ``(multiplier, is_section_unit)``; the flag is True for
        "万" and "亿", which ``chinese_to_number`` applies to the whole
        accumulated section rather than to a single digit. Unknown input
        yields ``(None, False)``.
    """
    return _CHINESE_MONEY_UNITS.get(chinese, (None, False))
# NOTE(review): the digit literals were empty strings in the reviewed copy
# (apparently lost in extraction); they are restored here with the standard
# plain and financial (capital) numerals. Confirm against the repository.
_CHINESE_DIGITS = {
    "零": 0,
    "一": 1, "壹": 1,
    "二": 2, "贰": 2,
    "三": 3, "叁": 3,
    "四": 4, "肆": 4,
    "五": 5, "伍": 5,
    "六": 6, "陆": 6,
    "七": 7, "柒": 7,
    "八": 8, "捌": 8,
    "九": 9, "玖": 9,
}


def chinese_char_to_number(chinese):
    """Convert a single Chinese digit character to its integer value.

    :param chinese: one character, either a plain numeral ("一") or its
        financial form ("壹")
    :return: the digit 0-9, or -1 when the character is not a digit
    """
    return _CHINESE_DIGITS.get(chinese, -1)
def chinese_to_number(chinese):
    """Convert a Chinese numeral string to an integer.

    Each character is classified with ``chinese_char_to_number``; anything
    that is not a digit is looked up with ``chinese_to_money_unit``.

    :param chinese: numeral text made of digit and unit characters
    :return: the integer value of the text
    :raises ValueError: if two non-zero digits are adjacent, or a character
        is neither a digit nor a known unit
    """
    result = 0   # total of the sections already closed by a section unit
    section = 0  # value accumulated since the last section unit
    number = 0   # digit waiting for the unit that follows it
    unit = (None, False)

    for i, c in enumerate(chinese):
        num = chinese_char_to_number(c)
        if num >= 0:
            if num == 0:
                # A zero ends the current run: a pending digit takes one
                # magnitude below the last unit, then that unit is forgotten.
                if number > 0 and unit[0] is not None:
                    section += number * (unit[0] // 10)
                unit = (None, False)
            elif number > 0:
                raise ValueError(f"Bad number '{chinese[i - 1]}{c}' at: {i}")
            number = num
        else:
            unit = chinese_to_money_unit(c)
            if unit[0] is None:
                raise ValueError(f"Unknown unit '{c}' at: {i}")
            if unit[1]:
                # Section unit: scale everything accumulated so far and close
                # the section.
                section = (section + number) * unit[0]
                result += section
                section = 0
            else:
                unit_number = number
                if number == 0 and i == 0:
                    # A leading unit with no digit counts as one of that unit.
                    unit_number = 1
                section += unit_number * unit[0]
            number = 0

    # A trailing digit after a unit is one magnitude below that unit.
    # Floor division keeps the result an int; every unit is a multiple of 10.
    if number > 0 and unit[0] is not None:
        number *= unit[0] // 10
    return result + section + number
def chinese_money_to_number(chinese_money_amount):
    """Convert a Chinese money amount (yuan / jiao / fen) to a number.

    The text is split at the yuan marker ("元", or "圆" when "元" is absent),
    the jiao marker ("角") and the fen marker ("分"). Each segment is converted
    with ``chinese_to_number``; text outside those segments is not read.

    NOTE(review): the marker literals were empty strings in the reviewed copy
    (apparently lost in extraction), which made every ``find`` return 0 and the
    function return 0 for any non-blank input. They are restored here with the
    standard currency characters. Confirm against the repository.

    :param chinese_money_amount: amount text, e.g. "壹佰元伍角"
    :return: the amount rounded to 2 decimals, or None for blank input
    :raises ValueError: propagated from ``chinese_to_number`` for bad segments
    """
    if string_util.blank(chinese_money_amount):
        return None

    yi = chinese_money_amount.find("元")
    if yi == -1:
        yi = chinese_money_amount.find("圆")
    ji = chinese_money_amount.find("角")
    fi = chinese_money_amount.find("分")

    # Yuan: everything before the yuan marker.
    y_str = chinese_money_amount[:yi] if yi > 0 else None

    # Jiao: between the yuan marker and the jiao marker, or from the start
    # when there is no yuan marker.
    j_str = None
    if ji > 0:
        if yi >= 0:
            if ji > yi:
                j_str = chinese_money_amount[yi + 1:ji]
        else:
            j_str = chinese_money_amount[:ji]

    # Fen: after the jiao marker if present, else after the yuan marker,
    # else from the start.
    f_str = None
    if fi > 0:
        if ji >= 0:
            if fi > ji:
                f_str = chinese_money_amount[ji + 1:fi]
        elif yi > 0:
            if fi > yi:
                f_str = chinese_money_amount[yi + 1:fi]
        else:
            f_str = chinese_money_amount[:fi]

    y = 0
    j = 0
    f = 0
    if not string_util.blank(y_str):
        y = chinese_to_number(y_str)
    if not string_util.blank(j_str):
        j = chinese_to_number(j_str)
    if not string_util.blank(f_str):
        f = chinese_to_number(f_str)

    amount = y
    amount += j / 10
    amount += f / 100
    return round(amount, 2)