import re

# Default truncation lengths applied to the cleaned values.
_DEPARTMENT_MAX_LENGTH = 255
_NAME_MAX_LENGTH = 30
_INSURANCE_TYPE_MAX_LENGTH = 255

# Digits plus the Chinese numerals one to ten.
_NUMERAL_RE = re.compile(r'\d|一|二|三|四|五|六|七|八|九|十')

# One bracketed group that contains no bracket of its own kind:
# (...), [...], {...} or the fullwidth pair U+FF08 ... U+FF09.
# The fullwidth pair is written with \u escapes so it cannot be silently
# folded to ASCII parentheses; an unescaped "([^()]*)" alternative would be a
# capturing group that matches (and deletes) every non-parenthesis character.
_BRACKETED_RE = re.compile(
    r'\([^()]*\)'
    r'|\[[^\[\]]*\]'
    r'|\{[^{}]*\}'
    r'|\uff08[^\uff08\uff09]*\uff09'
)

# Any character that is neither in the range ⺀-鿿 nor the middle dot.
_NON_NAME_RE = re.compile(r'[^⺀-鿿·]')


def handle_department(string, max_length=_DEPARTMENT_MAX_LENGTH):
    """Return progressively simplified variants of a department name.

    The variants are produced in this order, and each one is appended only
    when it differs from the variant it was derived from:

    1. the input itself;
    2. the input with digits and the Chinese numerals one to ten removed;
    3. variant 2 with bracketed groups removed (ASCII ``()``, ``[]``, ``{}``
       and fullwidth parentheses);
    4. the text of variant 3 up to the first "科", with "科" re-appended.

    Args:
        string: Raw department text; ``None`` or ``""`` yields ``[]``.
        max_length: Maximum length of every returned variant.

    Returns:
        A list of variants, each truncated to ``max_length`` characters.
        Comparisons are made on the untruncated text, so truncated entries
        may repeat.
    """
    result = []
    if not string:
        return result
    result.append(string[:max_length])

    string_without_num = _NUMERAL_RE.sub('', string)
    if string_without_num != string:
        result.append(string_without_num[:max_length])

    string_without_brackets = _BRACKETED_RE.sub('', string_without_num)
    if string_without_brackets != string_without_num:
        result.append(string_without_brackets[:max_length])

    # split(...)[0] is the whole string when "科" is absent, so a name without
    # "科" gains the suffix here.
    pure_string = string_without_brackets.split("科")[0] + "科"
    if pure_string != string_without_brackets:
        result.append(pure_string[:max_length])
    return result


def handle_name(string, max_length=_NAME_MAX_LENGTH):
    """Strip every character outside ⺀-鿿 and the middle dot from a name.

    Args:
        string: Raw name text; ``None`` or ``""`` yields ``""``.
        max_length: Maximum length of the returned name.

    Returns:
        The filtered name, truncated to ``max_length`` characters.
    """
    if not string:
        return ""
    return _NON_NAME_RE.sub('', string)[:max_length]


def handle_insurance_type(string, max_length=_INSURANCE_TYPE_MAX_LENGTH):
    """Remove colons from insurance-type text and truncate it.

    Both the ASCII colon and the fullwidth colon (U+FF1A) are removed; the
    latter is written as an escape so the two replacements stay distinct.

    Args:
        string: Raw insurance-type text; ``None`` or ``""`` yields ``""``.
        max_length: Maximum length of the returned text.

    Returns:
        The text without colons, truncated to ``max_length`` characters.
    """
    if not string:
        return ""
    return string.replace(":", "").replace("\uff1a", "")[:max_length]


# Process raw data