优化科室的匹配

2024-08-20 16:54:36 +08:00
parent 896d2aaf9b
commit d5181c33b8
2 changed files with 44 additions and 31 deletions
--- a/util/data_util.py
+++ b/util/data_util.py
@@ -102,26 +102,16 @@ def parse_department(string):
    result = []
    if not string:
        return result
-    string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|（[^（）]*）|[^⺀-鿿]', '', string)[:255]
-    if string == "科":
-        return result
-    result.append(string)
-    string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
-    if string == "科":
-        return result
-    if string_without_num != string:
-        result.append(string_without_num)
-    pure_string = string_without_num.split("科")[0] + "科"
-    if string == "科":
-        return result
-    if pure_string != string_without_num:
-        result.append(pure_string)
-    pure_string_without_io = pure_string.replace("内", "").replace("外", "")
-    if string == "科":
-        return result
-    if pure_string_without_io != pure_string:
-        result.append(pure_string)
-    return result
+
+    string = string.replace(")", "").replace("）", "").replace("(", " ").replace("（", " ")  # 去除括号
+    string = re.sub(r'[^⺀-鿿 ]', '', string)  # 去除非汉字字符，除了空格
+    string = re.sub(r'[一二三四五六七八九十]', '', string)  # 去除中文数字
+    string = string.replace("科", " ")  # 分离科室
+    departments = string.strip().split(" ")
+    for department in departments:
+        if department:
+            result.append(department)
+    return set(result)


 # 处理姓名类数据