From d5181c33b84b7d573db22bd2acc18194adc98b6b Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Tue, 20 Aug 2024 16:54:36 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=A7=91=E5=AE=A4=E7=9A=84?=
 =?UTF-8?q?=E5=8C=B9=E9=85=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 photo_review/photo_review.py | 45 +++++++++++++++++++++++++++---------
 util/data_util.py            | 30 ++++++++----------------
 2 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py
index 69fccce..e4f1e38 100644
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -221,6 +221,22 @@ def search_hospital(hospital):
     return best_match
 
 
+def search_department(department):
+    """Fuzzy-match *department* against BdYlks names; return process.extractOne's result for the candidates."""
+    keywords = jieba.lcut(department)
+    session = MysqlSession()
+    try:  # close the session even when a query raises, so it is never leaked
+        query = session.query(BdYlks.pk_ylks, BdYlks.name)
+        ylks = query.filter(BdYlks.name.like(f"%{'%'.join(keywords)}%")).all()  # all words, in order
+        for keyword in keywords:  # fallback: try single words until one yields rows
+            if ylks:
+                break
+            ylks = query.filter(BdYlks.name.like(f"%{keyword}%")).all()
+    finally:
+        session.close()
+    return process.extractOne(department, {row.pk_ylks: row.name for row in ylks}, scorer=fuzz.partial_token_set_ratio)
+
+
 def settlement_task(pk_phhd, settlement_list, identity):
     settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
     settlement_data = {
@@ -299,17 +315,24 @@ def discharge_task(pk_phhd, discharge_record, identity):
         if best_match:
             discharge_data["pk_yljg"] = best_match[2]
     if departments:
-        department_values = []
-        for dept in departments:
-            department_values += parse_department(dept)
-        department_values = list(set(department_values))
-        if department_values:
-            session = MysqlSession()
-            ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
-                .filter(BdYlks.name.in_(department_values)).limit(1).one_or_none()
-            session.close()
-            if ylks:
-                discharge_data["pk_ylks"] = ylks.pk_ylks
+        match_departments = []
+        for department in departments:
+            parsed_departments = parse_department(department)
+            for parsed_department in parsed_departments:
+                search_result = search_department(parsed_department)
+                match_departments.append(search_result)
+                if search_result and search_result[1] == 100:
+                    break
+        best_match = None
+        best_score = 0
+        for match_department in match_departments:
+            if match_department and match_department[1] > best_score:
+                best_match = match_department
+                best_score = match_department[1]
+                if best_score == 100:
+                    break
+        if best_match:
+            discharge_data["pk_ylks"] = best_match[2]
     save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
 
 
diff --git a/util/data_util.py b/util/data_util.py
index 9f20831..ee0d5c9 100644
--- a/util/data_util.py
+++ b/util/data_util.py
@@ -102,26 +102,16 @@ def parse_department(string):
     result = []
     if not string:
         return result
-    string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|（[^（）]*）|[^⺀-鿿]', '', string)[:255]
-    if string == "科":
-        return result
-    result.append(string)
-    string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
-    if string == "科":
-        return result
-    if string_without_num != string:
-        result.append(string_without_num)
-    pure_string = string_without_num.split("科")[0] + "科"
-    if string == "科":
-        return result
-    if pure_string != string_without_num:
-        result.append(pure_string)
-    pure_string_without_io = pure_string.replace("内", "").replace("外", "")
-    if string == "科":
-        return result
-    if pure_string_without_io != pure_string:
-        result.append(pure_string)
-    return result
+
+    string = string.replace(")", "").replace("）", "").replace("(", " ").replace("（", " ")  # 去除括号
+    string = re.sub(r'[^⺀-鿿 ]', '', string)  # 去除非汉字字符，除了空格
+    string = re.sub(r'[一二三四五六七八九十]', '', string)  # 去除中文数字
+    string = string.replace("科", " ")  # 分离科室
+    departments = string.strip().split(" ")
+    for department in departments:
+        if department:
+            result.append(department)
+    return set(result)
 
 
 # 处理姓名类数据