优化科室的匹配

This commit is contained in:
2024-08-20 16:54:36 +08:00
parent 896d2aaf9b
commit d5181c33b8
2 changed files with 44 additions and 31 deletions

View File

@@ -221,6 +221,22 @@ def search_hospital(hospital):
return best_match return best_match
def search_department(department):
    """Look up the ``BdYlks`` row whose name best matches *department*.

    The department text is tokenized with ``jieba.lcut``. Candidates are
    first fetched with a single ``LIKE`` pattern that requires every token
    to appear in order (``%tok1%tok2%...%``). If that yields nothing, each
    token is tried on its own and the first token that produces any rows
    supplies the candidate set. The candidates are then ranked against the
    original text with ``fuzz.partial_token_set_ratio``.

    Args:
        department: Department name text to match.

    Returns:
        Whatever ``process.extractOne`` returns for the candidate mapping
        ``{pk_ylks: name}``, or ``None`` when *department* is empty.
        NOTE(review): callers in this file read index ``[1]`` as the score
        and index ``[2]`` as the ``pk_ylks`` key -- confirm against the
        fuzzy-matching library actually imported by this module.
    """
    # Nothing to match: avoid a `LIKE '%%'` query that would load every row.
    if not department:
        return None

    cut_list = jieba.lcut(department)
    session = MysqlSession()
    try:
        # Strict pass: all tokens must occur in the name, in order.
        ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
            .filter(BdYlks.name.like(f"%{'%'.join(cut_list)}%")).all()
        if not ylks:
            # Relaxed pass: fall back to the first single token with any hit.
            for filter_keyword in cut_list:
                ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
                    .filter(BdYlks.name.like(f"%{filter_keyword}%")).all()
                if ylks:
                    break
    finally:
        # Always release the session, even when a query raises.
        session.close()

    ylks = {row.pk_ylks: row.name for row in ylks}
    best_match = process.extractOne(department, ylks, scorer=fuzz.partial_token_set_ratio)
    return best_match
def settlement_task(pk_phhd, settlement_list, identity): def settlement_task(pk_phhd, settlement_list, identity):
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity) settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
settlement_data = { settlement_data = {
@@ -299,17 +315,24 @@ def discharge_task(pk_phhd, discharge_record, identity):
if best_match: if best_match:
discharge_data["pk_yljg"] = best_match[2] discharge_data["pk_yljg"] = best_match[2]
if departments: if departments:
department_values = [] match_departments = []
for dept in departments: for department in departments:
department_values += parse_department(dept) parsed_departments = parse_department(department)
department_values = list(set(department_values)) for parsed_department in parsed_departments:
if department_values: search_result = search_department(parsed_department)
session = MysqlSession() match_departments.append(search_result)
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \ if search_result and search_result[1] == 100:
.filter(BdYlks.name.in_(department_values)).limit(1).one_or_none() break
session.close() best_match = None
if ylks: best_score = 0
discharge_data["pk_ylks"] = ylks.pk_ylks for match_department in match_departments:
if match_department and match_department[1] > best_score:
best_match = match_department
best_score = match_department[1]
if best_score == 100:
break
if best_match:
discharge_data["pk_ylks"] = best_match[2]
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data) save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)

View File

@@ -102,26 +102,16 @@ def parse_department(string):
result = [] result = []
if not string: if not string:
return result return result
string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|[^]*|[^⺀-鿿]', '', string)[:255]
if string == "": string = string.replace(")", "").replace("", "").replace("(", " ").replace("", " ") # 去除括号
return result string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
result.append(string) string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string) string = string.replace("", " ") # 分离科室
if string == "": departments = string.strip().split(" ")
return result for department in departments:
if string_without_num != string: if department:
result.append(string_without_num) result.append(department)
pure_string = string_without_num.split("")[0] + "" return set(result)
if string == "":
return result
if pure_string != string_without_num:
result.append(pure_string)
pure_string_without_io = pure_string.replace("", "").replace("", "")
if string == "":
return result
if pure_string_without_io != pure_string:
result.append(pure_string)
return result
# 处理姓名类数据 # 处理姓名类数据