优化科室的匹配

This commit is contained in:
2024-08-20 16:54:36 +08:00
parent 896d2aaf9b
commit d5181c33b8
2 changed files with 44 additions and 31 deletions

View File

@@ -102,26 +102,16 @@ def parse_department(string):
result = []
if not string:
return result
string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|[^]*|[^⺀-鿿]', '', string)[:255]
if string == "":
return result
result.append(string)
string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
if string == "":
return result
if string_without_num != string:
result.append(string_without_num)
pure_string = string_without_num.split("")[0] + ""
if string == "":
return result
if pure_string != string_without_num:
result.append(pure_string)
pure_string_without_io = pure_string.replace("", "").replace("", "")
if string == "":
return result
if pure_string_without_io != pure_string:
result.append(pure_string)
return result
string = string.replace(")", "").replace("", "").replace("(", " ").replace("", " ") # 去除括号
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
string = string.replace("", " ") # 分离科室
departments = string.strip().split(" ")
for department in departments:
if department:
result.append(department)
return set(result)
# 处理姓名类数据