优化科室的匹配
This commit is contained in:
@@ -102,26 +102,16 @@ def parse_department(string):
|
||||
result = []
|
||||
if not string:
|
||||
return result
|
||||
string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|（[^（）]*）|[^⺀-鿿]', '', string)[:255]
|
||||
if string == "科":
|
||||
return result
|
||||
result.append(string)
|
||||
string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
|
||||
if string == "科":
|
||||
return result
|
||||
if string_without_num != string:
|
||||
result.append(string_without_num)
|
||||
pure_string = string_without_num.split("科")[0] + "科"
|
||||
if string == "科":
|
||||
return result
|
||||
if pure_string != string_without_num:
|
||||
result.append(pure_string)
|
||||
pure_string_without_io = pure_string.replace("内", "").replace("外", "")
|
||||
if string == "科":
|
||||
return result
|
||||
if pure_string_without_io != pure_string:
|
||||
result.append(pure_string)
|
||||
return result
|
||||
|
||||
string = string.replace(")", "").replace("）", "").replace("(", " ").replace("（", " ") # 去除括号
|
||||
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
|
||||
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
|
||||
string = string.replace("科", " ") # 分离科室
|
||||
departments = string.strip().split(" ")
|
||||
for department in departments:
|
||||
if department:
|
||||
result.append(department)
|
||||
return set(result)
|
||||
|
||||
|
||||
# 处理姓名类数据
|
||||
|
||||
Reference in New Issue
Block a user