优化“病区”的处理
This commit is contained in:
@@ -106,6 +106,7 @@ def parse_department(string):
|
||||
string = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ") # 去除括号
|
||||
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
|
||||
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
|
||||
string = string.replace("病区", "").replace("病", "") # 去除常见的无意义词
|
||||
string = string.replace("科", " ") # 分离科室
|
||||
departments = string.strip().split(" ")
|
||||
for department in departments:
|
||||
|
||||
Reference in New Issue
Block a user