优化“病区”的处理

This commit is contained in:
2024-08-21 12:48:16 +08:00
parent 2ae43d6e88
commit 0e17f2b9aa
3 changed files with 3 additions and 5 deletions

View File

@@ -1,6 +1,6 @@
x-env:
&template
image: fcb_photo_review:1.12.12
image: fcb_photo_review:1.12.13
restart: always
services:

View File

@@ -85,19 +85,16 @@ DEPARTMENT_ALIAS = {
# 默认会过滤单字
HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"]
DEPARTMENT_FILTER = ["", "", "病区", "", "西", ""]
DEPARTMENT_FILTER = ["", "", "西", ""]
"""
分词配置
"""
jieba.suggest_freq(("肿瘤", "医院"), True)
jieba.suggest_freq(("心血管", ""), True)
jieba.suggest_freq(("", ""), True)
jieba.suggest_freq(("感染", ""), True)
jieba.suggest_freq(("", ""), True)
jieba.suggest_freq(("", ""), True)
jieba.suggest_freq(("", ""), True)
jieba.add_word("病区", 10000)
"""
模型配置

View File

@@ -106,6 +106,7 @@ def parse_department(string):
string = string.replace(")", "").replace("）", "").replace("(", " ").replace("（", " ") # 去除括号
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
string = string.replace("病区", "").replace("", "") # 去除常见的无意义词
string = string.replace("", " ") # 分离科室
departments = string.strip().split(" ")
for department in departments: