From 0e17f2b9aa8d4507ee2950b65362ac98b592a33b Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Wed, 21 Aug 2024 12:48:16 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E2=80=9C=E7=97=85=E5=8C=BA?= =?UTF-8?q?=E2=80=9D=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 2 +- photo_review/__init__.py | 5 +---- util/data_util.py | 1 + 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index fdf8fb3..5453658 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ x-env: &template - image: fcb_photo_review:1.12.12 + image: fcb_photo_review:1.12.13 restart: always services: diff --git a/photo_review/__init__.py b/photo_review/__init__.py index ed509f4..7d5a9fd 100644 --- a/photo_review/__init__.py +++ b/photo_review/__init__.py @@ -85,19 +85,16 @@ DEPARTMENT_ALIAS = { # 默认会过滤单字 HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"] -DEPARTMENT_FILTER = ["医", "病", "病区", "伤", "西", "新"] +DEPARTMENT_FILTER = ["医", "伤", "西", "新"] """ 分词配置 """ jieba.suggest_freq(("肿瘤", "医院"), True) -jieba.suggest_freq(("心血管", "病"), True) jieba.suggest_freq(("骨", "伤"), True) jieba.suggest_freq(("感染", "性"), True) jieba.suggest_freq(("胆", "道"), True) jieba.suggest_freq(("脾", "胃"), True) -jieba.suggest_freq(("肺", "病"), True) -jieba.add_word("病区", 10000) """ 模型配置 diff --git a/util/data_util.py b/util/data_util.py index ee0d5c9..1504b5c 100644 --- a/util/data_util.py +++ b/util/data_util.py @@ -106,6 +106,7 @@ def parse_department(string): string = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ") # 去除括号 string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格 string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字 + string = string.replace("病区", "").replace("病", "") # 去除常见的无意义词 string = string.replace("科", " ") # 分离科室 departments = string.strip().split(" ") for department in departments: