使用分词模糊查询优化医院的匹配

This commit is contained in:
2024-08-16 16:04:59 +08:00
parent 729167abca
commit 478f98abfd
4 changed files with 68 additions and 19 deletions

View File

@@ -1,5 +1,6 @@
import socket
import jieba
from paddlenlp import Taskflow
from paddleocr import PaddleOCR
@@ -67,14 +68,23 @@ COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPE
"""
# Hospital-name alias table: a key found in a name is replaced by the value
# listed for it. For efficiency, only the first key that matches is replaced,
# so the order of the entries below may matter to the consumer.
# Each value is a list; every entry currently holds exactly one replacement.
# NOTE(review): several keys (e.g. "沐阳", "连水", "唯宁", "漂水") look like
# OCR misreadings of the place name given as the value — presumably this
# table normalizes OCR output before matching; confirm against the caller.
HOSPITAL_ALIAS = {
"江阴": ["江阴市"],
"溧阳": ["溧阳市"],
"六合": ["六合区"],
"沐阳": ["沭阳"],
"连水": ["涟水"],
"中医医院": ["中医院"],
"唯宁": ["睢宁"],
"九〇四": ["904"],
"漂水": ["溧水"],
}
"""
搜索过滤配置
"""
# Search filter terms for hospital matching. The non-empty entries are
# generic words that appear in many hospital names ("hospital", "people's",
# "first/second/third", "university", "affiliated").
# NOTE(review): presumably these tokens are dropped from the segmented name
# before the fuzzy query because they do not discriminate — confirm in the
# code that consumes this list.
# NOTE(review): the five consecutive empty-string entries look unintended
# (possibly characters lost in transit or placeholders). Depending on how the
# list is used (e.g. substring / LIKE matching) an empty string can match
# everything — verify and replace or remove them.
HOSPITAL_FILTER = ["医院", "", "", "", "", "", "人民", "第一", "第二", "第三", "大学", "附属"]
"""
分词配置
"""
# Tune jieba's word frequencies at import time. Passing a tuple of segments
# with tune=True asks jieba to adjust its dictionary so that the joined text
# ("肿瘤医院") is cut into the separate tokens "肿瘤" and "医院" rather than
# kept as one word. This mutates jieba's global default tokenizer, so it
# affects every later jieba cut in this process.
jieba.suggest_freq(('肿瘤', '医院'), True)
"""
模型配置
"""