优化科室的匹配

This commit is contained in:
2024-08-21 10:21:23 +08:00
parent 2f3b7e7dc0
commit 7fbcf7d268
3 changed files with 23 additions and 6 deletions

View File

@@ -1,6 +1,6 @@
x-env: x-env:
&template &template
image: fcb_photo_review:1.12.9 image: fcb_photo_review:1.12.10
restart: always restart: always
services: services:

View File

@@ -75,7 +75,7 @@ HOSPITAL_ALIAS = {
"漂水": ["溧水"], "漂水": ["溧水"],
} }
DEPARTMENT_ALIAS = { DEPARTMENT_ALIAS = {
"耳鼻喉": ["耳鼻咽喉"] "耳鼻喉": ["耳鼻咽喉"],
} }
""" """
@@ -84,10 +84,16 @@ DEPARTMENT_ALIAS = {
# 默认会过滤单字 # 默认会过滤单字
HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"] HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"]
DEPARTMENT_FILTER = ["", "", "病区", ""]
""" """
分词配置 分词配置
""" """
jieba.suggest_freq(('肿瘤', '医院'), True) jieba.suggest_freq(('肿瘤', '医院'), True)
jieba.suggest_freq(('心血管', ''), True)
jieba.suggest_freq(('', ''), True)
jieba.suggest_freq(('感染', ''), True)
jieba.suggest_freq(('', ''), True)
""" """
模型配置 模型配置

View File

@@ -17,7 +17,7 @@ from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSe
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER
from ucloud import ufile from ucloud import ufile
from util import image_util, util from util import image_util, util
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
@@ -206,7 +206,7 @@ def search_hospital(hospital):
result.append(result2) result.append(result2)
return result return result
cut_list = jieba.lcut(hospital) cut_list = jieba.lcut(hospital, HMM=False)
session = MysqlSession() session = MysqlSession()
yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(BdYljg.name.like(f"%{'%'.join(cut_list)}%")).all() yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(BdYljg.name.like(f"%{'%'.join(cut_list)}%")).all()
if not yljg: if not yljg:
@@ -222,8 +222,15 @@ def search_hospital(hospital):
def search_department(department): def search_department(department):
cut_list = jieba.lcut(department) def _filter_search_keywords(keywords):
keywords = [x for x in keywords if x not in DEPARTMENT_FILTER]
return keywords
cut_list = jieba.lcut(department, HMM=False)
session = MysqlSession() session = MysqlSession()
cut_list = _filter_search_keywords(cut_list)
if not cut_list:
return None
ylks = session.query(BdYlks.pk_ylks, BdYlks.name).filter(BdYlks.name.like(f"%{'%'.join(cut_list)}%")).all() ylks = session.query(BdYlks.pk_ylks, BdYlks.name).filter(BdYlks.name.like(f"%{'%'.join(cut_list)}%")).all()
if not ylks: if not ylks:
filter_keywords = cut_list filter_keywords = cut_list
@@ -233,7 +240,11 @@ def search_department(department):
break break
session.close() session.close()
ylks = {row.pk_ylks: row.name for row in ylks} ylks = {row.pk_ylks: row.name for row in ylks}
best_match = process.extractOne(department, ylks, scorer=fuzz.partial_token_set_ratio) best_match = process.extractOne(department, ylks, scorer=fuzz.token_ratio)
if best_match and best_match[0] in ["内科", "外科"]:
# 降低内科、外科的优先级
best_match = list(best_match)
best_match[1] -= 100
return best_match return best_match