使用分词模糊查询优化医院的匹配
This commit is contained in:
@@ -7,15 +7,17 @@ from collections import defaultdict
|
||||
from time import sleep
|
||||
|
||||
import cv2
|
||||
import jieba
|
||||
import requests
|
||||
from sqlalchemy import update, or_
|
||||
from rapidfuzz import process, fuzz
|
||||
from sqlalchemy import update
|
||||
|
||||
from db import MysqlSession
|
||||
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
|
||||
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
|
||||
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
|
||||
UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS
|
||||
UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER
|
||||
from ucloud import ufile
|
||||
from util import image_util, util
|
||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
||||
@@ -189,6 +191,36 @@ def save_or_update_ie(table, pk_phhd, data):
|
||||
session.close()
|
||||
|
||||
|
||||
def search_hospital(hospital):
    """Look up the medical institution (BdYljg) that best matches a hospital name.

    The name is tokenized with jieba and the tokens are joined with ``%`` to
    form a SQL ``LIKE`` pattern, so rows must contain every token in order.
    If that strict query finds nothing, up to two fallback keywords (see
    ``_filter_search_keywords``) are tried one at a time until a query
    returns rows. The candidate rows are then ranked against the full input
    with rapidfuzz.

    Args:
        hospital: Hospital name text to search for.

    Returns:
        Whatever ``process.extractOne`` returns for a ``{pk_yljg: name}``
        mapping. Callers in this file treat a falsy result as "no match",
        index ``[1]`` as the score and index ``[2]`` as the ``pk_yljg`` key.
    """

    def _filter_search_keywords(keywords):
        """Return the fallback keywords found just before the "医院" token.

        Tokens listed in HOSPITAL_FILTER are dropped first. The remaining
        tokens are scanned until one containing "医院" is met; the token
        immediately before it comes first in the result, followed by the one
        before that (if any). When no token contains "医院", the last two
        tokens of the list are used instead.
        """
        keywords = [x for x in keywords if x not in HOSPITAL_FILTER]
        result1 = ""
        result2 = ""
        for keyword in keywords:
            if "医院" in keyword:
                break
            result2 = result1
            result1 = keyword
        # NOTE(review): result1 can still be "" here (e.g. the first token
        # already contains "医院"); the resulting LIKE '%%' pattern matches
        # every row. Confirm whether that full-table fallback is intended.
        result = [result1]
        if result2:
            result.append(result2)
        return result

    cut_list = jieba.lcut(hospital)
    session = MysqlSession()
    try:
        # Strict attempt: all tokens must appear in the name, in order.
        yljg = (
            session.query(BdYljg.pk_yljg, BdYljg.name)
            .filter(BdYljg.name.like(f"%{'%'.join(cut_list)}%"))
            .all()
        )
        if not yljg:
            # Relaxed attempt: search by a single distinguishing keyword.
            for filter_keyword in _filter_search_keywords(cut_list):
                yljg = (
                    session.query(BdYljg.pk_yljg, BdYljg.name)
                    .filter(BdYljg.name.like(f"%{filter_keyword}%"))
                    .all()
                )
                if yljg:
                    break
    finally:
        # Always release the session, even when a query raises.
        session.close()

    # Keyed by primary key so the match result carries pk_yljg with it.
    candidates = {row.pk_yljg: row.name for row in yljg}
    return process.extractOne(hospital, candidates, scorer=fuzz.partial_token_set_ratio)
|
||||
|
||||
|
||||
def settlement_task(pk_phhd, settlement_list, identity):
|
||||
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
||||
settlement_data = {
|
||||
@@ -241,25 +273,31 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||
|
||||
if hospitals:
|
||||
hospital_like_conditions = []
|
||||
match_hospitals = []
|
||||
for hospital in hospitals:
|
||||
parsed_hospitals = parse_hospital(hospital)
|
||||
for parsed_hospital in parsed_hospitals:
|
||||
hospital_index = parsed_hospital.find("医院")
|
||||
if hospital_index != -1 and hospital_index != len(parsed_hospital) - 2:
|
||||
parsed_hospital = parsed_hospital[:hospital_index + 2] + "%" + parsed_hospital[hospital_index + 2:]
|
||||
hospital_like_conditions.append(BdYljg.name.like(f'%{parsed_hospital}%'))
|
||||
search_result = search_hospital(parsed_hospital)
|
||||
match_hospitals.append(search_result)
|
||||
if search_result and search_result[1] == 100:
|
||||
break
|
||||
for hospital_alias_key in HOSPITAL_ALIAS.keys():
|
||||
if hospital_alias_key in parsed_hospital:
|
||||
for hospital_alias in HOSPITAL_ALIAS[hospital_alias_key]:
|
||||
new_hospital = parsed_hospital.replace(hospital_alias_key, hospital_alias)
|
||||
hospital_like_conditions.append(BdYljg.name.like(f'%{new_hospital}%'))
|
||||
match_hospitals.append(search_hospital(new_hospital))
|
||||
break
|
||||
session = MysqlSession()
|
||||
yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(or_(*hospital_like_conditions)).limit(1).one_or_none()
|
||||
session.close()
|
||||
if yljg:
|
||||
discharge_data["pk_yljg"] = yljg.pk_yljg
|
||||
best_match = None
|
||||
best_score = 0
|
||||
for match_hospital in match_hospitals:
|
||||
if match_hospital and match_hospital[1] > best_score:
|
||||
best_match = match_hospital
|
||||
best_score = match_hospital[1]
|
||||
if best_score == 100:
|
||||
break
|
||||
|
||||
if best_match:
|
||||
discharge_data["pk_yljg"] = best_match[2]
|
||||
if departments:
|
||||
department_values = []
|
||||
for dept in departments:
|
||||
|
||||
Reference in New Issue
Block a user