使用分词模糊查询优化医院的匹配
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
x-env:
|
x-env:
|
||||||
&template
|
&template
|
||||||
image: fcb_photo_review:1.12.6
|
image: fcb_photo_review:1.12.7
|
||||||
restart: always
|
restart: always
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
import jieba
|
||||||
from paddlenlp import Taskflow
|
from paddlenlp import Taskflow
|
||||||
from paddleocr import PaddleOCR
|
from paddleocr import PaddleOCR
|
||||||
|
|
||||||
@@ -67,14 +68,23 @@ COST_LIST_SCHEMA = PATIENT_NAME + ADMISSION_DATE + DISCHARGE_DATE + MEDICAL_EXPE
|
|||||||
"""
|
"""
|
||||||
# 使用别名中的value替换key。考虑到效率问题,只会替换第一个匹配到的key。
|
# 使用别名中的value替换key。考虑到效率问题,只会替换第一个匹配到的key。
|
||||||
HOSPITAL_ALIAS = {
|
HOSPITAL_ALIAS = {
|
||||||
"江阴": ["江阴市"],
|
|
||||||
"溧阳": ["溧阳市"],
|
|
||||||
"六合": ["六合区"],
|
|
||||||
"沐阳": ["沭阳"],
|
"沐阳": ["沭阳"],
|
||||||
"连水": ["涟水"],
|
"连水": ["涟水"],
|
||||||
"中医医院": ["中医院"],
|
"唯宁": ["睢宁"],
|
||||||
|
"九〇四": ["904"],
|
||||||
|
"漂水": ["溧水"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
"""
|
||||||
|
搜索过滤配置
|
||||||
|
"""
|
||||||
|
HOSPITAL_FILTER = ["医院", "省", "市", "县", "区", "州", "人民", "第一", "第二", "第三", "大学", "附属"]
|
||||||
|
|
||||||
|
"""
|
||||||
|
分词配置
|
||||||
|
"""
|
||||||
|
jieba.suggest_freq(('肿瘤', '医院'), True)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
模型配置
|
模型配置
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -7,15 +7,17 @@ from collections import defaultdict
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
|
import jieba
|
||||||
import requests
|
import requests
|
||||||
from sqlalchemy import update, or_
|
from rapidfuzz import process, fuzz
|
||||||
|
from sqlalchemy import update
|
||||||
|
|
||||||
from db import MysqlSession
|
from db import MysqlSession
|
||||||
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
|
from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
|
||||||
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
|
||||||
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
|
PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
|
||||||
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
|
ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
|
||||||
UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS
|
UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER
|
||||||
from ucloud import ufile
|
from ucloud import ufile
|
||||||
from util import image_util, util
|
from util import image_util, util
|
||||||
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \
|
||||||
@@ -189,6 +191,36 @@ def save_or_update_ie(table, pk_phhd, data):
|
|||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
|
|
||||||
|
def search_hospital(hospital):
    """Find the medical institution (BdYljg) whose name best matches ``hospital``.

    The name is tokenized with jieba and looked up in two stages:

    1. A strict query requiring every token to appear in order
       (``%tok1%tok2%...%``).
    2. If that yields nothing, a looser query on individual distinctive
       keywords (see ``_filter_search_keywords``), stopping at the first
       keyword that returns any rows.

    The rows found are then ranked with rapidfuzz against the original name.

    :param hospital: hospital name text to look up.
    :return: the ``process.extractOne`` result for a ``{pk_yljg: name}``
        mapping, i.e. a ``(name, score, pk_yljg)`` tuple, or ``None`` when the
        input is blank or no candidate row was found.
    """

    def _filter_search_keywords(keywords):
        """Return up to two fallback keywords, nearest-to-"医院" first.

        Tokens listed in HOSPITAL_FILTER are skipped as too generic, and
        scanning stops at the first remaining token that contains "医院".
        An empty list is returned when nothing distinctive is left, so the
        caller never issues a match-everything ``LIKE '%%'`` query.
        """
        distinctive = []
        for keyword in keywords:
            if keyword in HOSPITAL_FILTER:
                continue
            if "医院" in keyword:
                break
            distinctive.append(keyword)
        # Keep the last two tokens, most recent one first.
        return distinctive[-2:][::-1]

    # A blank name would turn the first query into LIKE '%%' and fuzzy-match
    # against the whole table, so bail out early.
    if not hospital or not hospital.strip():
        return None

    cut_list = jieba.lcut(hospital)
    session = MysqlSession()
    try:
        rows = session.query(BdYljg.pk_yljg, BdYljg.name).filter(
            BdYljg.name.like(f"%{'%'.join(cut_list)}%")).all()
        if not rows:
            for filter_keyword in _filter_search_keywords(cut_list):
                rows = session.query(BdYljg.pk_yljg, BdYljg.name).filter(
                    BdYljg.name.like(f"%{filter_keyword}%")).all()
                if rows:
                    break
    finally:
        # Always release the session, even if a query raises.
        session.close()

    if not rows:
        return None

    candidates = {row.pk_yljg: row.name for row in rows}
    return process.extractOne(hospital, candidates, scorer=fuzz.partial_token_set_ratio)
|
||||||
|
|
||||||
|
|
||||||
def settlement_task(pk_phhd, settlement_list, identity):
|
def settlement_task(pk_phhd, settlement_list, identity):
|
||||||
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
||||||
settlement_data = {
|
settlement_data = {
|
||||||
@@ -241,25 +273,31 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
|||||||
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
discharge_data["discharge_date"] = handle_date(discharge_data["discharge_date_str"])
|
||||||
|
|
||||||
if hospitals:
|
if hospitals:
|
||||||
hospital_like_conditions = []
|
match_hospitals = []
|
||||||
for hospital in hospitals:
|
for hospital in hospitals:
|
||||||
parsed_hospitals = parse_hospital(hospital)
|
parsed_hospitals = parse_hospital(hospital)
|
||||||
for parsed_hospital in parsed_hospitals:
|
for parsed_hospital in parsed_hospitals:
|
||||||
hospital_index = parsed_hospital.find("医院")
|
search_result = search_hospital(parsed_hospital)
|
||||||
if hospital_index != -1 and hospital_index != len(parsed_hospital) - 2:
|
match_hospitals.append(search_result)
|
||||||
parsed_hospital = parsed_hospital[:hospital_index + 2] + "%" + parsed_hospital[hospital_index + 2:]
|
if search_result and search_result[1] == 100:
|
||||||
hospital_like_conditions.append(BdYljg.name.like(f'%{parsed_hospital}%'))
|
break
|
||||||
for hospital_alias_key in HOSPITAL_ALIAS.keys():
|
for hospital_alias_key in HOSPITAL_ALIAS.keys():
|
||||||
if hospital_alias_key in parsed_hospital:
|
if hospital_alias_key in parsed_hospital:
|
||||||
for hospital_alias in HOSPITAL_ALIAS[hospital_alias_key]:
|
for hospital_alias in HOSPITAL_ALIAS[hospital_alias_key]:
|
||||||
new_hospital = parsed_hospital.replace(hospital_alias_key, hospital_alias)
|
new_hospital = parsed_hospital.replace(hospital_alias_key, hospital_alias)
|
||||||
hospital_like_conditions.append(BdYljg.name.like(f'%{new_hospital}%'))
|
match_hospitals.append(search_hospital(new_hospital))
|
||||||
break
|
break
|
||||||
session = MysqlSession()
|
best_match = None
|
||||||
yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(or_(*hospital_like_conditions)).limit(1).one_or_none()
|
best_score = 0
|
||||||
session.close()
|
for match_hospital in match_hospitals:
|
||||||
if yljg:
|
if match_hospital and match_hospital[1] > best_score:
|
||||||
discharge_data["pk_yljg"] = yljg.pk_yljg
|
best_match = match_hospital
|
||||||
|
best_score = match_hospital[1]
|
||||||
|
if best_score == 100:
|
||||||
|
break
|
||||||
|
|
||||||
|
if best_match:
|
||||||
|
discharge_data["pk_yljg"] = best_match[2]
|
||||||
if departments:
|
if departments:
|
||||||
department_values = []
|
department_values = []
|
||||||
for dept in departments:
|
for dept in departments:
|
||||||
|
|||||||
@@ -184,5 +184,6 @@ def parse_hospital(string):
|
|||||||
string = util.traditional_to_simple_chinese(string)
|
string = util.traditional_to_simple_chinese(string)
|
||||||
string_without_brackets = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ")
|
string_without_brackets = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ")
|
||||||
string_without_company = string_without_brackets.replace("有限公司", "")
|
string_without_company = string_without_brackets.replace("有限公司", "")
|
||||||
result += string_without_company.split(" ")
|
split_hospitals = string_without_company.replace("医院", "医院 ")
|
||||||
|
result += split_hospitals.strip().split(" ")
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user