diff --git a/docker-compose.yml b/docker-compose.yml index 134329c..7ea5ab7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,6 @@ x-env: &template - image: fcb_photo_review:1.12.9 + image: fcb_photo_review:1.12.10 restart: always services: diff --git a/photo_review/__init__.py b/photo_review/__init__.py index 95043dc..82d7b9e 100644 --- a/photo_review/__init__.py +++ b/photo_review/__init__.py @@ -75,7 +75,7 @@ HOSPITAL_ALIAS = { "漂水": ["溧水"], } DEPARTMENT_ALIAS = { - "耳鼻喉": ["耳鼻咽喉"] + "耳鼻喉": ["耳鼻咽喉"], } """ @@ -84,10 +84,16 @@ DEPARTMENT_ALIAS = { # 默认会过滤单字 HOSPITAL_FILTER = ["医院", "人民", "第一", "第二", "第三", "大学", "附属"] +DEPARTMENT_FILTER = ["医", "病", "病区", "伤"] + """ 分词配置 """ jieba.suggest_freq(('肿瘤', '医院'), True) +jieba.suggest_freq(('心血管', '病'), True) +jieba.suggest_freq(('骨', '伤'), True) +jieba.suggest_freq(('感染', '性'), True) +jieba.suggest_freq(('胆', '道'), True) """ 模型配置 diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py index 9ff297f..577d0a2 100644 --- a/photo_review/photo_review.py +++ b/photo_review/photo_review.py @@ -17,7 +17,7 @@ from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSe from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \ PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \ ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \ - UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS + UPPERCASE_MEDICAL_EXPENSES, HOSTNAME, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, DEPARTMENT_FILTER from ucloud import ufile from util import image_util, util from util.data_util import handle_date, handle_decimal, parse_department, handle_name, \ @@ -206,7 +206,7 @@ def search_hospital(hospital): result.append(result2) return result - cut_list = jieba.lcut(hospital) + cut_list = jieba.lcut(hospital, HMM=False) session = MysqlSession() yljg = session.query(BdYljg.pk_yljg, BdYljg.name).filter(BdYljg.name.like(f"%{'%'.join(cut_list)}%")).all() if not yljg: @@ -222,8 +222,15 @@ def search_hospital(hospital): def search_department(department): - cut_list = jieba.lcut(department) + def _filter_search_keywords(keywords): + keywords = [x for x in keywords if x not in DEPARTMENT_FILTER] + return keywords + + cut_list = jieba.lcut(department, HMM=False) session = MysqlSession() + cut_list = _filter_search_keywords(cut_list) + if not cut_list: + return None ylks = session.query(BdYlks.pk_ylks, BdYlks.name).filter(BdYlks.name.like(f"%{'%'.join(cut_list)}%")).all() if not ylks: filter_keywords = cut_list @@ -233,7 +240,11 @@ def search_department(department): break session.close() ylks = {row.pk_ylks: row.name for row in ylks} - best_match = process.extractOne(department, ylks, scorer=fuzz.partial_token_set_ratio) + best_match = process.extractOne(department, ylks, scorer=fuzz.token_ratio) + if best_match and best_match[0] in ["内科", "外科"]: + # 降低内科、外科的优先级 + best_match = list(best_match) + best_match[1] -= 100 return best_match