优化科室的匹配
This commit is contained in:
@@ -221,6 +221,22 @@ def search_hospital(hospital):
|
|||||||
return best_match
|
return best_match
|
||||||
|
|
||||||
|
|
||||||
|
def search_department(department):
    """Look up the BdYlks row whose name best matches ``department``.

    The input is tokenized with ``jieba.lcut``. A first query looks for names
    containing all tokens in order (tokens joined with the SQL ``%``
    wildcard). If that returns nothing, each token is tried on its own and
    the first token that yields any rows wins. The candidate rows are then
    ranked against the original ``department`` string with
    ``process.extractOne`` using ``fuzz.partial_token_set_ratio``.

    :param department: department name text to match.
    :return: whatever ``process.extractOne`` returns for a ``{pk_ylks: name}``
        mapping. Callers in this file read index 1 as the score and index 2
        as the ``pk_ylks`` key, and treat a falsy result as "no match".
    """
    cut_list = jieba.lcut(department)
    session = MysqlSession()
    try:
        # Strict attempt: every token must appear in the name, in order.
        ylks = (
            session.query(BdYlks.pk_ylks, BdYlks.name)
            .filter(BdYlks.name.like(f"%{'%'.join(cut_list)}%"))
            .all()
        )
        if not ylks:
            # Relaxed attempt: stop at the first single token with any hits.
            for filter_keyword in cut_list:
                ylks = (
                    session.query(BdYlks.pk_ylks, BdYlks.name)
                    .filter(BdYlks.name.like(f"%{filter_keyword}%"))
                    .all()
                )
                if ylks:
                    break
    finally:
        # Always release the session, even when a query raises.
        session.close()

    # NOTE(review): if no candidate rows were found this mapping is empty;
    # extractOne is presumably expected to return None then - confirm against
    # the fuzzy-matching library in use.
    ylks = {row.pk_ylks: row.name for row in ylks}
    best_match = process.extractOne(department, ylks, scorer=fuzz.partial_token_set_ratio)
    return best_match
|
||||||
|
|
||||||
|
|
||||||
def settlement_task(pk_phhd, settlement_list, identity):
|
def settlement_task(pk_phhd, settlement_list, identity):
|
||||||
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
settlement_list_ie_result = information_extraction(SETTLEMENT_IE, settlement_list, identity)
|
||||||
settlement_data = {
|
settlement_data = {
|
||||||
@@ -299,17 +315,24 @@ def discharge_task(pk_phhd, discharge_record, identity):
|
|||||||
if best_match:
|
if best_match:
|
||||||
discharge_data["pk_yljg"] = best_match[2]
|
discharge_data["pk_yljg"] = best_match[2]
|
||||||
if departments:
|
if departments:
|
||||||
department_values = []
|
match_departments = []
|
||||||
for dept in departments:
|
for department in departments:
|
||||||
department_values += parse_department(dept)
|
parsed_departments = parse_department(department)
|
||||||
department_values = list(set(department_values))
|
for parsed_department in parsed_departments:
|
||||||
if department_values:
|
search_result = search_department(parsed_department)
|
||||||
session = MysqlSession()
|
match_departments.append(search_result)
|
||||||
ylks = session.query(BdYlks.pk_ylks, BdYlks.name) \
|
if search_result and search_result[1] == 100:
|
||||||
.filter(BdYlks.name.in_(department_values)).limit(1).one_or_none()
|
break
|
||||||
session.close()
|
best_match = None
|
||||||
if ylks:
|
best_score = 0
|
||||||
discharge_data["pk_ylks"] = ylks.pk_ylks
|
for match_department in match_departments:
|
||||||
|
if match_department and match_department[1] > best_score:
|
||||||
|
best_match = match_department
|
||||||
|
best_score = match_department[1]
|
||||||
|
if best_score == 100:
|
||||||
|
break
|
||||||
|
if best_match:
|
||||||
|
discharge_data["pk_ylks"] = best_match[2]
|
||||||
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
|
save_or_update_ie(ZxIeDischarge, pk_phhd, discharge_data)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -102,26 +102,16 @@ def parse_department(string):
|
|||||||
result = []
|
result = []
|
||||||
if not string:
|
if not string:
|
||||||
return result
|
return result
|
||||||
string = re.sub(r'\([^()]*\)|\[[^\[\]]*]|\{[^{}]*}|([^()]*)|[^⺀-鿿]', '', string)[:255]
|
|
||||||
if string == "科":
|
string = string.replace(")", "").replace(")", "").replace("(", " ").replace("(", " ") # 去除括号
|
||||||
return result
|
string = re.sub(r'[^⺀-鿿 ]', '', string) # 去除非汉字字符,除了空格
|
||||||
result.append(string)
|
string = re.sub(r'[一二三四五六七八九十]', '', string) # 去除中文数字
|
||||||
string_without_num = re.sub(r'\d|一|二|三|四|五|六|七|八|九|十', '', string)
|
string = string.replace("科", " ") # 分离科室
|
||||||
if string == "科":
|
departments = string.strip().split(" ")
|
||||||
return result
|
for department in departments:
|
||||||
if string_without_num != string:
|
if department:
|
||||||
result.append(string_without_num)
|
result.append(department)
|
||||||
pure_string = string_without_num.split("科")[0] + "科"
|
return set(result)
|
||||||
if string == "科":
|
|
||||||
return result
|
|
||||||
if pure_string != string_without_num:
|
|
||||||
result.append(pure_string)
|
|
||||||
pure_string_without_io = pure_string.replace("内", "").replace("外", "")
|
|
||||||
if string == "科":
|
|
||||||
return result
|
|
||||||
if pure_string_without_io != pure_string:
|
|
||||||
result.append(pure_string)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
# 处理姓名类数据
|
# 处理姓名类数据
|
||||||
|
|||||||
Reference in New Issue
Block a user