优化含 PDF 时出院记录的处理

This commit is contained in:
2024-10-17 12:58:23 +08:00
parent 0060c4ad59
commit 9c41fab95c

View File

@@ -473,6 +473,7 @@ def photo_review(pk_phhd, name):
better_settlement_path = None better_settlement_path = None
better_cost_path = None better_cost_path = None
settlement_text = '' settlement_text = ''
qrcode_img_id = None
for phrec in phrecs: for phrec in phrecs:
original_img_path = common_util.get_img_path(phrec.cfjaddress) original_img_path = common_util.get_img_path(phrec.cfjaddress)
if not original_img_path: if not original_img_path:
@@ -486,31 +487,29 @@ def photo_review(pk_phhd, name):
better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress) better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress)
if better_settlement_path: if better_settlement_path:
has_pdf = True has_pdf = True
qrcode_img_id = phrec.cfjaddress
break break
discharge_text = '' discharge_text = ''
if has_pdf: if has_pdf:
settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text) settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text)
discharge_result = defaultdict(list, discharge_result) discharge_ie_result = defaultdict(list)
is_settlement_updated = False
is_cost_updated = False is_cost_updated = False
for phrec in phrecs: for phrec in phrecs:
if phrec.cRectype == '1': if phrec.cRectype == '1':
if not is_settlement_updated: if phrec.cfjaddress == qrcode_img_id:
try: try:
ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress) ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
ufile.upload_file(phrec.cfjaddress, better_settlement_path) ufile.upload_file(phrec.cfjaddress, better_settlement_path)
except Exception as e: except Exception as e:
logging.error("更新结算单pdf图片出错", exc_info=e) logging.error("更新结算单pdf图片出错", exc_info=e)
finally:
is_settlement_updated = True
elif phrec.cRectype == '3': elif phrec.cRectype == '3':
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd) rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
if rec_type == '出院记录': if rec_type == '出院记录':
discharge_text += ocr_text discharge_text += ocr_text
for key, value in ie_result.items(): for key, value in ie_result.items():
discharge_result[key].append(value) discharge_ie_result[key].append(value)
elif phrec.cRectype == '4': elif phrec.cRectype == '4':
if not is_cost_updated: if not is_cost_updated:
try: try:
@@ -520,6 +519,14 @@ def photo_review(pk_phhd, name):
logging.error("更新费用清单pdf图片出错", exc_info=e) logging.error("更新费用清单pdf图片出错", exc_info=e)
finally: finally:
is_cost_updated = True is_cost_updated = True
# 合并出院记录
for key, value in discharge_ie_result.items():
ie_value = get_best_value_of_key(discharge_ie_result, key)
pdf_value = discharge_result.get(key)[0][0]['text']
similarity_ratio = fuzz.ratio(ie_value, pdf_value)
if similarity_ratio < 60:
discharge_result[key] = [[{'text': ie_value, 'probability': 1}]]
else: else:
for phrec in phrecs: for phrec in phrecs:
rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd) rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)