From 9c41fab95c37b4bcedef7e1f80b0ceffe9f89f98 Mon Sep 17 00:00:00 2001 From: liuyebo <1515783401@qq.com> Date: Thu, 17 Oct 2024 12:58:23 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=90=ABpdf=E6=97=B6?= =?UTF-8?q?=E5=87=BA=E9=99=A2=E8=AE=B0=E5=BD=95=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- photo_review/auto_photo_review.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py index 4ed9d17..b357f0d 100644 --- a/photo_review/auto_photo_review.py +++ b/photo_review/auto_photo_review.py @@ -473,6 +473,7 @@ def photo_review(pk_phhd, name): better_settlement_path = None better_cost_path = None settlement_text = '' + qrcode_img_id = None for phrec in phrecs: original_img_path = common_util.get_img_path(phrec.cfjaddress) if not original_img_path: @@ -486,31 +487,29 @@ def photo_review(pk_phhd, name): better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress) if better_settlement_path: has_pdf = True + qrcode_img_id = phrec.cfjaddress break discharge_text = '' if has_pdf: settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text) - discharge_result = defaultdict(list, discharge_result) + discharge_ie_result = defaultdict(list) - is_settlement_updated = False is_cost_updated = False for phrec in phrecs: if phrec.cRectype == '1': - if not is_settlement_updated: + if phrec.cfjaddress == qrcode_img_id: try: ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress) ufile.upload_file(phrec.cfjaddress, better_settlement_path) except Exception as e: logging.error("更新结算单pdf图片出错", exc_info=e) - finally: - is_settlement_updated = True elif phrec.cRectype == '3': rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd) if rec_type == '出院记录': discharge_text += ocr_text for key, value in ie_result.items(): - discharge_result[key].append(value) + discharge_ie_result[key].append(value) elif phrec.cRectype == '4': if not is_cost_updated: try: @@ -520,6 +519,14 @@ def photo_review(pk_phhd, name): logging.error("更新费用清单pdf图片出错", exc_info=e) finally: is_cost_updated = True + + # 合并出院记录 + for key, value in discharge_ie_result.items(): + ie_value = get_best_value_of_key(discharge_ie_result, key) + pdf_value = discharge_result.get(key)[0][0]['text'] + similarity_ratio = fuzz.ratio(ie_value, pdf_value) + if similarity_ratio < 60: + discharge_result[key] = [[{'text': ie_value, 'probability': 1}]] else: for phrec in phrecs: rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)