优化含pdf时出院记录的处理

2024-10-17 12:58:23 +08:00
parent 0060c4ad59
commit 9c41fab95c
1 changed file with 13 additions and 6 deletions
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -473,6 +473,7 @@ def photo_review(pk_phhd, name):
    better_settlement_path = None
    better_cost_path = None
    settlement_text = ''
    qrcode_img_id = None
    for phrec in phrecs:
        original_img_path = common_util.get_img_path(phrec.cfjaddress)
        if not original_img_path:
@@ -486,31 +487,29 @@ def photo_review(pk_phhd, name):
        better_settlement_path, settlement_text, better_cost_path = parse_qrcode(img_path, phrec.cfjaddress)
        if better_settlement_path:
            has_pdf = True
            qrcode_img_id = phrec.cfjaddress
            break
    discharge_text = ''
    if has_pdf:
        settlement_result, discharge_result, cost_result = parse_pdf_text(settlement_text)
-        discharge_result = defaultdict(list, discharge_result)
+        discharge_ie_result = defaultdict(list)
        is_settlement_updated = False
        is_cost_updated = False
        for phrec in phrecs:
            if phrec.cRectype == '1':
-                if not is_settlement_updated:
+                if phrec.cfjaddress == qrcode_img_id:
                    try:
                        ufile.copy_file(BUCKET, phrec.cfjaddress, "drg2015", phrec.cfjaddress)
                        ufile.upload_file(phrec.cfjaddress, better_settlement_path)
                    except Exception as e:
                        logging.error("更新结算单pdf图片出错", exc_info=e)
                    finally:
                        is_settlement_updated = True
            elif phrec.cRectype == '3':
                rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)
                if rec_type == '出院记录':
                    discharge_text += ocr_text
                    for key, value in ie_result.items():
-                        discharge_result[key].append(value)
+                        discharge_ie_result[key].append(value)
            elif phrec.cRectype == '4':
                if not is_cost_updated:
                    try:
@@ -520,6 +519,14 @@ def photo_review(pk_phhd, name):
                        logging.error("更新费用清单pdf图片出错", exc_info=e)
                    finally:
                        is_cost_updated = True
        # 合并出院记录
        for key, value in discharge_ie_result.items():
            ie_value = get_best_value_of_key(discharge_ie_result, key)
            pdf_value = discharge_result.get(key)[0][0]['text']
            similarity_ratio = fuzz.ratio(ie_value, pdf_value)
            if similarity_ratio < 60:
                discharge_result[key] = [[{'text': ie_value, 'probability': 1}]]
    else:
        for phrec in phrecs:
            rec_type, ie_result, ocr_text = information_extraction(phrec, pk_phhd)