添加信息抽取结果存表功能，并根据抽取结果进行三项资料的缺项判断

2024-10-10 09:24:09 +08:00
parent fc69aa5b9d
commit 15fe5d4f0d
1 changed files with 27 additions and 11 deletions
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -1,3 +1,4 @@
 import json
 import logging
 import time
 from collections import defaultdict
@@ -12,7 +13,7 @@ from rapidfuzz import process, fuzz
 from sqlalchemy import update
 from db import MysqlSession
-from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview
+from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
 from log import HOSTNAME
 from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
    DEPARTMENT_FILTER
@@ -98,11 +99,13 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150):
 # 关键信息提取
-def information_extraction(phrec, identity):
+def information_extraction(phrec, pk_phhd, identity):
    """
    处理单张图片
-    :param phrec:
+    :param phrec:图片信息
-    :return:
+    :param pk_phhd:案子主键
    :param identity:处理批次标识
    :return:记录类型，信息抽取结果
    """
    img_path = image_util.get_img_path(phrec.cfjaddress)
    if not img_path:
@@ -114,13 +117,13 @@ def information_extraction(phrec, identity):
    if phrec.cRectype != '1':
        better_img_path = None  # 非结算单暂时不进行替换
    if better_img_path is not None:
        rec_type = '基本医保结算单'
        if text:
            info_extract = model_util.ie_settlement_text(text)
        else:
            info_extract = model_util.ie_settlement(
                better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
            )
        return '基本医保结算单', info_extract
    else:
        target_image = model_util.det_book(img_path)  # 识别文档区域并裁剪
        dewarped_image = model_util.dewarp(target_image)  # 去扭曲
@@ -143,7 +146,19 @@ def information_extraction(phrec, identity):
        else:
            info_extract = None
-        return rec_type, info_extract
+    if info_extract:
        result_json = json.dumps(info_extract, ensure_ascii=False)
        if len(result_json) > 5000:
            result_json = result_json[:5000]
        now = common_util.get_default_datetime()
        session = MysqlSession()
        session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
                               cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
                               creator=HOSTNAME, update_time=now, updater=HOSTNAME))
        session.commit()
        session.close()
    return rec_type, info_extract
 # 从keys中获取准确率最高的value
@@ -402,7 +417,7 @@ def photo_review(pk_phhd, name):
    # 同一批图的标识
    identity = int(time.time())
    for phrec in phrecs:
-        rec_type, ie_result = information_extraction(phrec, identity)
+        rec_type, ie_result = information_extraction(phrec, pk_phhd, identity)
        if rec_type == '基本医保结算单':
            rec_result = settlement_result
        elif rec_type == '出院记录':
@@ -424,12 +439,13 @@ def photo_review(pk_phhd, name):
    }
    # 三项资料完整性判断
    # 三项资料缺项判断
-    if (settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
+    if (bool(settlement_data) and settlement_data['personal_account_payment']
            and settlement_data['personal_cash_payment'] and settlement_data['medical_expenses']
            and settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
            < settlement_data['medical_expenses']):
        review_result['has_settlement'] = True
-    # TODO:出院记录和费用清单暂时没想好怎么判断
+    review_result['has_discharge'] = bool(discharge_result)
-    review_result['has_discharge'] = True
+    review_result['has_cost'] = bool(cost_result)
    review_result['has_cost'] = True
    # 三项资料缺页判断
    # TODO:缺页需要对页码进行抽取，暂未训练相关模型
    review_result['full_page'] = True