From 15fe5d4f0d0988016c430357118f3bd5460e76d5 Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Thu, 10 Oct 2024 09:24:09 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BF=A1=E6=81=AF=E6=8A=BD?=
 =?UTF-8?q?=E5=8F=96=E5=AD=98=E8=A1=A8=EF=BC=8C=E6=A0=B9=E6=8D=AE=E6=8A=BD?=
 =?UTF-8?q?=E5=8F=96=E7=BB=93=E6=9E=9C=E8=BF=9B=E8=A1=8C=E7=BC=BA=E9=A1=B9?=
 =?UTF-8?q?=E5=88=A4=E6=96=AD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 photo_review/auto_photo_review.py | 38 ++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/photo_review/auto_photo_review.py b/photo_review/auto_photo_review.py
index 460ec39..6abe6d2 100644
--- a/photo_review/auto_photo_review.py
+++ b/photo_review/auto_photo_review.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import time
 from collections import defaultdict
@@ -12,7 +13,7 @@ from rapidfuzz import process, fuzz
 from sqlalchemy import update
 
 from db import MysqlSession
-from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview
+from db.mysql import BdYljg, BdYlks, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec, ZxIeReview, ZxIeResult
 from log import HOSTNAME
 from photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, HOSPITAL_ALIAS, HOSPITAL_FILTER, DEPARTMENT_ALIAS, \
     DEPARTMENT_FILTER
@@ -98,11 +99,13 @@ def get_better_image_from_qrcode(img_path, image_id, dpi=150):
 
 
 # 关键信息提取
-def information_extraction(phrec, identity):
+def information_extraction(phrec, pk_phhd, identity):
     """
     处理单张图片
-    :param phrec:
-    :return:
+    :param phrec:图片信息
+    :param pk_phhd:案子主键
+    :param identity:处理批次标识
+    :return:记录类型，信息抽取结果
     """
     img_path = image_util.get_img_path(phrec.cfjaddress)
     if not img_path:
@@ -114,13 +117,13 @@ def information_extraction(phrec, identity):
     if phrec.cRectype != '1':
         better_img_path = None  # 非结算单暂时不进行替换
     if better_img_path is not None:
+        rec_type = '基本医保结算单'
         if text:
             info_extract = model_util.ie_settlement_text(text)
         else:
             info_extract = model_util.ie_settlement(
                 better_img_path, common_util.ocr_result_to_layout(model_util.ocr(better_img_path))
             )
-        return '基本医保结算单', info_extract
     else:
         target_image = model_util.det_book(img_path)  # 识别文档区域并裁剪
         dewarped_image = model_util.dewarp(target_image)  # 去扭曲
@@ -143,7 +146,19 @@ def information_extraction(phrec, identity):
         else:
             info_extract = None
 
-        return rec_type, info_extract
+    if info_extract:
+        result_json = json.dumps(info_extract, ensure_ascii=False)[:5000]  # hard cap; may cut the JSON short
+        now = common_util.get_default_datetime()
+        session = MysqlSession()
+        try:
+            session.add(ZxIeResult(pk_phhd=pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
+                                   cfjaddress=phrec.cfjaddress, content=result_json, create_time=now,
+                                   creator=HOSTNAME, update_time=now, updater=HOSTNAME))
+            session.commit()
+        finally:
+            # Close the session even when add/commit raises, so it is not leaked.
+            session.close()
+    return rec_type, info_extract
 
 
 # 从keys中获取准确率最高的value
@@ -402,7 +417,7 @@ def photo_review(pk_phhd, name):
     # 同一批图的标识
     identity = int(time.time())
     for phrec in phrecs:
-        rec_type, ie_result = information_extraction(phrec, identity)
+        rec_type, ie_result = information_extraction(phrec, pk_phhd, identity)
         if rec_type == '基本医保结算单':
             rec_result = settlement_result
         elif rec_type == '出院记录':
@@ -424,12 +439,13 @@ def photo_review(pk_phhd, name):
     }
     # 三项资料完整性判断
     # 三项资料缺项判断
-    if (settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
+    if (bool(settlement_data) and settlement_data['personal_account_payment']
+            and settlement_data['personal_cash_payment'] and settlement_data['medical_expenses']
+            and settlement_data['personal_account_payment'] + settlement_data['personal_cash_payment']
             < settlement_data['medical_expenses']):
         review_result['has_settlement'] = True
-    # TODO:出院记录和费用清单暂时没想好怎么判断
-    review_result['has_discharge'] = True
-    review_result['has_cost'] = True
+    review_result['has_discharge'] = bool(discharge_result)
+    review_result['has_cost'] = bool(cost_result)
     # 三项资料缺页判断
     # TODO:缺页需要对页码进行抽取，暂未训练相关模型
     review_result['full_page'] = True