From fd6b70680366a602c115d6b023e37ff9c6a7ec84 Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Thu, 22 Aug 2024 15:26:37 +0800
Subject: [PATCH] =?UTF-8?q?=E8=87=AA=E5=8A=A8=E8=AF=86=E5=88=AB=E6=96=B0?=
 =?UTF-8?q?=E5=A2=9E=E6=96=87=E6=A1=A3=E6=A3=80=E6=B5=8B=E4=B8=8E=E6=89=AD?=
 =?UTF-8?q?=E6=9B=B2=E7=9F=AB=E6=AD=A3=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                    |  5 ++-
 docker-compose.yml           |  2 +-
 photo_review/photo_review.py | 62 ++++++++++++++++++++----------------
 3 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 47e7c9e..d1090f3 100644
--- a/README.md
+++ b/README.md
@@ -113,4 +113,7 @@
 19. 版本号：1.12.0
     1. 优化医院、科室匹配方法，采用模糊匹配
     2. 匹配时为医院、科室添加别名
-    3. 添加医院、科室名的分析处理
\ No newline at end of file
+    3. 添加医院、科室名的分析处理
+20. 版本号：1.13.0
+    1. 新增文档检测功能
+    2. 新增扭曲矫正功能
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 5453658..63d87c5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,6 @@
 x-env:
   &template
-  image: fcb_photo_review:1.12.13
+  image: fcb_photo_review:1.13.0
   restart: always
 
 services:
diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py
index 0fbf6b1..dab3b23 100644
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -14,6 +14,8 @@ from sqlalchemy import update
 
 from db import MysqlSession
 from db.mysql import BdYljg, BdYlks, ZxIeResult, ZxIeCost, ZxIeDischarge, ZxIeSettlement, ZxPhhd, ZxPhrec
+from doc_dewarp import dewarp
+from object_detection import doc_detect
 from photo_review import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
     PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR, \
     ADMISSION_ID, SETTLEMENT_ID, AGE, OCR, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, PHHD_BATCH_SIZE, SLEEP_MINUTES, \
@@ -79,39 +81,45 @@ def information_extraction(ie, phrecs, identity):
             continue
 
         image = image_util.read(img_path)
-        angles = image_util.parse_rotation_angles(image)
+        target_images = []
+        target_images += doc_detect.capture_target_area(image)  # 识别文档区域并裁剪
+        if not target_images:
+            target_images.append(image)  # 识别失败
         angle_count = defaultdict(int, {"0": 0})  # 分割后图片的最优角度统计
-        zx_ie_results = []
-        split_results = image_util.split(image)
-        for split_result in split_results:
-            rotated_img = image_util.rotate(split_result["img"], int(angles[0]))
-            ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}]
-            if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")):
-                rotated_img = image_util.rotate(split_result["img"], int(angles[1]))
-                ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]})
+        for target_image in target_images:
+            dewarped_image = dewarp.dewarp_image(target_image)  # 去扭曲
+            angles = image_util.parse_rotation_angles(dewarped_image)
+            zx_ie_results = []
+            split_results = image_util.split(dewarped_image)
+            for split_result in split_results:
+                rotated_img = image_util.rotate(split_result["img"], int(angles[0]))
+                ie_results = [{"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[0]}]
+                if not ie_results[0]["result"] or len(ie_results[0]["result"]) < len(ie.kwargs.get("schema")):
+                    rotated_img = image_util.rotate(split_result["img"], int(angles[1]))
+                    ie_results.append({"result": ie_temp_image(ie, OCR, rotated_img), "angle": angles[1]})
 
-            now = util.get_default_datetime()
-            best_angle = ["0", 0]
-            for ie_result in ie_results:
-                if not ie_result["result"]:
-                    continue
+                now = util.get_default_datetime()
+                best_angle = ["0", 0]
+                for ie_result in ie_results:
+                    if not ie_result["result"]:
+                        continue
 
-                result_json = json.dumps(ie_result["result"], ensure_ascii=False)
-                if len(result_json) > 5000:
-                    result_json = result_json[:5000]
-                zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
-                                                cfjaddress=phrec.cfjaddress, content=result_json,
-                                                rotation_angle=int(ie_result["angle"]),
-                                                x_offset=split_result["x_offset"],
-                                                y_offset=split_result["y_offset"], create_time=now, creator=HOSTNAME,
-                                                update_time=now, updater=HOSTNAME))
+                    result_json = json.dumps(ie_result["result"], ensure_ascii=False)
+                    if len(result_json) > 5000:
+                        result_json = result_json[:5000]
+                    zx_ie_results.append(ZxIeResult(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=identity,
+                                                    cfjaddress=phrec.cfjaddress, content=result_json,
+                                                    rotation_angle=int(ie_result["angle"]),
+                                                    x_offset=split_result["x_offset"],
+                                                    y_offset=split_result["y_offset"], create_time=now,
+                                                    creator=HOSTNAME, update_time=now, updater=HOSTNAME))
 
-                result = merge_result(result, ie_result["result"])
+                    result = merge_result(result, ie_result["result"])
 
-                if len(ie_result["result"]) > best_angle[1]:
-                    best_angle = [ie_result["angle"], len(ie_result["result"])]
+                    if len(ie_result["result"]) > best_angle[1]:
+                        best_angle = [ie_result["angle"], len(ie_result["result"])]
 
-            angle_count[best_angle[0]] += 1
+                angle_count[best_angle[0]] += 1
 
         img_angle = max(angle_count, key=angle_count.get)
         if img_angle != "0":