From a81719bdc7c58732412244bd2e4d6bd9cf92035f Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Mon, 24 Jun 2024 11:03:43 +0800
Subject: [PATCH] =?UTF-8?q?=E5=90=AF=E7=94=A8ocr=E4=B8=AD=E7=9A=84cls?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config/photo_review.py                 |  4 +++
 photo_review/photo_review.py           | 36 ++++++++++++++++++++++++--
 visual_model_test/visual_model_test.py |  6 ++---
 3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/config/photo_review.py b/config/photo_review.py
index 523012b..1690baa 100644
--- a/config/photo_review.py
+++ b/config/photo_review.py
@@ -1,4 +1,5 @@
 from paddlenlp import Taskflow
+from paddleocr import PaddleOCR
 
 from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
 
@@ -36,3 +37,6 @@ DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA
 # 费用清单
 COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base",
                    task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE)
+
+# OCR
+OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py
index caf1a3d..a63a09a 100644
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -18,7 +18,7 @@ from sqlalchemy import update
 from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
     PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
 from config.mysql import MysqlSession
-from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE
+from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR
 from photo_review.entity.bd_yljg import BdYljg
 from photo_review.entity.bd_ylks import BdYlks
 from photo_review.entity.zx_ie_cost import ZxIeCost
@@ -95,6 +95,36 @@ def merge_result(result1, result2):
     return result1
 
 
+# Run OCR on an image and convert each detected box into a two-point rectangle
+def get_ocr_layout(img_path):
+    def _get_box(box):  # collapse box[0]..box[3] corner points into [x1, y1, x2, y2]
+        box = [
+            min(box[0][0], box[3][0]),  # x1
+            min(box[0][1], box[1][1]),  # y1
+            max(box[1][0], box[2][0]),  # x2
+            max(box[2][1], box[3][1]),  # y2
+        ]
+        return box
+
+    def _normal_box(box):
+        # Reject boxes whose width or height is negative (zero-size boxes still pass)
+        if box[3] - box[1] < 0 or box[2] - box[0] < 0:
+            return False
+        return True
+
+    layout = []  # accumulates (box, text) tuples, one per accepted segment
+    ocr_result = OCR.ocr(img_path)
+    ocr_result = ocr_result[0]  # first entry only; NOTE(review): may be None if no text is detected - confirm
+    for segment in ocr_result:
+        box = segment[0]
+        box = _get_box(box)
+        if not _normal_box(box):
+            continue
+        text = segment[1][0]  # presumably segment[1] is (text, confidence) - TODO confirm
+        layout.append((box, text))
+    return layout
+
+
 # 关键信息提取
 def information_extraction(ie, phrecs):
     result = {}
@@ -107,7 +137,9 @@ def information_extraction(ie, phrecs):
             for img in split_result:
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
                     cv2.imwrite(temp_file.name, img["img"])
-                    docs.append({"doc": temp_file.name})
+                    # Call OCR separately so that its angle classifier (cls) is used
+                    layout = get_ocr_layout(temp_file.name)
+                    docs.append({"doc": temp_file.name, "layout": layout})
                     doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
     if not docs:
         return result
diff --git a/visual_model_test/visual_model_test.py b/visual_model_test/visual_model_test.py
index 2b4fed1..f9bdb6f 100644
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,7 +10,7 @@ import cv2
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from photo_review.photo_review import split_image
+from photo_review.photo_review import split_image, get_ocr_layout
 from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from ucloud import ucloud
@@ -70,7 +70,7 @@ def visual_model_test(model_type, test_img, task_path, schema):
             with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
                 cv2.imwrite(temp_file.name, img["img"])
                 temp_files_paths.append(temp_file.name)
-                docs.append({"doc": temp_file.name})
+                docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)})
 
         my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
                          layout_analysis=False)
@@ -133,7 +133,7 @@ def main(model_type, pic_name=None):
 
 if __name__ == '__main__':
     # main("ocr")
-    main("settlement", "PH20240529000194_1_075936_1.PNG")
+    main("settlement")
     # main("discharge")
     # main("cost")
     # main("cost_detail")