From fc0c54fbd2979c5002ffcb6d0aaa90fbcc4d6327 Mon Sep 17 00:00:00 2001
From: liuyebo <1515783401@qq.com>
Date: Tue, 25 Jun 2024 14:13:43 +0800
Subject: [PATCH] =?UTF-8?q?ocr=E9=85=8D=E7=BD=AE=E7=9A=84cls=E5=8F=AA?=
 =?UTF-8?q?=E8=83=BD=E5=8C=BA=E5=88=860=E5=BA=A6=E4=B8=8E180=E5=BA=A6?=
 =?UTF-8?q?=EF=BC=8C=E4=B8=8D=E7=AC=A6=E5=90=88=E9=9C=80=E6=B1=82=EF=BC=8C?=
 =?UTF-8?q?=E6=9B=B4=E6=8D=A2=E4=B8=BApaddleclas=E4=B8=AD=E7=9A=84?=
 =?UTF-8?q?=E5=9B=BE=E7=89=87=E6=96=B9=E5=90=91=E8=AF=86=E5=88=AB=E6=A8=A1?=
 =?UTF-8?q?=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config/photo_review.py                 |  4 --
 photo_review/photo_review.py           | 71 +++++++++++++-------------
 visual_model_test/visual_model_test.py | 56 ++++++++++++--------
 3 files changed, 69 insertions(+), 62 deletions(-)

diff --git a/config/photo_review.py b/config/photo_review.py
index 1690baa..523012b 100644
--- a/config/photo_review.py
+++ b/config/photo_review.py
@@ -1,5 +1,4 @@
 from paddlenlp import Taskflow
-from paddleocr import PaddleOCR
 
 from config.keys import SETTLEMENT_LIST_SCHEMA, DISCHARGE_RECORD_SCHEMA, COST_LIST_SCHEMA
 
@@ -37,6 +36,3 @@ DISCHARGE_IE = Taskflow("information_extraction", schema=DISCHARGE_RECORD_SCHEMA
 # 费用清单
 COST_IE = Taskflow("information_extraction", schema=COST_LIST_SCHEMA, model="uie-x-base",
                    task_path="config/model/cost_list_model", layout_analysis=LAYOUT_ANALYSIS, batch_size=IE_BATCH_SIZE)
-
-# OCR
-OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
diff --git a/photo_review/photo_review.py b/photo_review/photo_review.py
index 56d7f78..f8e5ba0 100644
--- a/photo_review/photo_review.py
+++ b/photo_review/photo_review.py
@@ -10,6 +10,7 @@ import urllib.request
 import cv2
 import numpy as np
 import paddle
+import paddleclas
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
@@ -18,7 +19,7 @@ from sqlalchemy import update
 from config.keys import PATIENT_NAME, ADMISSION_DATE, DISCHARGE_DATE, MEDICAL_EXPENSES, PERSONAL_CASH_PAYMENT, \
     PERSONAL_ACCOUNT_PAYMENT, PERSONAL_FUNDED_AMOUNT, MEDICAL_INSURANCE_TYPE, HOSPITAL, DEPARTMENT, DOCTOR
 from config.mysql import MysqlSession
-from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE, OCR
+from config.photo_review import PHHD_BATCH_SIZE, SLEEP_MINUTES, SETTLEMENT_IE, DISCHARGE_IE, COST_IE
 from photo_review.entity.bd_yljg import BdYljg
 from photo_review.entity.bd_ylks import BdYlks
 from photo_review.entity.zx_ie_cost import ZxIeCost
@@ -95,36 +96,34 @@ def merge_result(result1, result2):
     return result1
 
 
-# 获取图片OCR，并将其box转为两点矩形框
-def get_ocr_layout(img_path):
-    def _get_box(box):
-        box = [
-            min(box[0][0], box[3][0]),  # x1
-            min(box[0][1], box[1][1]),  # y1
-            max(box[1][0], box[2][0]),  # x2
-            max(box[2][1], box[3][1]),  # y2
-        ]
-        return box
+# Predict an image's orientation with the PaddleClas "text_image_orientation" model; returns the top-1 label as an int.
+def get_image_rotation_angle(img):
+    if not hasattr(get_image_rotation_angle, "model"):  # build the classifier once instead of on every call
+        get_image_rotation_angle.model = paddleclas.PaddleClas(model_name="text_image_orientation")
+    result = get_image_rotation_angle.model.predict(input_data=img)  # consumed as an iterator of result lists
+    return int(next(result)[0]["label_names"][0])
 
-    def _normal_box(box):
-        # Ensure the height and width of bbox are greater than zero
-        if box[3] - box[1] < 0 or box[2] - box[0] < 0:
-            return False
-        return True
 
-    layout = []
-    ocr_result = OCR.ocr(img_path)
-    ocr_result = ocr_result[0]
-    if not ocr_result:
-        return layout
-    for segment in ocr_result:
-        box = segment[0]
-        box = _get_box(box)
-        if not _normal_box(box):
-            continue
-        text = segment[1][0]
-        layout.append((box, text))
-    return layout
+# Rotate an image about its centre, resizing the canvas so the rotated picture is not cropped.
+def rotate_image(img, angle):
+    """Return img rotated by angle degrees (counter-clockwise per cv2.getRotationMatrix2D); img itself if angle is 0."""
+    if angle == 0:
+        # Return the input unchanged rather than None: callers pass the result straight to cv2.imwrite.
+        return img
+    height, width = img.shape[:2]  # slicing also accepts arrays without a channel axis
+    if angle == 180:
+        new_width, new_height = width, height
+    else:
+        # Any other angle is treated as a quarter turn, which swaps the canvas dimensions.
+        new_width, new_height = height, width
+    # Rotate about the image centre; arguments are (centre, angle in degrees, scale).
+    matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
+    # Shift the result so it stays centred in the new canvas.
+    matrix[0, 2] += (new_width - width) / 2
+    matrix[1, 2] += (new_height - height) / 2
+    # Arguments are (source image, affine matrix, (width, height) of the output image).
+    rotated = cv2.warpAffine(img, matrix, (new_width, new_height))
+    return rotated
 
 
 # 关键信息提取
@@ -138,11 +137,11 @@ def information_extraction(ie, phrecs):
             split_result = split_image(pic_path)
             for img in split_result:
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                    cv2.imwrite(temp_file.name, img["img"])
-                    # 为使用ocr中的cls，单独调用ocr
-                    layout = get_ocr_layout(temp_file.name)
-                    docs.append({"doc": temp_file.name, "layout": layout})
-                    doc_phrecs.append({"phrec": phrec, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
+                    angle = get_image_rotation_angle(img["img"])
+                    rotated_img = rotate_image(img["img"], angle)
+                    cv2.imwrite(temp_file.name, rotated_img)
+                    docs.append({"doc": temp_file.name})
+                    doc_phrecs.append({"phrec": phrec, "rotation": angle, "x_offset": img["x_offset"], "y_offset": img["y_offset"]})
     if not docs:
         return result
 
@@ -170,8 +169,8 @@ def information_extraction(ie, phrecs):
             result_json = result_json[:5000]
         session = MysqlSession()
         zx_ocr = ZxOcr(pk_phhd=phrec.pk_phhd, pk_phrec=phrec.pk_phrec, id=id, cfjaddress=phrec.cfjaddress,
-                       content=result_json, x_offset=doc_phrec["x_offset"], y_offset=doc_phrec["y_offset"],
-                       create_time=now, update_time=now)
+                       content=result_json, rotation=doc_phrec["rotation"], x_offset=doc_phrec["x_offset"],
+                       y_offset=doc_phrec["y_offset"], create_time=now, update_time=now)
         session.add(zx_ocr)
         session.commit()
         session.close()
diff --git a/visual_model_test/visual_model_test.py b/visual_model_test/visual_model_test.py
index 7054e39..3d5480f 100644
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,13 +10,13 @@ import cv2
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from photo_review.photo_review import split_image, get_ocr_layout
+from photo_review.photo_review import split_image, get_image_rotation_angle, rotate_image, open_image
 from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from ucloud import ucloud
 
 
-def write_visual_result(image, layout=None, result=None):
+def write_visual_result(image, angle=0, layout=None, result=None):
     img = image.split("?")[0]
     img = re.split(r'[\\/]', img)[-1]
     img_name = ""
@@ -26,19 +26,25 @@ def write_visual_result(image, layout=None, result=None):
         img_name = img[:last_dot_index]
         img_type = img[last_dot_index + 1:]
 
-    if layout:
-        print(layout)
-        DocParser.write_image_with_results(
-            image,
-            layout=layout,
-            save_path="./img_result/" + img_name + "_layout." + img_type)
+    img_array = open_image(image)
+    if angle != 0:
+        img_array = rotate_image(img_array, angle)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        cv2.imwrite(temp_file.name, img_array)
+        if layout:
+            print(layout)
+            DocParser.write_image_with_results(
+                temp_file.name,
+                layout=layout,
+                save_path="./img_result/" + img_name + "_layout." + img_type)
 
-    if result:
-        print(result)
-        DocParser.write_image_with_results(
-            image,
-            result=result,
-            save_path="./img_result/" + img_name + "_result." + img_type)
+        if result:
+            print(result)
+            DocParser.write_image_with_results(
+                temp_file.name,
+                result=result,
+                save_path="./img_result/" + img_name + "_result." + img_type)
+    os.remove(temp_file.name)
 
 
 def visual_model_test(model_type, test_img, task_path, schema):
@@ -46,34 +52,40 @@ def visual_model_test(model_type, test_img, task_path, schema):
         imgs = split_image(test_img)
         layout = []
         temp_files_paths = []
+        doc_parser = DocParser(layout_analysis=False)
         for img in imgs:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                cv2.imwrite(temp_file.name, img["img"])
+                angle = get_image_rotation_angle(img["img"])
+                rotated_img = rotate_image(img["img"], angle)
+                cv2.imwrite(temp_file.name, rotated_img)
                 temp_files_paths.append(temp_file.name)
-                ocr_layout = get_ocr_layout(temp_file.name)
+                parsed_doc = doc_parser.parse({"doc": temp_file.name})
                 if img["x_offset"] or img["y_offset"]:
-                    for box in ocr_layout:
+                    for p in parsed_doc["layout"]:
+                        box = p[0]
                         box[0] += img["x_offset"]
                         box[1] += img["y_offset"]
                         box[2] += img["x_offset"]
                         box[3] += img["y_offset"]
-                layout += ocr_layout
+                layout += parsed_doc["layout"]
 
-        write_visual_result(test_img, layout=layout)
+        write_visual_result(test_img, angle, layout=layout)
     else:
         docs = []
         split_result = split_image(test_img)
         temp_files_paths = []
         for img in split_result:
             with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                cv2.imwrite(temp_file.name, img["img"])
+                angle = get_image_rotation_angle(img["img"])
+                rotated_img = rotate_image(img["img"], angle)
+                cv2.imwrite(temp_file.name, rotated_img)
                 temp_files_paths.append(temp_file.name)
-                docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)})
+                docs.append({"doc": temp_file.name})
 
         my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
                          layout_analysis=False)
         my_results = my_ie(docs)
-        write_visual_result(test_img, result=my_results[0])
+        write_visual_result(test_img, angle, result=my_results[0])
 
     # 使用完临时文件后，记得清理（删除）它们
     for path in temp_files_paths: