OCR 配置的 cls 只能区分 0 度与 180 度，不符合需求，更换为 PaddleClas 中的图片方向识别模型

2024-06-25 14:13:43 +08:00
parent e8dd62e1f5
commit fc0c54fbd2
3 changed files with 69 additions and 62 deletions
--- a/visual_model_test/visual_model_test.py
+++ b/visual_model_test/visual_model_test.py
@@ -10,13 +10,13 @@ import cv2

 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from photo_review.photo_review import split_image, get_ocr_layout
+from photo_review.photo_review import split_image, get_image_rotation_angle, rotate_image, open_image
 from paddlenlp import Taskflow
 from paddlenlp.utils.doc_parser import DocParser
 from ucloud import ucloud


-def write_visual_result(image, layout=None, result=None):
+def write_visual_result(image, angle=0, layout=None, result=None):
    img = image.split("?")[0]
    img = re.split(r'[\\/]', img)[-1]
    img_name = ""
@@ -26,19 +26,25 @@ def write_visual_result(image, layout=None, result=None):
        img_name = img[:last_dot_index]
        img_type = img[last_dot_index + 1:]

-    if layout:
-        print(layout)
-        DocParser.write_image_with_results(
-            image,
-            layout=layout,
-            save_path="./img_result/" + img_name + "_layout." + img_type)
+    img_array = open_image(image)
+    if angle != 0:
+        img_array = rotate_image(img_array, angle)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        cv2.imwrite(temp_file.name, img_array)
+        if layout:
+            print(layout)
+            DocParser.write_image_with_results(
+                temp_file.name,
+                layout=layout,
+                save_path="./img_result/" + img_name + "_layout." + img_type)

-    if result:
-        print(result)
-        DocParser.write_image_with_results(
-            image,
-            result=result,
-            save_path="./img_result/" + img_name + "_result." + img_type)
+        if result:
+            print(result)
+            DocParser.write_image_with_results(
+                temp_file.name,
+                result=result,
+                save_path="./img_result/" + img_name + "_result." + img_type)
+    os.remove(temp_file.name)


 def visual_model_test(model_type, test_img, task_path, schema):
@@ -46,34 +52,40 @@ def visual_model_test(model_type, test_img, task_path, schema):
        imgs = split_image(test_img)
        layout = []
        temp_files_paths = []
+        doc_parser = DocParser(layout_analysis=False)
        for img in imgs:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                cv2.imwrite(temp_file.name, img["img"])
+                angle = get_image_rotation_angle(img["img"])
+                rotated_img = rotate_image(img["img"], angle)
+                cv2.imwrite(temp_file.name, rotated_img)
                temp_files_paths.append(temp_file.name)
-                ocr_layout = get_ocr_layout(temp_file.name)
+                parsed_doc = doc_parser.parse({"doc": temp_file.name})
                if img["x_offset"] or img["y_offset"]:
-                    for box in ocr_layout:
+                    for p in parsed_doc["layout"]:
+                        box = p[0]
                        box[0] += img["x_offset"]
                        box[1] += img["y_offset"]
                        box[2] += img["x_offset"]
                        box[3] += img["y_offset"]
-                layout += ocr_layout
+                layout += parsed_doc["layout"]

-        write_visual_result(test_img, layout=layout)
+        write_visual_result(test_img, angle, layout=layout)
    else:
        docs = []
        split_result = split_image(test_img)
        temp_files_paths = []
        for img in split_result:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
-                cv2.imwrite(temp_file.name, img["img"])
+                angle = get_image_rotation_angle(img["img"])
+                rotated_img = rotate_image(img["img"], angle)
+                cv2.imwrite(temp_file.name, rotated_img)
                temp_files_paths.append(temp_file.name)
-                docs.append({"doc": temp_file.name, "layout": get_ocr_layout(temp_file.name)})
+                docs.append({"doc": temp_file.name})

        my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
                         layout_analysis=False)
        my_results = my_ie(docs)
-        write_visual_result(test_img, result=my_results[0])
+        write_visual_result(test_img, angle, result=my_results[0])

    # 使用完临时文件后，记得清理（删除）它们
    for path in temp_files_paths: