优化案子处理逻辑

2024-10-09 09:39:29 +08:00
parent a3fa1e502e
commit 795134f566
10 changed files with 257 additions and 304 deletions
--- a/util/common_util.py
+++ b/util/common_util.py
@@ -12,6 +12,44 @@ def get_default_datetime():
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


+def ocr_result_to_layout(ocr_result):
+    """Convert OCR segments into a list of ``(box, text)`` layout tuples.
+
+    Each segment is read as ``segment[0]`` (four corner points, each indexed
+    as ``[x, y]``) and ``segment[1][0]`` (the recognised text). The corners
+    are collapsed into an axis-aligned ``[x1, y1, x2, y2]`` list. Segments
+    whose resulting box has a negative width or height are skipped; boxes of
+    zero width or height are kept.
+
+    Returns an empty list when ``ocr_result`` is falsy (``None`` or empty).
+    """
+    if not ocr_result:
+        return []
+
+    def _to_bbox(corners):
+        # NOTE(review): presumably corners run top-left, top-right,
+        # bottom-right, bottom-left -- confirm against the OCR engine in use.
+        left = min(corners[0][0], corners[3][0])
+        top = min(corners[0][1], corners[1][1])
+        right = max(corners[1][0], corners[2][0])
+        bottom = max(corners[2][1], corners[3][1])
+        return [left, top, right, bottom]
+
+    entries = []
+    for item in ocr_result:
+        bbox = _to_bbox(item[0])
+        height = bbox[3] - bbox[1]
+        width = bbox[2] - bbox[0]
+        # Only inverted boxes (negative extent) are rejected.
+        if height < 0 or width < 0:
+            continue
+        entries.append((bbox, item[1][0]))
+    return entries
+
+
+def ocr_result_to_text(ocr_results):
+    """Concatenate the recognised text of OCR segments, capped at 2048 chars.
+
+    Each segment's text is read from ``segment[1][0]``. Segments are joined
+    in order with no separator, and iteration stops as soon as the cap is
+    reached.
+
+    Returns an empty string when ``ocr_results`` is falsy (``None`` or
+    empty), matching how ``ocr_result_to_layout`` treats a missing result.
+    """
+    max_chars = 2048
+    if not ocr_results:
+        return ''
+
+    pieces = []
+    collected = 0
+    for ocr_result in ocr_results:
+        piece = ocr_result[1][0]
+        pieces.append(piece)
+        collected += len(piece)
+        # Stop early: anything past the cap would be sliced off anyway.
+        if collected >= max_chars:
+            break
+    return ''.join(pieces)[:max_chars]
+
+
 def get_ocr_layout(ocr, img_path):
    """
    获取ocr识别的结果，转为合适的layout形式