添加可视化测试中的长图处理
This commit is contained in:
@@ -2,13 +2,16 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
from paddlenlp import Taskflow
|
from photo_review.photo_review import split_image
|
||||||
from paddlenlp.utils.doc_parser import DocParser
|
|
||||||
|
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from paddlenlp import Taskflow
|
||||||
|
from paddlenlp.utils.doc_parser import DocParser
|
||||||
from ucloud import ucloud
|
from ucloud import ucloud
|
||||||
|
|
||||||
|
|
||||||
@@ -39,22 +42,55 @@ def write_visual_result(image, layout=None, result=None):
|
|||||||
|
|
||||||
def visual_model_test(model_type, test_img, task_path, schema):
    """Visualise OCR layout or information-extraction output for one image.

    The image is cut into tiles with ``split_image``; each tile is written to
    a temporary ``.jpg`` file so that it can be handed to the model by path.
    All temporary files are removed before the function returns, including
    when a model call raises.

    Args:
        model_type: ``"ocr"`` runs ``DocParser`` on every tile and draws the
            merged layout; any other value runs the UIE-X ``Taskflow``.
        test_img: Source image passed to ``split_image`` and to
            ``write_visual_result``.
        task_path: Forwarded to ``Taskflow`` as ``task_path`` (unused for
            ``"ocr"``).
        schema: Forwarded to ``Taskflow`` as ``schema`` (unused for ``"ocr"``).
    """
    temp_files_paths = []
    try:
        if model_type == "ocr":
            layout = []
            doc_parser = DocParser(layout_analysis=False)
            for img in split_image(test_img):
                # Only the path is needed; the handle is closed right away.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
                    tile_path = temp_file.name
                # Register before writing so a failed save() is still cleaned up.
                temp_files_paths.append(tile_path)
                img["img"].save(tile_path)

                parsed_doc = doc_parser.parse({"doc": tile_path}, expand_to_a4_size=True)
                x_offset = img["x_offset"]
                y_offset = img["y_offset"]
                if x_offset or y_offset:
                    # Shift tile-local boxes by the tile's offsets; the first
                    # element of each layout entry is a mutable 4-item box.
                    for entry in parsed_doc["layout"]:
                        box = entry[0]
                        box[0] += x_offset
                        box[1] += y_offset
                        box[2] += x_offset
                        box[3] += y_offset
                layout += parsed_doc["layout"]

            write_visual_result(test_img, layout=layout)
        else:
            docs = []
            for img in split_image(test_img):
                with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
                    tile_path = temp_file.name
                temp_files_paths.append(tile_path)
                img["img"].save(tile_path)
                docs.append({"doc": tile_path})

            my_ie = Taskflow("information_extraction", schema=schema, model="uie-x-base", task_path=task_path,
                             layout_analysis=False)
            my_results = my_ie(docs)
            # NOTE(review): only the first element of my_results is drawn,
            # although one doc per tile was submitted — presumably results for
            # later tiles are dropped here; confirm this is intended.
            write_visual_result(test_img, result=my_results[0])
    finally:
        # Remove the temporary tile files whether or not the run succeeded.
        for path in temp_files_paths:
            try:
                os.remove(path)
                print(f"临时文件 {path} 已删除")
            except OSError as e:
                print(f"删除临时文件 {path} 时出错: {e}")
|
||||||
|
|
||||||
|
|
||||||
def batch_test(test_imgs, task_path, schema):
    """Run UIE-X information extraction on several images and print the results.

    Args:
        test_imgs: Iterable of images; each is wrapped as ``{"doc": image}``.
        task_path: Forwarded to ``Taskflow`` as ``task_path``.
        schema: Forwarded to ``Taskflow`` as ``schema``.
    """
    batch_inputs = [{"doc": image} for image in test_imgs]
    extractor = Taskflow(
        "information_extraction",
        schema=schema,
        model="uie-x-base",
        task_path=task_path,
        layout_analysis=False,
        batch_size=16,
    )
    # Batch extraction form: ie([{"doc": "./data/6.jpg"}, {"doc": "./data/7.jpg"}])
    extraction_results = extractor(batch_inputs)
    pprint(extraction_results)
|
||||||
|
|||||||
Reference in New Issue
Block a user