Files
fcb_photo_review/visual_model_test/visual_model_test.py
2024-06-05 08:45:14 +08:00

96 lines
3.7 KiB
Python

# Visual comparison test for the models: draws each model's output onto the test image.
import os
import re
import sys
import time
from pprint import pprint
from paddlenlp import Taskflow
from paddlenlp.utils.doc_parser import DocParser
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from photo_review.util.ucloud import get_private_url
def write_visual_result(image, layout=None, result=None):
    """Draw layout and/or extraction results onto an image and save the output.

    Output files are written under ``./img_result/`` and named after the input
    image: ``<name>_layout.<ext>`` for layout boxes and ``<name>_result.<ext>``
    for extraction results.

    Args:
        image: Local path or URL of the image; forwarded unchanged to
            ``DocParser.write_image_with_results``.
        layout: Layout data to draw. Nothing is drawn for it when falsy.
        result: Extraction result to draw. Nothing is drawn for it when falsy.
    """
    # For URLs, drop the query string before looking for the last path
    # separator: a "/" inside a query value (e.g. a signature) would otherwise
    # be mistaken for the start of the file name.
    location = image.split("?", 1)[0] if "://" in image else image
    file_name = re.split(r'[\\/]', location)[-1].split("?")[0]

    stem, dot, extension = file_name.rpartition(".")
    if not dot:
        # No extension: keep the whole name as the stem (an empty stem would
        # make every such image share one output file) and fall back to jpg.
        stem, extension = file_name, "jpg"

    if layout:
        print(layout)
        DocParser.write_image_with_results(
            image,
            layout=layout,
            save_path="./img_result/" + stem + "_layout." + extension)
    if result:
        print(result)
        DocParser.write_image_with_results(
            image,
            result=result,
            save_path="./img_result/" + stem + "_result." + extension)
def visual_model_test(model_type, test_img, task_path, schema):
    """Run one model on a test image and save a visualisation of its output.

    Args:
        model_type: ``"ocr"`` runs ``DocParser`` with layout analysis and draws
            the parsed layout; any other value runs the information-extraction
            ``Taskflow`` and draws its first result.
        test_img: Path or URL of the image to process.
        task_path: Model directory passed to ``Taskflow`` (unused for "ocr").
        schema: Extraction schema passed to ``Taskflow`` (unused for "ocr").
    """
    document = {"doc": test_img}

    if model_type == "ocr":
        layout_parser = DocParser(layout_analysis=True)
        write_visual_result(test_img, layout=layout_parser.parse(document)["layout"])
        return

    extractor = Taskflow(
        "information_extraction",
        schema=schema,
        model="uie-x-base",
        task_path=task_path,
        layout_analysis=True,
    )
    # Only the first entry of the Taskflow output is visualised.
    write_visual_result(test_img, result=extractor(document)[0])
def main(model_type, pic_name=None):
    """Run the visual test for one model type and print the elapsed time.

    Args:
        model_type: One of "ocr", "settlement", "discharge", "cost" or
            "cost_detail". Any other value prints a message and returns
            without running anything.
        pic_name: Optional picture name; when truthy it is resolved through
            ``get_private_url``, otherwise the sample image configured for the
            model type is used.
    """
    # Start of the timed section.
    started_at = time.time()

    # Each preset: (model type, task path, default sample image, schema).
    presets = (
        ("ocr", None, "img/PH20240428000832_1_093844_2.jpg", None),
        (
            "settlement",
            "../config/model/settlement_list_model",
            "img/PH20240511000638_1_094306_1.jpg",
            ["姓名", "入院日期", "出院日期", "费用总额", "个人现金支付", "个人账户支付", "自费", "医保类型"],
        ),
        (
            "discharge",
            "../config/model/discharge_record_model",
            "img/PH20240401000003_3_001938_2.jpg",
            ["医院", "科别", "姓名", "入院日期", "出院日期", "主治医生"],
        ),
        (
            "cost",
            "../config/model/cost_list_model",
            "img/PH20240511000648_4_094542_2.jpg",
            ["姓名", "入院日期", "出院日期", "费用总额"],
        ),
        (
            "cost_detail",
            "../config/model/cost_list_detail_model",
            "img/PH20240511000648_4_094542_2.jpg",
            {"名称": ["类别", "规格", "单价", "数量", "金额"]},
        ),
    )

    for known_type, task_path, sample_img, schema in presets:
        if model_type == known_type:
            break
    else:
        # No preset matched the requested type.
        print("请输入正确的类型!")
        return

    test_img_path = get_private_url(pic_name) if pic_name else sample_img
    visual_model_test(model_type, test_img_path, task_path, schema)

    # End of the timed section.
    elapsed = time.time() - started_at
    pprint(f"处理时长:{elapsed}")
if __name__ == "__main__":
    # Choose the model to visualise by passing one of:
    #   "ocr", "settlement", "discharge", "cost", "cost_detail"
    main("settlement")

    # To draw a hand-written layout without running any model:
    # write_visual_result(
    #     "img/PH20240428000832_1_093844_2.jpg",
    #     layout=[([508.0975609756094, 659.7073170731707, 1000, 745.756097560976], 'lay', 'figure')],
    # )