优化 OCR 模型,增加关键词

This commit is contained in:
2024-07-10 17:09:00 +08:00
parent c11c0c0c6e
commit f89f68abd1

View File

@@ -9,7 +9,7 @@ from time import sleep
import cv2
import numpy as np
import paddleclas
from paddleocr import PaddleOCR
from paddlenlp.utils.doc_parser import DocParser
from sqlalchemy import update
from auto_email.error_email import send_an_error_email
@@ -20,7 +20,7 @@ from config.ucloud import BUCKET
from models import ZxPhrec, ZxPhhd
from ucloud import ucloud
OCR = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False, gpu_id=1)
DOC_PARSER = DocParser(use_gpu=True, device_id=1)
def open_image(img_path):
@@ -258,7 +258,8 @@ def get_mask_layout(image, content):
result = []
try:
layouts = get_ocr_layout(OCR, temp_file.name)
# layouts = get_ocr_layout(OCR, temp_file.name)
layouts = DOC_PARSER.parse({"doc": temp_file.name})["layout"]
if not layouts:
# 无识别结果
return result
@@ -273,6 +274,8 @@ def get_mask_layout(image, content):
result.append(find_box_of_value("交款人", layout))
if "文款人" in layout[1]:
result.append(find_box_of_value("文款人", layout))
if "购买方名称" in layout[1]:
result.append(find_box_of_value("购买方名称", layout))
return result
except Exception as e:
logging.error("涂抹时出错", exc_info=e)