优化 OCR 模型，增加关键词
This commit is contained in:
@@ -9,7 +9,7 @@ from time import sleep
|
||||
import cv2
|
||||
import numpy as np
|
||||
import paddleclas
|
||||
from paddleocr import PaddleOCR
|
||||
from paddlenlp.utils.doc_parser import DocParser
|
||||
from sqlalchemy import update
|
||||
|
||||
from auto_email.error_email import send_an_error_email
|
||||
@@ -20,7 +20,7 @@ from config.ucloud import BUCKET
|
||||
from models import ZxPhrec, ZxPhhd
|
||||
from ucloud import ucloud
|
||||
|
||||
OCR = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False, gpu_id=1)
|
||||
DOC_PARSER = DocParser(use_gpu=True, device_id=1)
|
||||
|
||||
|
||||
def open_image(img_path):
|
||||
@@ -258,7 +258,8 @@ def get_mask_layout(image, content):
|
||||
|
||||
result = []
|
||||
try:
|
||||
layouts = get_ocr_layout(OCR, temp_file.name)
|
||||
# layouts = get_ocr_layout(OCR, temp_file.name)
|
||||
layouts = DOC_PARSER.parse({"doc": temp_file.name})["layout"]
|
||||
if not layouts:
|
||||
# 无识别结果
|
||||
return result
|
||||
@@ -273,6 +274,8 @@ def get_mask_layout(image, content):
|
||||
result.append(find_box_of_value("交款人", layout))
|
||||
if "文款人" in layout[1]:
|
||||
result.append(find_box_of_value("文款人", layout))
|
||||
if "购买方名称" in layout[1]:
|
||||
result.append(find_box_of_value("购买方名称", layout))
|
||||
return result
|
||||
except Exception as e:
|
||||
logging.error("涂抹时出错", exc_info=e)
|
||||
|
||||
Reference in New Issue
Block a user