优化ocr模型,增加关键词
This commit is contained in:
@@ -9,7 +9,7 @@ from time import sleep
|
|||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import paddleclas
|
import paddleclas
|
||||||
from paddleocr import PaddleOCR
|
from paddlenlp.utils.doc_parser import DocParser
|
||||||
from sqlalchemy import update
|
from sqlalchemy import update
|
||||||
|
|
||||||
from auto_email.error_email import send_an_error_email
|
from auto_email.error_email import send_an_error_email
|
||||||
@@ -20,7 +20,7 @@ from config.ucloud import BUCKET
|
|||||||
from models import ZxPhrec, ZxPhhd
|
from models import ZxPhrec, ZxPhhd
|
||||||
from ucloud import ucloud
|
from ucloud import ucloud
|
||||||
|
|
||||||
OCR = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False, gpu_id=1)
|
DOC_PARSER = DocParser(use_gpu=True, device_id=1)
|
||||||
|
|
||||||
|
|
||||||
def open_image(img_path):
|
def open_image(img_path):
|
||||||
@@ -258,7 +258,8 @@ def get_mask_layout(image, content):
|
|||||||
|
|
||||||
result = []
|
result = []
|
||||||
try:
|
try:
|
||||||
layouts = get_ocr_layout(OCR, temp_file.name)
|
# layouts = get_ocr_layout(OCR, temp_file.name)
|
||||||
|
layouts = DOC_PARSER.parse({"doc": temp_file.name})["layout"]
|
||||||
if not layouts:
|
if not layouts:
|
||||||
# 无识别结果
|
# 无识别结果
|
||||||
return result
|
return result
|
||||||
@@ -273,6 +274,8 @@ def get_mask_layout(image, content):
|
|||||||
result.append(find_box_of_value("交款人", layout))
|
result.append(find_box_of_value("交款人", layout))
|
||||||
if "文款人" in layout[1]:
|
if "文款人" in layout[1]:
|
||||||
result.append(find_box_of_value("文款人", layout))
|
result.append(find_box_of_value("文款人", layout))
|
||||||
|
if "购买方名称" in layout[1]:
|
||||||
|
result.append(find_box_of_value("购买方名称", layout))
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("涂抹时出错", exc_info=e)
|
logging.error("涂抹时出错", exc_info=e)
|
||||||
|
|||||||
Reference in New Issue
Block a user