优化OCR模型，增加关键词

2024-07-10 17:09:00 +08:00
parent c11c0c0c6e
commit f89f68abd1
1 changed file with 6 additions and 3 deletions
--- a/photo_mask.py
+++ b/photo_mask.py
@@ -9,7 +9,7 @@ from time import sleep
 import cv2
 import numpy as np
 import paddleclas
-from paddleocr import PaddleOCR
+from paddlenlp.utils.doc_parser import DocParser
 from sqlalchemy import update

 from auto_email.error_email import send_an_error_email
@@ -20,7 +20,7 @@ from config.ucloud import BUCKET
 from models import ZxPhrec, ZxPhhd
 from ucloud import ucloud

-OCR = PaddleOCR(use_angle_cls=False, lang="ch", show_log=False, gpu_id=1)
+DOC_PARSER = DocParser(use_gpu=True, device_id=1)


 def open_image(img_path):
@@ -258,7 +258,8 @@ def get_mask_layout(image, content):

    result = []
    try:
-        layouts = get_ocr_layout(OCR, temp_file.name)
+        # layouts = get_ocr_layout(OCR, temp_file.name)
+        layouts = DOC_PARSER.parse({"doc": temp_file.name})["layout"]
        if not layouts:
            # 无识别结果
            return result
@@ -273,6 +274,8 @@ def get_mask_layout(image, content):
                    result.append(find_box_of_value("交款人", layout))
                if "文款人" in layout[1]:
                    result.append(find_box_of_value("文款人", layout))
+                if "购买方名称" in layout[1]:
+                    result.append(find_box_of_value("购买方名称", layout))
            return result
    except Exception as e:
        logging.error("涂抹时出错", exc_info=e)