优化旋转图片的识别条件
This commit is contained in:
@@ -35,8 +35,6 @@ from photo_review.util.data_util import handle_date, handle_decimal, parse_depar
|
|||||||
from photo_review.util.util import get_default_datetime
|
from photo_review.util.util import get_default_datetime
|
||||||
from ucloud import ucloud
|
from ucloud import ucloud
|
||||||
|
|
||||||
OCR = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
|
|
||||||
|
|
||||||
|
|
||||||
# 获取图片
|
# 获取图片
|
||||||
def open_image(img_path):
|
def open_image(img_path):
|
||||||
@@ -137,7 +135,7 @@ def rotate_image(img, angle):
|
|||||||
|
|
||||||
|
|
||||||
# 获取图片OCR,并将其box转为两点矩形框
|
# 获取图片OCR,并将其box转为两点矩形框
|
||||||
def get_ocr_layout(img_path):
|
def get_ocr_layout(ocr, img_path):
|
||||||
def _get_box(old_box):
|
def _get_box(old_box):
|
||||||
new_box = [
|
new_box = [
|
||||||
min(old_box[0][0], old_box[3][0]), # x1
|
min(old_box[0][0], old_box[3][0]), # x1
|
||||||
@@ -154,7 +152,7 @@ def get_ocr_layout(img_path):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
layout = []
|
layout = []
|
||||||
ocr_result = OCR.ocr(img_path)
|
ocr_result = ocr.ocr(img_path)
|
||||||
ocr_result = ocr_result[0]
|
ocr_result = ocr_result[0]
|
||||||
if not ocr_result:
|
if not ocr_result:
|
||||||
return layout
|
return layout
|
||||||
@@ -168,11 +166,34 @@ def get_ocr_layout(img_path):
|
|||||||
return layout
|
return layout
|
||||||
|
|
||||||
|
|
||||||
|
def ie_temp_image(ie, ocr, image):
    """Run OCR plus information extraction on an in-memory image.

    The image is written to a temporary ``.jpg`` file because both
    ``get_ocr_layout`` and ``ie`` are called with a file path here.
    The temporary file is always removed before returning.

    Args:
        ie: Callable invoked with ``{"doc": path, "layout": layout}``; the
            first element of its return value is used as the result.
        ocr: OCR engine object forwarded to ``get_ocr_layout``.
        image: Image passed to ``cv2.imwrite``.

    Returns:
        The first element returned by ``ie``, or an empty list when OCR finds
        no layout or when any step fails (the failure is logged).
    """
    # Only the path is needed; close the handle right away so cv2 can write
    # to the file by name (delete=False keeps it on disk after closing).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_path = temp_file.name

    ie_result = []
    try:
        # Writing happens inside the guarded region so that a failure in
        # cv2.imwrite can no longer leak the temporary file.
        cv2.imwrite(temp_path, image)

        layout = get_ocr_layout(ocr, temp_path)
        if layout:
            ie_result = ie({"doc": temp_path, "layout": layout})[0]
        # else: no recognition result, keep the empty list
    except Exception as e:
        logging.error("信息抽取时出错", exc_info=e)
    finally:
        # Best-effort cleanup: a failed delete is logged, never raised.
        try:
            os.remove(temp_path)
        except Exception as e:
            logging.info(f"删除临时文件 {temp_path} 时出错", exc_info=e)
    return ie_result
|
||||||
|
|
||||||
|
|
||||||
# 关键信息提取
|
# 关键信息提取
|
||||||
def information_extraction(ie, phrecs):
|
def information_extraction(ie, phrecs):
|
||||||
result = {}
|
result = {}
|
||||||
# 同一批图的标识
|
# 同一批图的标识
|
||||||
identity = int(time.time())
|
identity = int(time.time())
|
||||||
|
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
|
||||||
for phrec in phrecs:
|
for phrec in phrecs:
|
||||||
pic_path = ucloud.get_private_url(phrec.cfjaddress)
|
pic_path = ucloud.get_private_url(phrec.cfjaddress)
|
||||||
if not pic_path:
|
if not pic_path:
|
||||||
@@ -180,39 +201,26 @@ def information_extraction(ie, phrecs):
|
|||||||
split_result = split_image(pic_path)
|
split_result = split_image(pic_path)
|
||||||
|
|
||||||
for img in split_result:
|
for img in split_result:
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file1:
|
ie_result1 = ie_temp_image(ie, ocr, img["img"])
|
||||||
cv2.imwrite(temp_file1.name, img["img"])
|
if not ie_result1 or len(ie_result1) < len(ie.kwargs.get("schema")):
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file2:
|
|
||||||
rotated_img = rotate_image(img["img"], 90)
|
rotated_img = rotate_image(img["img"], 90)
|
||||||
cv2.imwrite(temp_file2.name, rotated_img)
|
ie_result2 = ie_temp_image(ie, ocr, rotated_img)
|
||||||
|
if not (ie_result1 or ie_result2):
|
||||||
ie_result = []
|
|
||||||
try:
|
|
||||||
layout1 = get_ocr_layout(temp_file1.name)
|
|
||||||
layout2 = get_ocr_layout(temp_file2.name)
|
|
||||||
if not (layout1 or layout2):
|
|
||||||
# 无识别结果
|
|
||||||
continue
|
continue
|
||||||
ie_result = ie([{"doc": temp_file1.name, "layout": layout1},
|
elif not ie_result1:
|
||||||
{"doc": temp_file2.name, "layout": layout2}])
|
ie_result = ie_result2
|
||||||
except Exception as e:
|
angle = 90
|
||||||
logging.error("信息抽取时出错", exc_info=e)
|
elif not ie_result2:
|
||||||
finally:
|
ie_result = ie_result1
|
||||||
try:
|
angle = 0
|
||||||
os.remove(temp_file1.name)
|
elif len(ie_result2) > len(ie_result1):
|
||||||
except Exception as e:
|
ie_result = ie_result2
|
||||||
logging.info(f"删除临时文件 {temp_file1.name} 时出错", exc_info=e)
|
angle = 90
|
||||||
try:
|
else:
|
||||||
os.remove(temp_file2.name)
|
ie_result = ie_result1
|
||||||
except Exception as e:
|
angle = 0
|
||||||
logging.info(f"删除临时文件 {temp_file2.name} 时出错", exc_info=e)
|
|
||||||
|
|
||||||
if len(ie_result[1]) > len(ie_result[0]):
|
|
||||||
# 旋转90度后识别效果更好
|
|
||||||
ie_result = ie_result[1]
|
|
||||||
angle = 90
|
|
||||||
else:
|
else:
|
||||||
ie_result = ie_result[0]
|
ie_result = ie_result1
|
||||||
angle = 0
|
angle = 0
|
||||||
|
|
||||||
now = get_default_datetime()
|
now = get_default_datetime()
|
||||||
|
|||||||
Reference in New Issue
Block a user