优化图片分类和图片方向判断
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import logging
|
||||
import math
|
||||
|
||||
import cv2
|
||||
import fitz
|
||||
import numpy
|
||||
from PIL import Image
|
||||
|
||||
@@ -27,7 +29,7 @@ def capture(image, rectangle):
|
||||
def split(img_path, ratio=1.414, overlap=0.05, x_compensation=3):
|
||||
"""
|
||||
分割图片
|
||||
:param img_path:图片路径
|
||||
:param img_path: 图片路径
|
||||
:param ratio: 分割后的比例
|
||||
:param overlap: 图片之间的覆盖比例
|
||||
:param x_compensation: 横向补偿倍率
|
||||
@@ -251,3 +253,33 @@ def is_photo(img_path):
|
||||
if any(tag in exif for tag in (271, 272)):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def pdf_to_imgs(pdf_path, dpi=150):
    """
    Render every page of a PDF to an OpenCV image and extract its text.

    :param pdf_path: path of the PDF file (passed to ``fitz.open``)
    :param dpi: target rendering resolution; each page is scaled by ``dpi / 72``
    :return: a list with one ``[img, text]`` entry per page, where ``img`` is
        the rendered page converted with ``cv2.COLOR_RGB2BGR`` and ``text`` is
        the result of ``page.get_text()``; ``None`` if any step raises (the
        exception is logged on the ``'error'`` logger)
    """
    pdf_file = None
    # Zoom factor relative to the 72 DPI baseline used for rendering
    default_dpi = 72
    zoom = dpi / default_dpi
    try:
        # Open the PDF file
        pdf_file = fitz.open(pdf_path)
        # The scaling matrix is identical for every page, so build it once
        mat = fitz.Matrix(zoom, zoom)

        pdf_imgs = []
        for page in pdf_file:
            # Render the page
            pix = page.get_pixmap(matrix=mat)
            # Convert the rendered samples into an OpenCV-compatible array
            img = numpy.frombuffer(pix.samples, dtype=numpy.uint8).reshape((pix.height, pix.width, -1))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            pdf_imgs.append([img, page.get_text()])

        return pdf_imgs

    except Exception as ex:
        logging.getLogger('error').error('pdf转图片失败!', exc_info=ex)
        return None
    finally:
        # Compare against None explicitly: a document's truthiness follows its
        # page count, so an opened zero-page document would otherwise be
        # skipped here and never closed.
        if pdf_file is not None:
            pdf_file.close()
|
||||
|
||||
Reference in New Issue
Block a user