Files
fcb_photo_review/util/image_util.py

286 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import math
import cv2
import fitz
import numpy
from PIL import Image
from util import common_util
def capture(image, rectangle):
    """
    Crop a rectangular region out of an image.

    The rectangle is clamped to the image bounds before slicing, and the
    resulting coordinates are converted with ``int()``.

    :param image: ndarray image (rows first, then columns)
    :param rectangle: region to crop, given as (x1, y1, x2, y2)
    :return: the cropped ndarray
    """
    left, top, right, bottom = rectangle
    img_h, img_w = image.shape[:2]
    # Clamp each edge so the slice never reaches outside the image.
    rows = slice(int(max(0, top)), int(min(img_h, bottom)))
    cols = slice(int(max(0, left)), int(min(img_w, right)))
    return image[rows, cols]
def split(img_path, ratio=1.414, overlap=0.05, x_compensation=3):
    """
    Split an overly long image into overlapping tiles saved as separate files.

    An image whose height/width ratio exceeds ``ratio`` is cut into tiles
    stacked vertically; one whose width/height ratio exceeds ``ratio`` is cut
    into tiles laid out horizontally. Any other image is returned unchanged.

    :param img_path: path of the image file
    :param ratio: long-side / short-side ratio of each tile
    :param overlap: overlap between neighbouring tiles, as a fraction of the short side
    :param x_compensation: multiplier applied to ``overlap`` when splitting horizontally
    :return: list of dicts ``{'img': path, 'x_offset': int, 'y_offset': int}``,
        one per tile; the offsets locate the tile's top-left corner in the
        original image
    :raises ValueError: if the image cannot be read, or if the overlap is so
        large that the tile step is not positive
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f'cannot read image: {img_path}')
    height, width = image.shape[:2]
    img_name, img_ext = common_util.parse_save_path(img_path)

    if height / width > ratio:  # too tall: cut along the vertical axis
        vertical = True
        short_side, long_side = width, height
        step = width * (ratio - overlap)
    elif width / height > ratio:  # too wide: cut along the horizontal axis
        vertical = False
        short_side, long_side = height, width
        # Text usually runs horizontally, so use a larger overlap when
        # cutting across it.
        step = height * (ratio - overlap * x_compensation)
    else:
        return [{'img': img_path, 'x_offset': 0, 'y_offset': 0}]

    if step <= 0:
        # A non-positive step would either divide by zero or silently yield
        # no tiles at all.
        raise ValueError('overlap is too large: tile step must be positive')

    tile_length = short_side * ratio
    split_result = []
    for i in range(math.ceil(long_side / step)):
        offset = round(step * i)
        if vertical:
            rectangle = [0, offset, width, offset + tile_length]
        else:
            rectangle = [offset, 0, offset + tile_length, height]
        cropped_img = capture(image, rectangle)
        split_path = common_util.get_processed_img_path(f'{img_name}.split_{i}.{img_ext}')
        cv2.imwrite(split_path, cropped_img)
        split_result.append({
            'img': split_path,
            'x_offset': 0 if vertical else offset,
            'y_offset': offset if vertical else 0,
        })
        if offset + tile_length >= long_side:
            # This tile already reaches the end of the image; any further
            # tile would be a sliver fully contained in it.
            break
    return split_result
# def parse_rotation_angles(image):
# """
# 判断图片旋转角度,逆时针旋转该角度后为正。可能值['0', '90', '180', '270']
# :param image: 图片NumPy数组或文件路径
# :return: 最有可能的两个角度
# """
# angles = ['0', '90']
# model = PaddleClas(model_name='text_image_orientation')
# clas_result = model.predict(input_data=image)
# try:
# clas_result = next(clas_result)[0]
# if clas_result['scores'][0] < 0.5:
# return angles
# angles = clas_result['label_names']
# except Exception as e:
# logging.error('获取图片旋转角度失败', exc_info=e)
# return angles
def rotate(img_path, angle):
    """
    Rotate an image file and save the result as a new file.

    :param img_path: path of the image file
    :param angle: counter-clockwise rotation angle in degrees; the output
        canvas keeps the original size for 180 and swaps width and height
        for every other non-zero angle, so 90 / 180 / 270 are the intended values
    :return: path of the rotated image file, or ``img_path`` itself when
        ``angle`` is 0
    :raises ValueError: if the image cannot be read
    """
    if angle == 0:
        return img_path
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f'cannot read image: {img_path}')
    height, width = image.shape[:2]
    if angle == 180:
        new_width, new_height = width, height
    else:
        new_width, new_height = height, width
    # Rotate about the image centre (arguments: centre, angle, scale).
    matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    # Shift the result so it is centred on the new canvas.
    matrix[0, 2] += (new_width - width) / 2
    matrix[1, 2] += (new_height - height) / 2
    # Arguments: source image, affine matrix, output (width, height).
    rotated = cv2.warpAffine(image, matrix, (new_width, new_height))
    img_name, img_ext = common_util.parse_save_path(img_path)
    rotated_path = common_util.get_processed_img_path(f'{img_name}.rotate_{angle}.{img_ext}')
    cv2.imwrite(rotated_path, rotated)
    return rotated_path
def invert_rotate_point(point, center, angle):
    """
    Map a point on a rotated image back to its position before the rotation.

    The forward transform is rebuilt from ``center`` and ``angle`` (rotation
    about ``center``, plus a shift for angles that swap the canvas
    dimensions) and then inverted.

    :param point: (x, y) point on the rotated image
    :param center: (x, y) rotation centre that was used for the forward rotation
    :param angle: rotation angle in degrees that was used for the forward rotation
    :return: 2x1 ndarray holding the x and y coordinates before the rotation
    """
    matrix = cv2.getRotationMatrix2D(center, angle, 1)
    # The canvas shift only exists when the rotation swapped width and
    # height. An angle of 0 means the image was not rotated at all and 180
    # keeps the canvas size, so neither needs the shift.
    if angle not in (0, 180):
        matrix[0, 2] += center[1] - center[0]
        matrix[1, 2] += center[0] - center[1]
    reverse_matrix = cv2.invertAffineTransform(matrix)
    # Homogeneous column vector so the 2x3 affine matrix can be applied.
    homogeneous_point = numpy.array([[point[0]], [point[1]], [1]])
    return numpy.dot(reverse_matrix, homogeneous_point)
def invert_rotate_rectangle(rectangle, center, angle):
    """
    Map a rectangle on a rotated image back to the image before the rotation.

    All four corners are mapped back individually and their axis-aligned
    bounding box is returned.

    :param rectangle: rectangle on the rotated image as (x1, y1, x2, y2)
    :param center: (x, y) rotation centre that was used for the forward rotation
    :param angle: rotation angle in degrees that was used for the forward rotation
    :return: list ``[x1, y1, x2, y2]``; a plain copy of ``rectangle`` when
        ``angle`` is 0, otherwise integer coordinates of the bounding box
    """
    if angle == 0:
        return list(rectangle)
    x1, y1, x2, y2 = rectangle
    corners = ((x1, y1), (x1, y2), (x2, y2), (x2, y1))
    xs, ys = [], []
    for corner in corners:
        restored = numpy.asarray(invert_rotate_point(corner, center, angle), dtype=float).ravel()
        # Round rather than truncate: floating-point noise in the inverse
        # transform (e.g. 99.9999999) must not shift a coordinate by one pixel.
        xs.append(int(round(restored[0])))
        ys.append(int(round(restored[1])))
    # Bounding box of the restored corners: new top-left and bottom-right.
    return [min(xs), min(ys), max(xs), max(ys)]
def expand_to_a4_size(img_path):
    """
    Pad an image with white borders so its aspect ratio is close to A4.

    Padding is added symmetrically on one axis only (left/right or
    top/bottom), choosing portrait or landscape A4 according to the image's
    current height/width ratio. Images whose ratio falls in the small gaps
    between the handled ranges (1.40-1.42 and 0.70-0.72) are left as they are.

    :param img_path: path of the image file
    :return: tuple ``(path, x_offset, y_offset)``: path of the padded image
        file (``img_path`` itself when nothing was done) and the width of the
        padding added on the left and on the top
    :raises ValueError: if the image cannot be read
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f'cannot read image: {img_path}')
    img_name, img_ext = common_util.parse_save_path(img_path)
    height, width = image.shape[:2]
    x_offset, y_offset = 0, 0
    hw_ratio = height / width
    if hw_ratio >= 1.42:
        # Taller than portrait A4: widen.
        x_offset = int(int(height / 1.414 - width) / 2)
    elif 1 <= hw_ratio <= 1.40:
        # Portrait but too short: heighten.
        y_offset = int(int(width * 1.414 - height) / 2)
    elif 0.72 <= hw_ratio < 1:
        # Landscape but too narrow: widen.
        x_offset = int(int(height * 1.414 - width) / 2)
    elif hw_ratio <= 0.7:
        # Wider than landscape A4: heighten.
        y_offset = int(int(width / 1.414 - height) / 2)
    else:
        return img_path, 0, 0
    # White border on both sides of the chosen axis; the other axis and the
    # channel axis get zero padding.
    image = numpy.pad(
        image,
        ((y_offset, y_offset), (x_offset, x_offset), (0, 0)),
        mode='constant',
        constant_values=255,
    )
    save_path = common_util.get_processed_img_path(f'{img_name}.a4.{img_ext}')
    cv2.imwrite(save_path, image)
    return save_path, x_offset, y_offset
def combined(img1, img2):
    """
    Place two images side by side on a single canvas.

    If the heights differ, the shorter image is resized to the taller one's
    height, with its width scaled by the same factor.

    :param img1: ndarray image placed on the left
    :param img2: ndarray image placed on the right
    :return: uint8 ndarray with three channels containing both images
    """
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    # Bring the shorter image up to the height of the taller one.
    if h1 < h2:
        img1 = cv2.resize(img1, (int(w1 * h2 / h1), h2))
    elif h1 > h2:
        img2 = cv2.resize(img2, (int(w2 * h1 / h2), h1))
    # Re-read the sizes, since one of the images may have been resized.
    h1, w1 = img1.shape[:2]
    h2, w2 = img2.shape[:2]
    # Blank canvas: summed widths, common height.
    canvas = numpy.zeros((max(h1, h2), w1 + w2, 3), dtype=numpy.uint8)
    canvas[:h1, :w1] = img1
    canvas[:h2, w1:w1 + w2] = img2
    return canvas
def is_photo(img_path):
    """
    Tell whether an image file looks like a photo taken with a camera.

    The check relies on the EXIF camera manufacturer / camera model tags.

    :param img_path: path of the image file
    :return: True if either tag is present; False if the image may not be a
        photo, or if the camera information was lost in transit
    """
    # Open through a context manager so the file handle is always released.
    with Image.open(img_path) as img:
        exif = img.getexif()
        # 271: camera manufacturer, 272: camera model
        return any(tag in exif for tag in (271, 272))
def pdf_to_imgs(pdf_path, dpi=150):
    """
    Render every page of a PDF to an image and extract the page text.

    :param pdf_path: path of the PDF file
    :param dpi: rendering resolution; pages are scaled by ``dpi / 72``
    :return: list with one ``[image, text]`` pair per page, where ``image``
        is a BGR ndarray and ``text`` is the result of ``page.get_text()``;
        None if anything fails (the error is logged)
    """
    pdf_file = None
    # Zoom factor relative to the 72 DPI base resolution.
    default_dpi = 72
    zoom = dpi / default_dpi
    try:
        pdf_file = fitz.open(pdf_path)
        # The scaling matrix is the same for every page, so build it once.
        mat = fitz.Matrix(zoom, zoom)
        pdf_imgs = []
        for page in pdf_file:
            pix = page.get_pixmap(matrix=mat)
            # Reinterpret the raw samples as a height x width x channels
            # array, then convert from RGB to OpenCV's BGR channel order.
            img = numpy.frombuffer(pix.samples, dtype=numpy.uint8).reshape((pix.height, pix.width, -1))
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            pdf_imgs.append([img, page.get_text()])
        return pdf_imgs
    except Exception as ex:
        logging.getLogger('error').error('pdf转图片失败', exc_info=ex)
        return None
    finally:
        # Compare against None explicitly: a document object's truthiness
        # may depend on its contents (presumably its page count - TODO
        # confirm), which would skip close() for an empty document.
        if pdf_file is not None:
            pdf_file.close()