Files
fcb_photo_review/util/image_util.py
2024-07-15 15:21:10 +08:00

203 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import math
import urllib.request
import cv2
import numpy
from paddleclas import PaddleClas
def read(image_path):
    """
    Load an image from an HTTP(S) URL or from a local file.

    :param image_path: ``http://`` / ``https://`` URL, or a local filesystem path
    :return: the image as a NumPy array decoded by OpenCV (``IMREAD_COLOR`` for
        downloaded data, OpenCV's default flags for local files)
    """
    # Match the full scheme so that a local path that merely begins with
    # "http" (e.g. "http_cache/a.jpg") is not mistaken for a URL.
    if image_path.startswith(("http://", "https://")):
        # The context manager closes the connection even if reading fails.
        with urllib.request.urlopen(image_path) as resp:
            image_data = resp.read()
        # View the downloaded bytes as a uint8 buffer and let OpenCV decode it.
        image_np = numpy.frombuffer(image_data, numpy.uint8)
        return cv2.imdecode(image_np, cv2.IMREAD_COLOR)
    return cv2.imread(image_path)
def capture(image, rectangle):
    """
    Crop a rectangular region out of an image.

    Coordinates are truncated to integers. Negative coordinates are clamped to
    0 so that a rectangle sticking out of the top/left edge is cropped at the
    edge instead of being interpreted as a from-the-end index. Coordinates
    beyond the bottom/right edge are clipped by the slice itself.

    :param image: image as a NumPy array (rows first, then columns)
    :param rectangle: ``(x1, y1, x2, y2)`` — left, top, right, bottom
    :return: the cropped region of ``image``
    """
    x1, y1, x2, y2 = (max(0, int(value)) for value in rectangle)
    # NumPy indexes rows (y) first, then columns (x).
    return image[y1:y2, x1:x2]
def split(image, ratio=1.414, overlap=0.05):
    """
    Split an overly tall image into vertically overlapping slices.

    Only images whose height/width ratio exceeds ``ratio`` are split; overly
    wide images are returned unchanged.

    :param image: image as a NumPy array, or a path/URL that ``read`` can load
    :param ratio: height/width ratio of each slice
    :param overlap: vertical overlap between neighbouring slices, expressed as a
        fraction of the image width
    :return: list of dicts ``{"img": slice, "x_offset": 0, "y_offset": top_row}``,
        ordered top to bottom
    """
    if isinstance(image, str):
        image = read(image)
    height, width = image.shape[:2]

    # Nothing to do when the image is not taller than the target ratio.
    if height / width <= ratio:
        return [{"img": image, "x_offset": 0, "y_offset": 0}]

    # Distance between the tops of two consecutive slices.
    step = width * (ratio - overlap)
    # Height of each slice (the last one is clipped by the image bottom).
    slice_height = width * ratio

    split_result = []
    for index in range(math.ceil(height / step)):
        offset = round(step * index)
        bottom = int(offset + slice_height)
        # Rows first, then columns; the slice end is clipped to the image.
        cropped_img = image[offset:bottom, 0:width]
        split_result.append({"img": cropped_img, "x_offset": 0, "y_offset": offset})
        # Once a slice reaches the bottom, any further slice would lie entirely
        # inside this slice's overlap region and only duplicate its content.
        if bottom >= height:
            break
    return split_result
def parse_rotation_angles(image):
    """
    Estimate how far an image is rotated, using PaddleClas'
    ``text_image_orientation`` model.

    Rotating the image counter-clockwise by a returned angle should make it
    upright. Angles are strings; expected values are "0", "90", "180", "270".

    :param image: image as a NumPy array, or a file path
    :return: the ``label_names`` of the first prediction when its top score is
        at least 0.5 (presumably the most likely angles — confirm against the
        PaddleClas output format); otherwise the fallback ``['0', '90']``
    """
    fallback = ['0', '90']
    # NOTE(review): a new PaddleClas instance is created on every call; if this
    # function is called often, consider reusing one instance.
    classifier = PaddleClas(model_name="text_image_orientation")
    predictions = classifier.predict(input_data=image)
    try:
        best = next(predictions)[0]
        # A low top score means the classifier is unsure: keep the fallback.
        if best["scores"][0] < 0.5:
            return fallback
        return best["label_names"]
    except Exception as err:
        # Best effort: log the failure and fall back to the default guess.
        logging.error("获取图片旋转角度失败", exc_info=err)
    return fallback
def rotate(image, angle):
    """
    Rotate an image counter-clockwise by a multiple of 90 degrees.

    The angle is normalised modulo 360 first, so equivalent values such as
    360, -180 or 540 are treated like 0, 180 and 180 respectively.

    :param image: image as a NumPy array
    :param angle: counter-clockwise rotation in degrees (numeric); intended
        values are multiples of 90
    :return: the rotated image; the input object itself when the effective
        rotation is 0
    """
    angle = angle % 360
    if angle == 0:
        return image
    height, width = image.shape[:2]
    if angle == 180:
        # A half turn keeps the canvas size.
        new_width, new_height = width, height
    else:
        # A quarter turn swaps width and height.
        new_width, new_height = height, width
    # Rotate about the image centre; arguments: centre, degrees, scale.
    # NOTE(review): rotating about (width / 2, height / 2) rather than
    # ((width - 1) / 2, (height - 1) / 2) looks like it shifts the result by
    # one pixel; invert_rotate_point relies on the same convention, so any
    # change has to be made in both places — confirm before touching.
    matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    # Shift the rotated content so it is centred on the new canvas.
    matrix[0, 2] += (new_width - width) / 2
    matrix[1, 2] += (new_height - height) / 2
    # Arguments: source image, affine matrix, (width, height) of the output.
    return cv2.warpAffine(image, matrix, (new_width, new_height))
def invert_rotate_point(point, center, angle):
    """
    Map a point on a rotated image back to the coordinates of the original image.

    This undoes the transform built by ``rotate`` in this module: a rotation
    about ``center`` plus, for quarter turns, the translation that re-centres
    the content on the swapped canvas.

    :param point: ``(x, y)`` on the rotated image
    :param center: rotation centre ``(cx, cy)`` of the original image
        (``rotate`` uses ``(width / 2, height / 2)``)
    :param angle: counter-clockwise angle in degrees the image was rotated by
    :return: NumPy array of shape ``(2, 1)`` holding the original ``x`` and ``y``
    """
    angle = angle % 360
    # Homogeneous column vector so the 2x3 affine matrix can be applied to it.
    column = numpy.array([[point[0]], [point[1]], [1]])
    if angle == 0:
        # No rotation was applied, so the inverse is the identity.
        return column[:2].astype(float)
    matrix = cv2.getRotationMatrix2D(center, angle, 1)
    if angle != 180:
        # Quarter turns swap the canvas size; add the same re-centring shift
        # that the forward rotation applies.
        matrix[0, 2] += center[1] - center[0]
        matrix[1, 2] += center[0] - center[1]
    reverse_matrix = cv2.invertAffineTransform(matrix)
    return numpy.dot(reverse_matrix, column)
def invert_rotate_rectangle(rectangle, center, angle):
    """
    Map an axis-aligned rectangle on a rotated image back to the original image.

    All four corners are mapped back with ``invert_rotate_point`` and the
    axis-aligned bounding box of the results is returned.

    :param rectangle: ``(x1, y1, x2, y2)`` on the rotated image
    :param center: rotation centre ``(cx, cy)`` of the original image
    :param angle: counter-clockwise angle in degrees the image was rotated by
    :return: ``[x1, y1, x2, y2]`` in original-image coordinates; plain Python
        ints when a rotation is undone, the caller's own values when the
        effective rotation is 0
    """
    if angle % 360 == 0:
        return list(rectangle)
    x1, y1, x2, y2 = rectangle
    corners = ((x1, y1), (x1, y2), (x2, y2), (x2, y1))
    xs, ys = [], []
    for corner in corners:
        rotated = invert_rotate_point(corner, center, angle).astype(int)
        # The helper returns a (2, 1) array; flatten it and convert to plain
        # ints so callers get scalars rather than one-element arrays.
        rot_x, rot_y = numpy.ravel(rotated)
        xs.append(int(rot_x))
        ys.append(int(rot_y))
    # Bounding box of the four mapped corners.
    return [min(xs), min(ys), max(xs), max(ys)]
def expand_to_a4_size(image, center=False):
    """
    Pad an image with white so its height/width ratio approaches A4 (about 1.414).

    Images whose ratio is strictly between 1.40 and 1.42 are returned
    unchanged. Taller images are widened, wider images are heightened. The
    padding follows the image's trailing dimensions, so grayscale (2-D) and
    multi-channel images are both supported.

    :param image: image as a NumPy array
    :param center: if true, split the padding evenly on both sides of the
        original image; otherwise append it on the right / bottom
    :return: tuple ``(padded_image, offset_x, offset_y)`` where the offsets give
        the position of the original image's top-left corner in the result
    """
    h, w = image.shape[:2]
    # Empty for grayscale, (channels,) for colour images.
    trailing = image.shape[2:]
    offset_x, offset_y = 0, 0
    aspect = h / w

    def white(rows, cols):
        # White (255) uint8 block with the same channel layout as the image.
        return numpy.full((rows, cols) + trailing, 255, dtype="uint8")

    if aspect >= 1.42:
        # Too tall: add columns.
        exp_w = int(h / 1.414 - w)
        if center:
            offset_x = int(exp_w / 2)
            pad = white(h, offset_x)
            image = numpy.hstack([pad, image, pad])
        else:
            image = numpy.hstack([image, white(h, exp_w)])
    elif aspect <= 1.40:
        # Too wide: add rows.
        exp_h = int(w * 1.414 - h)
        if center:
            offset_y = int(exp_h / 2)
            pad = white(offset_y, w)
            image = numpy.vstack([pad, image, pad])
        else:
            image = numpy.vstack([image, white(exp_h, w)])
    return image, offset_x, offset_y