文档检测
This commit is contained in:
5
object_detection/core/sample/__init__.py
Normal file
5
object_detection/core/sample/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
from .cornernet import cornernet
|
||||
from .cornernet_saccade import cornernet_saccade
|
||||
|
||||
def data_sampling_func(sys_configs, db, k_ind, data_aug=True, debug=False):
    """Dispatch to the sampling function named by ``sys_configs.sampling_function``.

    The name is looked up among this module's globals (which include the
    imported ``cornernet`` and ``cornernet_saccade`` samplers); an unknown
    name raises ``KeyError``.  All arguments are forwarded positionally and
    the sampler's return value is passed back unchanged.
    """
    sampler = globals()[sys_configs.sampling_function]
    return sampler(sys_configs, db, k_ind, data_aug, debug)
|
||||
164
object_detection/core/sample/cornernet.py
Normal file
164
object_detection/core/sample/cornernet.py
Normal file
@@ -0,0 +1,164 @@
|
||||
import math
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from .utils import random_crop, draw_gaussian, gaussian_radius, normalize_, color_jittering_, lighting_
|
||||
|
||||
|
||||
def _resize_image(image, detections, size):
    """Resize ``image`` to ``size`` and rescale the box coordinates to match.

    ``size`` is ``(new_height, new_width)``.  Columns 0 and 2 of
    ``detections`` (x coordinates) are scaled by the width ratio, columns 1
    and 3 (y coordinates) by the height ratio.  The input ``detections``
    array is not modified; a scaled copy is returned.

    Returns:
        ``(resized_image, scaled_detections)``.
    """
    scaled = detections.copy()
    old_height, old_width = image.shape[0:2]
    new_height, new_width = size

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(image, (new_width, new_height))

    scaled[:, 0:4:2] *= new_width / old_width
    scaled[:, 1:4:2] *= new_height / old_height
    return resized, scaled
|
||||
|
||||
|
||||
def _clip_detections(image, detections):
|
||||
detections = detections.copy()
|
||||
height, width = image.shape[0:2]
|
||||
|
||||
detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1)
|
||||
detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1)
|
||||
keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \
|
||||
((detections[:, 3] - detections[:, 1]) > 0)
|
||||
detections = detections[keep_inds]
|
||||
return detections
|
||||
|
||||
|
||||
def cornernet(system_configs, db, k_ind, data_aug, debug):
    """Assemble one CornerNet training batch starting at dataset index ``k_ind``.

    For each of ``system_configs.batch_size`` samples the image is read,
    optionally randomly cropped and flipped, resized to the configured input
    size, colour-augmented and normalised.  Every remaining detection is
    turned into top-left / bottom-right corner heatmap targets, sub-pixel
    offset regressions and flat tag indices at the output resolution.

    Args:
        system_configs: provides ``data_rng`` and ``batch_size``.
        db: dataset object; this function reads ``db.configs``, ``db.db_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val``, ``db.eig_vec`` and calls
            ``db.shuffle_inds()``, ``db.image_path()`` and ``db.detections()``.
        k_ind: position in ``db.db_inds`` of the first sample to use.
        data_aug: accepted for interface compatibility; not used here.
        debug: when true, shuffling, random crop, random flip, colour
            augmentation and normalisation are all skipped.

    Returns:
        ``({"xs": [images], "ys": [tl_heatmaps, br_heatmaps, tag_masks,
        tl_regrs, br_regrs, tl_tags, br_tags]}, k_ind)`` where the arrays are
        torch tensors and ``k_ind`` is the next dataset position (wrapping
        around at the dataset size).

    Raises:
        IOError: if an image cannot be read by ``cv2.imread``.
        IndexError: if one image keeps more than 128 detections (the fixed
            capacity of the per-image tag buffers).
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 128

    # Input -> output resolution factors; identical for every sample.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
    tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size,), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # Reshuffle whenever the index wraps back to the start of the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_path = db.image_path(db_ind)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("cv2.imread failed to read image: {}".format(image_path))

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image, detections, rand_scales, input_size, border=border)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                # NOTE(review): indentation was lost in the source; lighting is
                # assumed to apply only when rand_color is enabled - confirm.
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            # The last column holds a 1-based category id.
            category = int(detection[-1]) - 1

            # Corner positions at output resolution, as floats and as cells.
            fxtl = detection[0] * width_ratio
            fytl = detection[1] * height_ratio
            fxbr = detection[2] * width_ratio
            fybr = detection[3] * height_ratio

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            if gaussian_bump:
                width = math.ceil((detection[2] - detection[0]) * width_ratio)
                height = math.ceil((detection[3] - detection[1]) * height_ratio)

                # A configured radius of -1 means "derive it from the box size".
                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1

            tag_ind = tag_lens[b_ind]
            # Regression targets are the fractional parts lost by truncation.
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            # Tags are row-major flat indices into the output map.
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            tag_lens[b_ind] += 1

    # Mark the filled slots of each per-image tag buffer.
    for b_ind in range(batch_size):
        tag_masks[b_ind, :tag_lens[b_ind]] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images],
        "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags]
    }, k_ind
|
||||
293
object_detection/core/sample/cornernet_saccade.py
Normal file
293
object_detection/core/sample/cornernet_saccade.py
Normal file
@@ -0,0 +1,293 @@
|
||||
import math
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from .utils import draw_gaussian, gaussian_radius, normalize_, color_jittering_, lighting_, crop_image
|
||||
|
||||
|
||||
def bbox_overlaps(a_dets, b_dets):
    """Return the element-wise ratio of box areas ``area(a) / area(b)``.

    Both inputs hold one box per row as ``[x0, y0, x1, y1, ...]``.  Despite
    the name, no intersection is computed: the result is simply the area of
    each box in ``a_dets`` divided by the area of the matching row in
    ``b_dets``.
    """
    def box_areas(dets):
        # width * height of every row
        return (dets[:, 2] - dets[:, 0]) * (dets[:, 3] - dets[:, 1])

    return box_areas(a_dets) / box_areas(b_dets)
|
||||
|
||||
|
||||
def clip_detections(border, detections):
    """Clip boxes to a rectangular region and report which ones survive.

    ``border`` is ``(y0, y1, x0, x1)``.  x coordinates are clipped to
    ``[x0, x1 - 1]`` and y coordinates to ``[y0, y1 - 1]`` on a copy of
    ``detections``; rows whose clipped width or height is not strictly
    positive are dropped.

    Returns:
        ``(kept_detections, kept_indices)`` where ``kept_indices`` are the row
        positions (into the input) of the boxes that were kept.
    """
    clipped = detections.copy()
    top, bottom, left, right = border

    # Views into the copy, so clipping with out= updates ``clipped`` in place.
    xs = clipped[:, 0:4:2]
    ys = clipped[:, 1:4:2]
    np.clip(xs, left, right - 1, out=xs)
    np.clip(ys, top, bottom - 1, out=ys)

    has_width = (xs[:, 1] - xs[:, 0]) > 0
    has_height = (ys[:, 1] - ys[:, 0]) > 0
    kept = np.flatnonzero(has_width & has_height)
    return clipped[kept], kept
|
||||
|
||||
|
||||
def crop_image_dets(image, dets, ind, input_size, output_size=None, random_crop=True, rand_center=True):
    """Crop ``image`` around a chosen centre and shift ``dets`` accordingly.

    The crop centre is picked independently along x and y:

    * if ``random_crop`` is false, or the image is not larger than the input
      size along that axis, the image centre is used;
    * otherwise, with no reference box (``ind is None``) the crop window is
      placed uniformly at random inside the image;
    * with a reference box ``dets[ind]`` the window is either placed at
      random so that it still contains the box, or (when ``rand_center`` is
      set and a coin flip succeeds) centred near the box centre with a
      jitter of up to 14 pixels on each side.

    ``dets`` is modified in place: the crop offset is subtracted from its
    x and y columns.

    Returns:
        ``(cropped_image, dets, border)`` with ``border`` as returned by
        ``crop_image``.
    """
    if ind is None:
        det_x0 = det_y0 = det_x1 = det_y1 = None
    else:
        det_x0, det_y0, det_x1, det_y1 = dets[ind, 0:4]

    input_height, input_width = input_size
    image_height, image_width = image.shape[0:2]

    # One coin flip decides "centre on the box" for both axes.
    centered = rand_center and np.random.uniform() > 0.5

    def pick_center(image_len, input_len, det_lo, det_hi):
        # Centre coordinate of the crop along one axis.
        if not random_crop or image_len <= input_len:
            return image_len // 2

        if ind is None or not centered:
            if ind is None:
                low, high = 0, image_len - input_len
            else:
                # Window start range that keeps the reference box inside.
                low = max(det_hi - input_len, 0)
                high = min(image_len - input_len, det_lo)
            start = np.random.randint(int(low), int(high) + 1)
            return start + input_len // 2

        middle = (det_lo + det_hi) // 2
        low = max(middle - np.random.randint(0, 15), 0)
        high = min(middle + np.random.randint(0, 15), image_len - 1)
        return np.random.randint(int(low), int(high) + 1)

    # x is resolved before y so the random draws happen in a fixed order.
    xc = pick_center(image_width, input_width, det_x0, det_x1)
    yc = pick_center(image_height, input_height, det_y0, det_y1)

    image, border, offset = crop_image(image, [yc, xc], input_size, output_size=output_size)
    dets[:, 0:4:2] -= offset[1]
    dets[:, 1:4:2] -= offset[0]
    return image, dets, border
|
||||
|
||||
|
||||
def scale_image_detections(image, dets, scale):
    """Resize ``image`` by ``scale`` and scale the box coordinates with it.

    The new image size is the old size times ``scale``, truncated to
    integers.  The first four columns of a copy of ``dets`` are multiplied
    by ``scale``; the input array is not modified.

    Returns:
        ``(resized_image, scaled_dets)``.
    """
    old_height, old_width = image.shape[0:2]
    target_height = int(old_height * scale)
    target_width = int(old_width * scale)

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(image, (target_width, target_height))

    scaled = dets.copy()
    scaled[:, 0:4] *= scale
    return resized, scaled
|
||||
|
||||
|
||||
def ref_scale(detections, random_crop=False):
    """Pick a random reference box and a scale that resizes it to a target size.

    Returns ``(None, None)`` when there are no detections, or - only when
    ``random_crop`` is set - with probability of roughly 0.3.

    Otherwise a row of ``detections`` is chosen at random and its longer
    side is mapped to a random integer length drawn from the bucket the box
    currently falls in: ``[96, 254]`` for sides above 96, ``[32, 96]`` for
    sides above 32, and ``[16, 32]`` for anything smaller.

    Returns:
        ``(scale, ref_ind)`` with ``scale = target_length / longer_side``.
    """
    if detections.shape[0] == 0:
        return None, None
    if random_crop and np.random.uniform() > 0.7:
        return None, None

    ref_ind = np.random.randint(detections.shape[0])
    box = detections[ref_ind].copy()
    longest = max(box[3] - box[1], box[2] - box[0])

    # randint's upper bound is exclusive.
    if longest > 96:
        low, high = 96, 255
    elif longest > 32:
        low, high = 32, 97
    else:
        low, high = 16, 33
    return np.random.randint(low=low, high=high) / longest, ref_ind
|
||||
|
||||
|
||||
def create_attention_mask(atts, ratios, sizes, detections):
    """Mark box centres on the attention maps whose size range matches the box.

    ``atts``, ``ratios`` and ``sizes`` are parallel sequences: one 2-D map,
    one down-sampling ratio and one ``(min, max)`` size range per attention
    level.  For every detection whose longer side lies within a level's
    range (inclusive), the cell containing the box centre, divided by that
    level's ratio and truncated, is set to 1.  The maps are modified in
    place; nothing is returned.
    """
    for det in detections:
        longest = max(det[2] - det[0], det[3] - det[1])
        center_x = (det[0] + det[2]) / 2
        center_y = (det[1] + det[3]) / 2

        for att, ratio, size in zip(atts, ratios, sizes):
            if not (size[0] <= longest <= size[1]):
                continue
            col = (center_x / ratio).astype(np.int32)
            row = (center_y / ratio).astype(np.int32)
            att[row, col] = 1
|
||||
|
||||
|
||||
def cornernet_saccade(system_configs, db, k_ind, data_aug, debug):
    """Assemble one CornerNet-Saccade training batch starting at ``k_ind``.

    For each sample the image is read, rescaled (either so that a randomly
    chosen reference box lands in a target size bucket, or by a random entry
    of ``rand_scales``), cropped to the input size, optionally flipped and
    colour-augmented.  Attention maps, corner heatmaps, offset regressions,
    flat tag indices and "valid" masks are produced at the output resolution.

    A detection contributes to the corner heatmaps and tags only if, after
    clipping, it keeps more than half of its (scaled) original area and its
    longer side is at least ``min_scale``.  Other detections are drawn onto
    the ``*_valids`` maps instead, which are finally inverted so that cells
    touched by such detections are 0 and all others are 1.

    Args:
        system_configs: provides ``data_rng`` and ``batch_size``.
        db: dataset object; this function reads ``db.configs``, ``db.db_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val``, ``db.eig_vec`` and calls
            ``db.shuffle_inds()``, ``db.image_path()`` and ``db.detections()``.
        k_ind: position in ``db.db_inds`` of the first sample to use.
        data_aug: accepted for interface compatibility; not used here.
        debug: when true, shuffling, flipping, colour augmentation and
            normalisation are skipped and an annotated image is written to
            ``debug/NNN.jpg`` for every sample.

    Returns:
        ``({"xs": [images], "ys": [tl_heats, br_heats, tag_masks, tl_regrs,
        br_regrs, tl_tags, br_tags, tl_valids, br_valids, attentions]},
        k_ind)`` where ``attentions`` is a list of tensors and ``k_ind`` is
        the next dataset position.

    Raises:
        IOError: if an image cannot be read by ``cv2.imread``.
        IndexError: if one image yields more than 128 positive detections.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]
    rand_scales = db.configs["rand_scales"]
    rand_crop = db.configs["rand_crop"]
    rand_center = db.configs["rand_center"]
    # Not used below; kept so a missing key still raises KeyError as before.
    view_sizes = db.configs["view_sizes"]

    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    att_ratios = db.configs["att_ratios"]
    att_ranges = db.configs["att_ranges"]
    att_sizes = db.configs["att_sizes"]

    min_scale = db.configs["min_scale"]
    # Not used below; kept so a missing key still raises KeyError as before.
    max_scale = db.configs["max_scale"]
    max_objects = 128

    # Input -> output resolution factors; identical for every sample.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
    tl_heats = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    br_heats = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    tl_valids = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    br_valids = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_objects, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_objects, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_objects), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_objects), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_objects), dtype=np.uint8)
    tag_lens = np.zeros((batch_size,), dtype=np.int32)
    attentions = [np.zeros((batch_size, 1, att_size[0], att_size[1]), dtype=np.float32) for att_size in att_sizes]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # Reshuffle whenever the index wraps back to the start of the dataset.
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        image_path = db.image_path(db_ind)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("cv2.imread failed to read image: {}".format(image_path))

        orig_detections = db.detections(db_ind)
        keep_inds = np.arange(orig_detections.shape[0])

        # clip the detections
        detections = orig_detections.copy()
        border = [0, image.shape[0], 0, image.shape[1]]
        detections, clip_inds = clip_detections(border, detections)
        keep_inds = keep_inds[clip_inds]

        scale, ref_ind = ref_scale(detections, random_crop=rand_crop)
        scale = np.random.choice(rand_scales) if scale is None else scale

        image, detections = scale_image_detections(image, detections, scale)

        image, detections, border = crop_image_dets(image, detections, ref_ind, input_size, rand_center=rand_center)

        detections, clip_inds = clip_detections(border, detections)
        keep_inds = keep_inds[clip_inds]

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
        create_attention_mask([att[b_ind, 0] for att in attentions], att_ratios, att_ranges, detections)

        if debug:
            dimage = image.copy()
            for det in detections.astype(np.int32):
                cv2.rectangle(dimage,
                              (det[0], det[1]),
                              (det[2], det[3]),
                              (0, 255, 0), 2
                              )
            cv2.imwrite('debug/{:03d}.jpg'.format(b_ind), dimage)

        # Scaled, unclipped versions of the surviving boxes.  Fancy indexing
        # yields a copy, so the array returned by db.detections() is never
        # scaled in place (it may be shared with the dataset - unconfirmed).
        kept_orig = orig_detections[keep_inds]
        kept_orig[:, 0:4] *= scale
        overlaps = bbox_overlaps(detections, kept_orig) > 0.5

        if not debug:
            image = image.astype(np.float32) / 255.
            color_jittering_(data_rng, image)
            lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        images[b_ind] = image.transpose((2, 0, 1))

        for detection, overlap in zip(detections, overlaps):
            # The last column holds a 1-based category id.
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]

            # Size test is done at input resolution on truncated coordinates.
            det_height = int(ybr) - int(ytl)
            det_width = int(xbr) - int(xtl)
            valid = max(det_height, det_width) >= min_scale

            fxtl = xtl * width_ratio
            fytl = ytl * height_ratio
            fxbr = xbr * width_ratio
            fybr = ybr * height_ratio

            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            width = math.ceil((detection[2] - detection[0]) * width_ratio)
            height = math.ceil((detection[3] - detection[1]) * height_ratio)

            # A configured radius of -1 means "derive it from the box size".
            if gaussian_rad == -1:
                radius = gaussian_radius((height, width), gaussian_iou)
                radius = max(0, int(radius))
            else:
                radius = gaussian_rad

            if overlap and valid:
                draw_gaussian(tl_heats[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heats[b_ind, category], [xbr, ybr], radius)

                tag_ind = tag_lens[b_ind]
                # Regression targets are the fractional parts lost by truncation.
                tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
                br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
                # Tags are row-major flat indices into the output map.
                tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
                br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
                tag_lens[b_ind] += 1
            else:
                draw_gaussian(tl_valids[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_valids[b_ind, category], [xbr, ybr], radius)

    # Invert: 1 where nothing was drawn, 0 around rejected detections.
    tl_valids = (tl_valids == 0).astype(np.float32)
    br_valids = (br_valids == 0).astype(np.float32)

    # Mark the filled slots of each per-image tag buffer.
    for b_ind in range(batch_size):
        tag_masks[b_ind, :tag_lens[b_ind]] = 1

    images = torch.from_numpy(images)
    tl_heats = torch.from_numpy(tl_heats)
    br_heats = torch.from_numpy(br_heats)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    tag_masks = torch.from_numpy(tag_masks)
    tl_valids = torch.from_numpy(tl_valids)
    br_valids = torch.from_numpy(br_valids)
    attentions = [torch.from_numpy(att) for att in attentions]

    return {
        "xs": [images],
        "ys": [tl_heats, br_heats, tag_masks, tl_regrs, br_regrs, tl_tags, br_tags, tl_valids, br_valids, attentions]
    }, k_ind
|
||||
178
object_detection/core/sample/utils.py
Normal file
178
object_detection/core/sample/utils.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import random
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def grayscale(image):
    """Convert a BGR image to a single-channel grayscale image via OpenCV."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
|
||||
|
||||
|
||||
def normalize_(image, mean, std):
    """Normalise ``image`` in place: subtract ``mean``, then divide by ``std``.

    Nothing is returned; the caller's array is modified.
    """
    np.subtract(image, mean, out=image)
    np.divide(image, std, out=image)
|
||||
|
||||
|
||||
def lighting_(data_rng, image, alphastd, eigval, eigvec):
    """Add a random lighting shift to ``image`` in place.

    Three weights are drawn from ``data_rng.normal`` with standard deviation
    ``alphastd``; the shift added to the image is ``eigvec . (eigval *
    weights)``, broadcast over the image.
    """
    weights = data_rng.normal(scale=alphastd, size=(3,))
    shift = np.dot(eigvec, eigval * weights)
    np.add(image, shift, out=image)
|
||||
|
||||
|
||||
def blend_(alpha, image1, image2):
    """Blend ``image2`` into ``image1`` in place.

    After the call ``image1`` holds ``alpha * image1 + (1 - alpha) * image2``.
    Only ``image1`` is modified: the weighted ``image2`` term is computed as a
    temporary, so an array passed as ``image2`` (for example a view of a
    grayscale image shared with the caller) keeps its original values.
    """
    image1 *= alpha
    image1 += image2 * (1 - alpha)
|
||||
|
||||
|
||||
def saturation_(data_rng, image, gs, gs_mean, var):
    """Randomly change saturation by blending ``image`` with its grayscale ``gs``.

    The blend weight is ``1 + u`` with ``u`` drawn uniformly from
    ``[-var, var)``.  ``gs_mean`` is accepted for a uniform signature with
    the other jitter functions and is not used.
    """
    jitter = data_rng.uniform(low=-var, high=var)
    blend_(1. + jitter, image, gs[:, :, None])
|
||||
|
||||
|
||||
def brightness_(data_rng, image, gs, gs_mean, var):
    """Randomly scale the brightness of ``image`` in place.

    The image is multiplied by ``1 + u`` with ``u`` drawn uniformly from
    ``[-var, var)``.  ``gs`` and ``gs_mean`` are accepted for a uniform
    signature with the other jitter functions and are not used.
    """
    jitter = data_rng.uniform(low=-var, high=var)
    np.multiply(image, 1. + jitter, out=image)
|
||||
|
||||
|
||||
def contrast_(data_rng, image, gs, gs_mean, var):
    """Randomly change contrast by blending ``image`` with the scalar ``gs_mean``.

    The blend weight is ``1 + u`` with ``u`` drawn uniformly from
    ``[-var, var)``.  ``gs`` is accepted for a uniform signature with the
    other jitter functions and is not used.
    """
    jitter = data_rng.uniform(low=-var, high=var)
    blend_(1. + jitter, image, gs_mean)
|
||||
|
||||
|
||||
def color_jittering_(data_rng, image):
    """Apply brightness, contrast and saturation jitter to ``image`` in place.

    The three operations run in a random order chosen with the standard
    library ``random.shuffle`` (not ``data_rng``).  The grayscale image and
    its mean are computed once, before any jitter is applied, and each
    operation uses a variation of 0.4.
    """
    jitters = [brightness_, contrast_, saturation_]
    random.shuffle(jitters)

    gray = grayscale(image)
    gray_mean = gray.mean()
    for jitter in jitters:
        jitter(data_rng, image, gray, gray_mean, 0.4)
|
||||
|
||||
|
||||
def gaussian2D(shape, sigma=1):
    """Return an unnormalised 2-D Gaussian kernel of the given ``shape``.

    The kernel is centred on the middle of the array, has peak value 1 and
    standard deviation ``sigma``.  Entries smaller than machine epsilon
    times the peak are set to exactly 0.
    """
    rows, cols = shape
    half_rows = (rows - 1.) / 2.
    half_cols = (cols - 1.) / 2.
    y, x = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]

    kernel = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
    negligible = kernel < np.finfo(kernel.dtype).eps * kernel.max()
    kernel[negligible] = 0
    return kernel
|
||||
|
||||
|
||||
def draw_gaussian(heatmap, center, radius, k=1):
    """Stamp a Gaussian bump onto ``heatmap`` in place, keeping the maximum.

    A ``(2 * radius + 1)``-wide kernel with ``sigma = diameter / 6`` is
    scaled by ``k`` and merged into the heatmap around ``center`` (given as
    ``(x, y)``) with an element-wise maximum.  The kernel is cut off where
    it would extend past the heatmap edges.
    """
    size = 2 * radius + 1
    kernel = gaussian2D((size, size), sigma=size / 6)

    cx, cy = center
    rows, cols = heatmap.shape[0:2]

    # How far the kernel may extend from the centre in each direction.
    reach_left = min(cx, radius)
    reach_right = min(cols - cx, radius + 1)
    reach_up = min(cy, radius)
    reach_down = min(rows - cy, radius + 1)

    target = heatmap[cy - reach_up:cy + reach_down, cx - reach_left:cx + reach_right]
    patch = kernel[radius - reach_up:radius + reach_down, radius - reach_left:radius + reach_right]
    np.maximum(target, patch * k, out=target)
|
||||
|
||||
|
||||
def gaussian_radius(det_size, min_overlap):
    """Return the smallest of three candidate Gaussian radii for a box.

    ``det_size`` is ``(height, width)``.  Each candidate is a root of a
    quadratic whose coefficients depend on the box size and ``min_overlap``;
    the minimum of the three roots is returned.
    """
    height, width = det_size
    side_sum = height + width

    def root(a, b, c, sign):
        # One root of a quadratic given as (b + sign * sqrt(b^2 - 4ac)) / 2a.
        disc = np.sqrt(b ** 2 - 4 * a * c)
        return (b + sign * disc) / (2 * a)

    r1 = root(1, side_sum, width * height * (1 - min_overlap) / (1 + min_overlap), -1)
    r2 = root(4, 2 * side_sum, (1 - min_overlap) * width * height, -1)
    r3 = root(4 * min_overlap, -2 * min_overlap * side_sum, (min_overlap - 1) * width * height, 1)
    return min(r1, r2, r3)
|
||||
|
||||
|
||||
def _get_border(border, size):
|
||||
i = 1
|
||||
while size - border // i <= border // i:
|
||||
i *= 2
|
||||
return border // i
|
||||
|
||||
|
||||
def random_crop(image, detections, random_scales, view_size, border=64):
    """Cut a randomly placed, randomly scaled window out of ``image``.

    The window size is ``view_size`` (``(height, width)``) times a scale
    drawn from ``random_scales``.  Its centre is drawn uniformly inside the
    image, staying at least a margin (derived from ``border``) away from the
    edges.  Parts of the window that fall outside the image stay zero.  The
    x and y columns of a copy of ``detections`` are shifted into the
    window's coordinate frame.

    Returns:
        ``(cropped_image, cropped_detections)``.
    """
    view_height, view_width = view_size
    image_height, image_width = image.shape[0:2]

    scale = np.random.choice(random_scales)
    crop_height = int(view_height * scale)
    crop_width = int(view_width * scale)
    canvas = np.zeros((crop_height, crop_width, 3), dtype=image.dtype)

    margin_x = _get_border(border, image_width)
    margin_y = _get_border(border, image_height)

    # x is drawn before y so the random draws happen in a fixed order.
    ctx = np.random.randint(low=margin_x, high=image_width - margin_x)
    cty = np.random.randint(low=margin_y, high=image_height - margin_y)

    # Source rectangle: the window intersected with the image.
    x0 = max(ctx - crop_width // 2, 0)
    x1 = min(ctx + crop_width // 2, image_width)
    y0 = max(cty - crop_height // 2, 0)
    y1 = min(cty + crop_height // 2, image_height)

    # Destination corner: keeps the sampled centre at the canvas centre.
    dst_x0 = crop_width // 2 - (ctx - x0)
    dst_y0 = crop_height // 2 - (cty - y0)
    canvas[dst_y0:dst_y0 + (y1 - y0), dst_x0:dst_x0 + (x1 - x0), :] = image[y0:y1, x0:x1, :]

    # Move detections from image coordinates to canvas coordinates.
    shifted = detections.copy()
    shifted[:, 0:4:2] -= x0
    shifted[:, 1:4:2] -= y0
    shifted[:, 0:4:2] += dst_x0
    shifted[:, 1:4:2] += dst_y0

    return canvas, shifted
|
||||
|
||||
|
||||
def crop_image(image, center, size, output_size=None):
    """Crop a window of ``size`` around ``center`` into a zero-padded canvas.

    Args:
        image: source array of shape ``(H, W, 3)``.
        center: ``(cty, ctx)`` centre of the window in image coordinates.
        size: ``(height, width)`` of the window to take from the image.
        output_size: ``(height, width)`` of the returned canvas; defaults to
            ``size``.  May be any two-element sequence, including a NumPy
            array.

    Returns:
        ``(cropped_image, border, offset)`` where

        * ``cropped_image`` is the canvas with the in-image part of the
          window pasted so that ``center`` lands on the canvas centre; the
          rest stays zero;
        * ``border`` is a float32 array ``[y_start, y_end, x_start, x_end]``
          delimiting the pasted region inside the canvas;
        * ``offset`` is ``[cty - o_height // 2, ctx - o_width // 2]``, the
          amount to subtract from image coordinates to get canvas
          coordinates.
    """
    # Identity test: ``== None`` is element-wise on NumPy arrays and would
    # make the ``if`` raise for an array-valued output_size.
    if output_size is None:
        output_size = size

    cty, ctx = center
    height, width = size
    o_height, o_width = output_size
    im_height, im_width = image.shape[0:2]
    cropped_image = np.zeros((o_height, o_width, 3), dtype=image.dtype)

    # Window intersected with the image.
    x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width)
    y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height)

    # Extent of the intersection on each side of the centre.
    left, right = ctx - x0, x1 - ctx
    top, bottom = cty - y0, y1 - cty

    cropped_cty, cropped_ctx = o_height // 2, o_width // 2
    y_slice = slice(cropped_cty - top, cropped_cty + bottom)
    x_slice = slice(cropped_ctx - left, cropped_ctx + right)
    cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]

    border = np.array([
        cropped_cty - top,
        cropped_cty + bottom,
        cropped_ctx - left,
        cropped_ctx + right
    ], dtype=np.float32)

    offset = np.array([
        cty - o_height // 2,
        ctx - o_width // 2
    ])

    return cropped_image, border, offset
|
||||
Reference in New Issue
Block a user