Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,21 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import source
from . import transform
from . import reader
from .source import *
from .transform import *
from .reader import *


@@ -0,0 +1,13 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


@@ -0,0 +1,580 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import math
import random
import numpy as np
from copy import deepcopy
from typing import List, Tuple
from collections import defaultdict
from .chip_box_utils import nms, transform_chip_boxes2image_boxes
from .chip_box_utils import find_chips_to_cover_overlaped_boxes
from .chip_box_utils import transform_chip_box
from .chip_box_utils import intersection_over_box
class AnnoCropper(object):
def __init__(self,
image_target_sizes: List[int],
valid_box_ratio_ranges: List[List[float]],
chip_target_size: int,
chip_target_stride: int,
use_neg_chip: bool=False,
max_neg_num_per_im: int=8,
max_per_img: int=-1,
nms_thresh: float=0.5):
"""
Generate chips by chip_target_size and chip_target_stride.
These two parameters act like kernel_size and stride in a CNN.
Each image has a raw size; after resizing it gets its target size,
so the resizing scale = target_size / raw_size. The same scale applies
to the chips of that image.
box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
The 'size' mentioned above is the length of the long side of the image, box or chip.
:param image_target_sizes: [2000, 1000]
:param valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
:param chip_target_size: 500
:param chip_target_stride: 200
"""
self.target_sizes = image_target_sizes
self.valid_box_ratio_ranges = valid_box_ratio_ranges
assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
self.scale_num = len(self.target_sizes)
self.chip_target_size = chip_target_size # is target size
self.chip_target_stride = chip_target_stride # is target stride
self.use_neg_chip = use_neg_chip
self.max_neg_num_per_im = max_neg_num_per_im
self.max_per_img = max_per_img
self.nms_thresh = nms_thresh
def crop_anno_records(self, records: List[dict]):
"""
The main logic:
# foreach record(image):
# foreach scale:
# 1 generate chips by chip size and stride for each scale
# 2 get pos chips
# - validate boxes: current scale; h,w >= 1
# - find pos chips greedily by valid gt boxes in each scale
# - for every valid gt box, find its corresponding pos chips in each scale
# 3 get neg chips
# - If proposals are given, find neg boxes among them which are not covered by pos chips
# - If neg boxes were found in the last step, find neg chips and assign the neg boxes to them, as in step 2
# 4 sample neg chips if there are too many for one image
# transform the per-image, per-scale annotations into chip (pos chip & neg chip) annotations
:param records: standard coco_record but with the extra key `proposals` (Px4), which are predicted by the stage1
model and may contain neg boxes.
:return: new_records, list of dict like
{
'im_file': 'fake_image1.jpg',
'im_id': np.array([1]), # new _global_chip_id as im_id
'h': h, # chip height
'w': w, # chip width
'is_crowd': is_crowd, # Nx1 -> Mx1
'gt_class': gt_class, # Nx1 -> Mx1
'gt_bbox': gt_bbox, # Nx4 -> Mx4, 4 represents [x1,y1,x2,y2]
'gt_poly': gt_poly, # [None]xN -> [None]xM
'chip': [x1, y1, x2, y2] # added
}
Attention:
------------------------------>x
|
| (x1,y1)------
| | |
| | |
| | |
| | |
| | |
| ----------
| (x2,y2)
|
y
If we use [x1, y1, x2, y2] to represent boxes or chips,
(x1,y1) is the left-top point which is in the box,
but (x2,y2) is the right-bottom point which is not in the box.
So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
And you can use x2-x1 to get width, and you can use image[y1:y2, x1:x2] to get the box area.
"""
self.chip_records = []
self._global_chip_id = 1
for r in records:
self._cur_im_pos_chips = [
] # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_idx is List[int]
self._cur_im_neg_chips = [] # element: (chip, neg_box_num)
for scale_i in range(self.scale_num):
self._get_current_scale_parameters(scale_i, r)
# Cx4
chips = self._create_chips(r['h'], r['w'], self._cur_scale)
# # dict: chipid->[box_id, ...]
pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
r['gt_bbox'], chips)
# dict: chipid->neg_box_num
neg_chip2box_num = self._get_neg_boxes_and_chips(
chips,
list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
neg_chip2box_num)
cur_image_records = self._trans_all_chips2annotations(r)
self.chip_records.extend(cur_image_records)
return self.chip_records
def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx, neg_chip2box_num):
for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
chip = np.array(chips[pos_chipid]) # copy chips slice
self._cur_im_pos_chips.append((chip, boxes_idx))
if neg_chip2box_num is None:
return
for neg_chipid, neg_box_num in neg_chip2box_num.items():
chip = np.array(chips[neg_chipid])
self._cur_im_neg_chips.append((chip, neg_box_num))
def _trans_all_chips2annotations(self, r):
gt_bbox = r['gt_bbox']
im_file = r['im_file']
is_crowd = r['is_crowd']
gt_class = r['gt_class']
# gt_poly = r['gt_poly'] # [None]xN
# remaining keys: im_id, h, w
chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
is_crowd, gt_class)
if not self.use_neg_chip:
return chip_records
sampled_neg_chips = self._sample_neg_chips()
neg_chip_records = self._trans_neg_chips2annotations(im_file,
sampled_neg_chips)
chip_records.extend(neg_chip_records)
return chip_records
def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
gt_class):
chip_records = []
for chip, boxes_idx in self._cur_im_pos_chips:
chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
chip)
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
rec = {
'im_file': im_file,
'im_id': np.array([self._global_chip_id]),
'h': chip_h,
'w': chip_w,
'gt_bbox': chip_bbox,
'is_crowd': is_crowd[final_boxes_idx].copy(),
'gt_class': gt_class[final_boxes_idx].copy(),
# 'gt_poly': [None] * len(final_boxes_idx),
'chip': chip
}
self._global_chip_id += 1
chip_records.append(rec)
return chip_records
def _sample_neg_chips(self):
pos_num = len(self._cur_im_pos_chips)
neg_num = len(self._cur_im_neg_chips)
sample_num = min(pos_num + 2, self.max_neg_num_per_im)
assert sample_num >= 1
if neg_num <= sample_num:
return self._cur_im_neg_chips
candidate_num = int(sample_num * 1.5)
candidate_neg_chips = sorted(
self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
random.shuffle(candidate_neg_chips)
sampled_neg_chips = candidate_neg_chips[:sample_num]
return sampled_neg_chips
def _trans_neg_chips2annotations(self,
im_file: str,
sampled_neg_chips: List[Tuple]):
chip_records = []
for chip, neg_box_num in sampled_neg_chips:
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
rec = {
'im_file': im_file,
'im_id': np.array([self._global_chip_id]),
'h': chip_h,
'w': chip_w,
'gt_bbox': np.zeros(
(0, 4), dtype=np.float32),
'is_crowd': np.zeros(
(0, 1), dtype=np.int32),
'gt_class': np.zeros(
(0, 1), dtype=np.int32),
# 'gt_poly': [],
'chip': chip
}
self._global_chip_id += 1
chip_records.append(rec)
return chip_records
def _get_current_scale_parameters(self, scale_i, r):
im_size = max(r['h'], r['w'])
im_target_size = self.target_sizes[scale_i]
self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
self._cur_scale = self._get_current_scale(im_target_size, im_size)
self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]
def _get_current_scale(self, im_target_size, im_size):
return im_target_size / im_size
def _create_chips(self, h: int, w: int, scale: float):
"""
Generate chips by chip_target_size and chip_target_stride.
These two parameters act like kernel_size and stride in a CNN.
:return: chips, Cx4, xy in raw size dimension
"""
chip_size = self.chip_target_size # omit target for simplicity
stride = self.chip_target_stride
width = int(scale * w)
height = int(scale * h)
min_chip_location_diff = 20 # in target size
assert chip_size >= stride
chip_overlap = chip_size - stride
if (width - chip_overlap
) % stride > min_chip_location_diff: # if the remainder not divisible by stride is large, keep it
w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
else: # if the remainder not divisible by stride is small, drop it
w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
if (height - chip_overlap) % stride > min_chip_location_diff:
h_steps = max(1, int(math.ceil((height - chip_overlap) / stride)))
else:
h_steps = max(1, int(math.floor((height - chip_overlap) / stride)))
chips = list()
for j in range(h_steps):
for i in range(w_steps):
x1 = i * stride
y1 = j * stride
x2 = min(x1 + chip_size, width)
y2 = min(y1 + chip_size, height)
chips.append([x1, y1, x2, y2])
# check chip size
for item in chips:
if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
1] > chip_size * 1.1:
raise ValueError(item)
chips = np.array(chips, dtype=np.float32)
raw_size_chips = chips / scale
return raw_size_chips
def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
valid_ratio_range = self._cur_valid_ratio_range
im_size = self._cur_im_size
scale = self._cur_scale
# Nx4 N
valid_boxes, valid_boxes_idx = self._validate_boxes(
valid_ratio_range, im_size, gt_bbox, scale)
# dict: chipid->[box_id, ...]
pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
valid_boxes_idx)
return pos_chip2boxes_idx
def _validate_boxes(self,
valid_ratio_range: List[float],
im_size: int,
gt_boxes: 'np.array of Nx4',
scale: float):
"""
:return: valid_boxes: Nx4, valid_boxes_idx: N
"""
ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
maxs = np.maximum(ws, hs)
box_ratio = maxs / im_size
mins = np.minimum(ws, hs)
target_mins = mins * scale
low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
np.float32).max
valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) & (
target_mins >= 2))[0]
valid_boxes = gt_boxes[valid_boxes_idx]
return valid_boxes, valid_boxes_idx
def _find_pos_chips(self,
chips: 'Cx4',
valid_boxes: 'Bx4',
valid_boxes_idx: 'B'):
"""
:return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
"""
iob = intersection_over_box(chips, valid_boxes) # overlap, CxB
iob_threshold_to_find_chips = 1.
pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
iob, iob_threshold_to_find_chips)
pos_chip_ids = set(pos_chip_ids)
iob_threshold_to_assign_box = 0.5
pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
return pos_chip2boxes_idx
def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)
def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
valid_boxes_idx):
chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
pos_chip2boxes_idx = defaultdict(list)
for chip_id, box_id in zip(chip_ids, box_ids):
if chip_id not in pos_chip_ids:
continue
raw_gt_box_idx = valid_boxes_idx[box_id]
pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
return pos_chip2boxes_idx
def _get_neg_boxes_and_chips(self,
chips: 'Cx4',
pos_chip_ids: 'D',
proposals: 'Px4'):
"""
:param chips:
:param pos_chip_ids:
:param proposals:
:return: neg_chip2box_num, None or dict: chipid->neg_box_num
"""
if not self.use_neg_chip:
return None
# train proposals maybe None
if proposals is None or len(proposals) < 1:
return None
valid_ratio_range = self._cur_valid_ratio_range
im_size = self._cur_im_size
scale = self._cur_scale
valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
proposals, scale)
neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
return neg_chip2box_num
def _find_neg_boxes(self,
chips: 'Cx4',
pos_chip_ids: 'D',
valid_props: 'Px4'):
"""
:return: neg_boxes: Nx4
"""
if len(pos_chip_ids) == 0:
return valid_props
pos_chips = chips[pos_chip_ids]
iob = intersection_over_box(pos_chips, valid_props)
overlap_per_prop = np.max(iob, axis=0)
non_overlap_props_idx = overlap_per_prop < 0.5
neg_boxes = valid_props[non_overlap_props_idx]
return neg_boxes
def _find_neg_chips(self, chips: 'Cx4', pos_chip_ids: 'D',
neg_boxes: 'Nx4'):
"""
:return: neg_chip2box_num, dict: chipid->neg_box_num
"""
neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
neg_chips = chips[neg_chip_ids]
iob = intersection_over_box(neg_chips, neg_boxes)
iob_threshold_to_find_chips = 0.7
chosen_neg_chip_ids, chip_id2overlap_box_num = \
self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)
neg_chipid2box_num = {}
for cid in chosen_neg_chip_ids:
box_num = chip_id2overlap_box_num[cid]
raw_chip_id = neg_chip_ids[cid]
neg_chipid2box_num[raw_chip_id] = box_num
return neg_chipid2box_num
def crop_infer_anno_records(self, records: List[dict]):
"""
transform image record to chips record
:param records:
:return: new_records, list of dict like
{
'im_file': 'fake_image1.jpg',
'im_id': np.array([1]), # new _global_chip_id as im_id
'h': h, # chip height
'w': w, # chip width
'chip': [x1, y1, x2, y2] # added
'ori_im_h': ori_im_h # added, origin image height
'ori_im_w': ori_im_w # added, origin image width
'scale_i': 0 # added,
}
"""
self.chip_records = []
self._global_chip_id = 1 # im_id start from 1
self._global_chip_id2img_id = {}
for r in records:
for scale_i in range(self.scale_num):
self._get_current_scale_parameters(scale_i, r)
# Cx4
chips = self._create_chips(r['h'], r['w'], self._cur_scale)
cur_img_chip_record = self._get_chips_records(r, chips, scale_i)
self.chip_records.extend(cur_img_chip_record)
return self.chip_records
def _get_chips_records(self, rec, chips, scale_i):
cur_img_chip_records = []
ori_im_h = rec["h"]
ori_im_w = rec["w"]
im_file = rec["im_file"]
ori_im_id = rec["im_id"]
for id, chip in enumerate(chips):
chip_rec = {}
x1, y1, x2, y2 = chip
chip_h = y2 - y1
chip_w = x2 - x1
chip_rec["im_file"] = im_file
chip_rec["im_id"] = self._global_chip_id
chip_rec["h"] = chip_h
chip_rec["w"] = chip_w
chip_rec["chip"] = chip
chip_rec["ori_im_h"] = ori_im_h
chip_rec["ori_im_w"] = ori_im_w
chip_rec["scale_i"] = scale_i
self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
self._global_chip_id += 1
cur_img_chip_records.append(chip_rec)
return cur_img_chip_records
def aggregate_chips_detections(self, results, records=None):
"""
# 1. transform chip dets to image dets
# 2. nms boxes per image;
# 3. format output results
:param results:
:param roidb:
:return:
"""
results = deepcopy(results)
records = records if records else self.chip_records
img_id2bbox = self._transform_chip2image_bboxes(results, records)
nms_img_id2bbox = self._nms_dets(img_id2bbox)
aggregate_results = self._reformat_results(nms_img_id2bbox)
return aggregate_results
def _transform_chip2image_bboxes(self, results, records):
# 1. Transform chip dets to image dets;
# 2. Filter valid range;
# 3. Reformat and Aggregate chip dets to Get scale_cls_dets
img_id2bbox = defaultdict(list)
for result in results:
bbox_locs = result['bbox']
bbox_nums = result['bbox_num']
if len(bbox_locs) == 1 and bbox_locs[0][
0] == -1: # current batch has no detections
# bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
# MultiClassNMS output: if there are no detected boxes for any image, lod will be set to {1} and Out contains only one value, which is -1.
continue
im_ids = result['im_id'] # replace with range(len(bbox_nums))
last_bbox_num = 0
for idx, im_id in enumerate(im_ids):
cur_bbox_len = bbox_nums[idx]
bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
last_bbox_num += cur_bbox_len
# box: [num_id, score, xmin, ymin, xmax, ymax]
if len(bboxes) == 0: # current image has no detections
continue
chip_rec = records[int(im_id) -
1] # im_id starts from 1, type is np.int64
image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])
bboxes = transform_chip_boxes2image_boxes(
bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
chip_rec["ori_im_w"])
scale_i = chip_rec["scale_i"]
cur_scale = self._get_current_scale(self.target_sizes[scale_i],
image_size)
_, valid_boxes_idx = self._validate_boxes(
self.valid_box_ratio_ranges[scale_i], image_size,
bboxes[:, 2:], cur_scale)
ori_img_id = self._global_chip_id2img_id[int(im_id)]
img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])
return img_id2bbox
def _nms_dets(self, img_id2bbox):
# 1. NMS on each image-class
# 2. Limit number of detections to MAX_PER_IMAGE if requested
max_per_img = self.max_per_img
nms_thresh = self.nms_thresh
for img_id in img_id2bbox:
box = img_id2bbox[
img_id] # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
box = np.concatenate(box, axis=0)
nms_dets = nms(box, nms_thresh)
if max_per_img > 0:
if len(nms_dets) > max_per_img:
keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
nms_dets = nms_dets[keep]
img_id2bbox[img_id] = nms_dets
return img_id2bbox
def _reformat_results(self, img_id2bbox):
"""reformat results"""
im_ids = img_id2bbox.keys()
results = []
for img_id in im_ids: # output by original im_id order
if len(img_id2bbox[img_id]) == 0:
bbox = np.array(
[[-1., 0., 0., 0., 0., 0.]]) # edge case: no detections
bbox_num = np.array([0])
else:
# np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
bbox = img_id2bbox[img_id]
bbox_num = np.array([len(bbox)])
res = dict(im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
results.append(res)
return results
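Below is a minimal usage sketch of AnnoCropper with hypothetical record values; the import path is assumed rather than taken from this commit, and the aggregation call is shown only as a comment.

import numpy as np
# from ppdet.data.crop_utils.annotation_cropper import AnnoCropper  # assumed import path

cropper = AnnoCropper(
    image_target_sizes=[2000, 1000],
    valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
    chip_target_size=500,
    chip_target_stride=200)

# one fake coco-style record; 'proposals' is only needed when use_neg_chip=True
records = [{
    'im_file': 'fake_image1.jpg',
    'im_id': np.array([1]),
    'h': 1500,
    'w': 2000,
    'gt_bbox': np.array([[100., 100., 180., 160.]], dtype=np.float32),
    'gt_class': np.array([[0]], dtype=np.int32),
    'is_crowd': np.array([[0]], dtype=np.int32),
}]
chip_records = cropper.crop_anno_records(records)  # one record per selected chip
# after running inference on the chips: cropper.aggregate_chips_detections(chip_results)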


@@ -0,0 +1,170 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def bbox_area(boxes):
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def intersection_over_box(chips, boxes):
"""
intersection area over box area
:param chips: C
:param boxes: B
:return: iob, CxB
"""
M = chips.shape[0]
N = boxes.shape[0]
if M * N == 0:
return np.zeros([M, N], dtype='float32')
box_area = bbox_area(boxes) # B
inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:],
boxes[:, 2:]) # CxBX2
inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2],
boxes[:, :2]) # CxBx2
inter_wh = inter_x2y2 - inter_x1y1
inter_wh = np.clip(inter_wh, a_min=0, a_max=None)
inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1] # CxB
iob = inter_area / np.expand_dims(box_area, 0)
return iob
def clip_boxes(boxes, im_shape):
"""
Clip boxes to image boundaries.
:param boxes: [N, 4]
:param im_shape: tuple of 2, [h, w]
:return: [N, 4]
"""
# x1 >= 0
boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
# y1 >= 0
boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
# x2 < im_shape[1]
boxes[:, 2] = np.clip(boxes[:, 2], 1, im_shape[1])
# y2 < im_shape[0]
boxes[:, 3] = np.clip(boxes[:, 3], 1, im_shape[0])
return boxes
def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
boxes_idx = np.array(boxes_idx)
cur_gt_bbox = gt_bbox[boxes_idx].copy() # Bx4
x1, y1, x2, y2 = chip
cur_gt_bbox[:, 0] -= x1
cur_gt_bbox[:, 1] -= y1
cur_gt_bbox[:, 2] -= x1
cur_gt_bbox[:, 3] -= y1
h = y2 - y1
w = x2 - x1
cur_gt_bbox = clip_boxes(cur_gt_bbox, (h, w))
ws = (cur_gt_bbox[:, 2] - cur_gt_bbox[:, 0]).astype(np.int32)
hs = (cur_gt_bbox[:, 3] - cur_gt_bbox[:, 1]).astype(np.int32)
valid_idx = (ws >= 2) & (hs >= 2)
return cur_gt_bbox[valid_idx], boxes_idx[valid_idx]
def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
chip_id2overlap_box_num = np.bincount(chip_ids) # 1d array
chip_id2overlap_box_num = np.pad(
chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
constant_values=0)
chosen_chip_ids = []
while len(box_ids) > 0:
value_counts = np.bincount(chip_ids) # 1d array
max_count_chip_id = np.argmax(value_counts)
assert max_count_chip_id not in chosen_chip_ids
chosen_chip_ids.append(max_count_chip_id)
box_ids_in_cur_chip = box_ids[chip_ids == max_count_chip_id]
ids_not_in_cur_boxes_mask = np.logical_not(
np.isin(box_ids, box_ids_in_cur_chip))
chip_ids = chip_ids[ids_not_in_cur_boxes_mask]
box_ids = box_ids[ids_not_in_cur_boxes_mask]
return chosen_chip_ids, chip_id2overlap_box_num
def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
xmin, ymin, _, _ = chip
# Transform to origin image loc
chip_boxes[:, 2] += xmin
chip_boxes[:, 4] += xmin
chip_boxes[:, 3] += ymin
chip_boxes[:, 5] += ymin
chip_boxes = clip_boxes(chip_boxes, (img_h, img_w))
return chip_boxes
def nms(dets, thresh):
"""Apply classic DPM-style greedy NMS."""
if dets.shape[0] == 0:
return dets[[], :]
scores = dets[:, 1]
x1 = dets[:, 2]
y1 = dets[:, 3]
x2 = dets[:, 4]
y2 = dets[:, 5]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
ndets = dets.shape[0]
suppressed = np.zeros((ndets), dtype=np.int32)
# nominal indices
# _i, _j
# sorted indices
# i, j
# temp variables for box i's (the box currently under consideration)
# ix1, iy1, ix2, iy2, iarea
# variables for computing overlap with box j (lower scoring box)
# xx1, yy1, xx2, yy2
# w, h
# inter, ovr
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= thresh:
suppressed[j] = 1
keep = np.where(suppressed == 0)[0]
dets = dets[keep, :]
return dets
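A small worked example of the two main helpers above, on toy values; the dets rows follow the [label, score, x1, y1, x2, y2] layout that nms expects.

import numpy as np

chips = np.array([[0., 0., 100., 100.]], dtype=np.float32)    # Cx4, here C=1
boxes = np.array([[50., 50., 150., 150.]], dtype=np.float32)  # Bx4, here B=1
iob = intersection_over_box(chips, boxes)
# intersection 50x50=2500 over box area 100x100=10000 -> iob[0, 0] == 0.25

dets = np.array([
    [0, 0.9, 10, 10, 60, 60],      # kept (highest score)
    [0, 0.8, 12, 12, 62, 62],      # suppressed: IoU with the first box > 0.5
    [0, 0.7, 200, 200, 260, 260],  # kept: no overlap with the first box
], dtype=np.float32)
kept = nms(dets, 0.5)  # rows 0 and 2 survive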


@@ -0,0 +1,130 @@
import math
import numpy as np
from imgaug.augmentables.lines import LineString
from scipy.interpolate import InterpolatedUnivariateSpline
def lane_to_linestrings(lanes):
lines = []
for lane in lanes:
lines.append(LineString(lane))
return lines
def linestrings_to_lanes(lines):
lanes = []
for line in lines:
lanes.append(line.coords)
return lanes
def sample_lane(points, sample_ys, img_w):
# this function expects the points to be sorted
points = np.array(points)
if not np.all(points[1:, 1] < points[:-1, 1]):
raise Exception('Annotation points have to be sorted')
x, y = points[:, 0], points[:, 1]
# interpolate points inside domain
assert len(points) > 1
interp = InterpolatedUnivariateSpline(
y[::-1], x[::-1], k=min(3, len(points) - 1))
domain_min_y = y.min()
domain_max_y = y.max()
sample_ys_inside_domain = sample_ys[(sample_ys >= domain_min_y) & (
sample_ys <= domain_max_y)]
assert len(sample_ys_inside_domain) > 0
interp_xs = interp(sample_ys_inside_domain)
# extrapolate lane to the bottom of the image with a straight line using the 2 points closest to the bottom
two_closest_points = points[:2]
extrap = np.polyfit(
two_closest_points[:, 1], two_closest_points[:, 0], deg=1)
extrap_ys = sample_ys[sample_ys > domain_max_y]
extrap_xs = np.polyval(extrap, extrap_ys)
all_xs = np.hstack((extrap_xs, interp_xs))
# separate between inside and outside points
inside_mask = (all_xs >= 0) & (all_xs < img_w)
xs_inside_image = all_xs[inside_mask]
xs_outside_image = all_xs[~inside_mask]
return xs_outside_image, xs_inside_image
def filter_lane(lane):
assert lane[-1][1] <= lane[0][1]
filtered_lane = []
used = set()
for p in lane:
if p[1] not in used:
filtered_lane.append(p)
used.add(p[1])
return filtered_lane
def transform_annotation(img_w, img_h, max_lanes, n_offsets, offsets_ys,
n_strips, strip_size, anno):
old_lanes = anno['lanes']
# remove lanes with fewer than 2 points
old_lanes = filter(lambda x: len(x) > 1, old_lanes)
# sort lane points by Y (bottom to top of the image)
old_lanes = [sorted(lane, key=lambda x: -x[1]) for lane in old_lanes]
# remove points with same Y (keep first occurrence)
old_lanes = [filter_lane(lane) for lane in old_lanes]
# normalize the annotation coordinates
old_lanes = [[[x * img_w / float(img_w), y * img_h / float(img_h)]
for x, y in lane] for lane in old_lanes]
# create transformed annotations
lanes = np.ones(
(max_lanes, 2 + 1 + 1 + 2 + n_offsets), dtype=np.float32
) * -1e5 # 2 scores, 1 start_y, 1 start_x, 1 theta, 1 length, S+1 coordinates
lanes_endpoints = np.ones((max_lanes, 2))
# lanes are invalid by default
lanes[:, 0] = 1
lanes[:, 1] = 0
for lane_idx, lane in enumerate(old_lanes):
if lane_idx >= max_lanes:
break
try:
xs_outside_image, xs_inside_image = sample_lane(lane, offsets_ys,
img_w)
except AssertionError:
continue
if len(xs_inside_image) <= 1:
continue
all_xs = np.hstack((xs_outside_image, xs_inside_image))
lanes[lane_idx, 0] = 0
lanes[lane_idx, 1] = 1
lanes[lane_idx, 2] = len(xs_outside_image) / n_strips
lanes[lane_idx, 3] = xs_inside_image[0]
thetas = []
for i in range(1, len(xs_inside_image)):
theta = math.atan(
i * strip_size /
(xs_inside_image[i] - xs_inside_image[0] + 1e-5)) / math.pi
theta = theta if theta > 0 else 1 - abs(theta)
thetas.append(theta)
theta_far = sum(thetas) / len(thetas)
# lanes[lane_idx,
# 4] = (theta_closest + theta_far) / 2 # averaged angle
lanes[lane_idx, 4] = theta_far
lanes[lane_idx, 5] = len(xs_inside_image)
lanes[lane_idx, 6:6 + len(all_xs)] = all_xs
lanes_endpoints[lane_idx, 0] = (len(all_xs) - 1) / n_strips
lanes_endpoints[lane_idx, 1] = xs_inside_image[-1]
new_anno = {
'label': lanes,
'old_anno': anno,
'lane_endpoints': lanes_endpoints
}
return new_anno
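A short sketch of sample_lane on a toy lane, with hypothetical coordinates; the points must be ordered bottom-to-top, i.e. by strictly decreasing y.

import numpy as np

points = [(100., 300.), (120., 250.), (140., 200.)]  # one lane, y decreasing
sample_ys = np.arange(350, 190, -10)                  # rows at which x is sampled
xs_outside, xs_inside = sample_lane(points, sample_ys, 800)
# the sampled xs combine a straight-line extrapolation below y=300 with the
# spline interpolation on [200, 300]; xs_inside are those with 0 <= x < 800
# and xs_outside are the rest (empty for these values).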


@@ -0,0 +1,615 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import os
import traceback
import six
import sys
if sys.version_info >= (3, 0):
pass
else:
pass
import numpy as np
import paddle
import paddle.nn.functional as F
from copy import deepcopy
from paddle.io import DataLoader, DistributedBatchSampler
from .utils import default_collate_fn
from ppdet.core.workspace import register
from . import transform
from .shm_utils import _get_shared_memory_size_in_M
from ppdet.utils.logger import setup_logger
logger = setup_logger('reader')
MAIN_PID = os.getpid()
class Compose(object):
def __init__(self, transforms, num_classes=80):
self.transforms = transforms
self.transforms_cls = []
for t in self.transforms:
for k, v in t.items():
op_cls = getattr(transform, k)
f = op_cls(**v)
if hasattr(f, 'num_classes'):
f.num_classes = num_classes
self.transforms_cls.append(f)
def __call__(self, data):
for f in self.transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map sample transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
return data
class BatchCompose(Compose):
def __init__(self, transforms, num_classes=80, collate_batch=True):
super(BatchCompose, self).__init__(transforms, num_classes)
self.collate_batch = collate_batch
def __call__(self, data):
for f in self.transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map batch transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
# remove keys which are not needed by the model
extra_key = ['h', 'w', 'flipped']
for k in extra_key:
for sample in data:
if k in sample:
sample.pop(k)
# batch data; if a user-defined batch
# function is needed, use it here
if self.collate_batch:
batch_data = default_collate_fn(data)
else:
batch_data = {}
for k in data[0].keys():
tmp_data = []
for i in range(len(data)):
tmp_data.append(data[i][k])
if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
tmp_data = np.stack(tmp_data, axis=0)
batch_data[k] = tmp_data
return batch_data
class BaseDataLoader(object):
"""
Base DataLoader implementation for detection models
Args:
sample_transforms (list): a list of transforms to perform
on each sample
batch_transforms (list): a list of transforms to perform
on batch
batch_size (int): batch size for batch collating, default 1.
shuffle (bool): whether to shuffle samples
drop_last (bool): whether to drop the last incomplete batch,
default False
num_classes (int): class number of dataset, default 80
collate_batch (bool): whether to collate batch in dataloader.
If set to True, the samples will be collated into a batch according
to the batch size. Otherwise, the ground-truth will not be collated,
which is useful when the number of ground-truths differs between
samples.
use_shared_memory (bool): whether to use shared memory to
accelerate data loading; enable this only if you
are sure that the shared memory size of your OS
is larger than the memory cost of the model's input data.
Note that shared memory will be automatically
disabled if the shared memory of the OS is less than
1G, which is not enough for detection models.
Default False.
"""
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=80,
collate_batch=True,
use_shared_memory=False,
**kwargs):
# sample transform
self._sample_transforms = Compose(
sample_transforms, num_classes=num_classes)
# batch transform
self._batch_transforms = BatchCompose(batch_transforms, num_classes,
collate_batch)
self.batch_size = batch_size
self.shuffle = shuffle
self.drop_last = drop_last
self.use_shared_memory = use_shared_memory
self.kwargs = kwargs
def __call__(self,
dataset,
worker_num,
batch_sampler=None,
return_list=False):
self.dataset = dataset
self.dataset.check_or_download_dataset()
self.dataset.parse_dataset()
# get data
self.dataset.set_transform(self._sample_transforms)
# set kwargs
self.dataset.set_kwargs(**self.kwargs)
# batch sampler
if batch_sampler is None:
self._batch_sampler = DistributedBatchSampler(
self.dataset,
batch_size=self.batch_size,
shuffle=self.shuffle,
drop_last=self.drop_last)
else:
self._batch_sampler = batch_sampler
# DataLoader does not start sub-processes on Windows and Mac
# systems, so shared memory is not needed there
use_shared_memory = self.use_shared_memory and \
sys.platform not in ['win32', 'darwin']
# check whether shared memory size is bigger than 1G(1024M)
if use_shared_memory:
shm_size = _get_shared_memory_size_in_M()
if shm_size is not None and shm_size < 1024.:
logger.warning("Shared memory size is less than 1G, "
"disable shared_memory in DataLoader")
use_shared_memory = False
self.dataloader = DataLoader(
dataset=self.dataset,
batch_sampler=self._batch_sampler,
collate_fn=self._batch_transforms,
num_workers=worker_num,
return_list=return_list,
use_shared_memory=use_shared_memory)
self.loader = iter(self.dataloader)
return self
def __len__(self):
return len(self._batch_sampler)
def __iter__(self):
return self
def __next__(self):
try:
return next(self.loader)
except StopIteration:
self.loader = iter(self.dataloader)
six.reraise(*sys.exc_info())
def next(self):
# python2 compatibility
return self.__next__()
@register
class TrainReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=True,
drop_last=True,
num_classes=80,
collate_batch=True,
**kwargs):
super(TrainReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, collate_batch, **kwargs)
@register
class EvalReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=80,
**kwargs):
super(EvalReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
@register
class TestReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=80,
**kwargs):
super(TestReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
@register
class EvalMOTReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=1,
**kwargs):
super(EvalMOTReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
@register
class TestMOTReader(BaseDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
batch_transforms=[],
batch_size=1,
shuffle=False,
drop_last=False,
num_classes=1,
**kwargs):
super(TestMOTReader, self).__init__(sample_transforms, batch_transforms,
batch_size, shuffle, drop_last,
num_classes, **kwargs)
# For Semi-Supervised Object Detection (SSOD)
class Compose_SSOD(object):
def __init__(self, base_transforms, weak_aug, strong_aug, num_classes=80):
self.base_transforms = base_transforms
self.base_transforms_cls = []
for t in self.base_transforms:
for k, v in t.items():
op_cls = getattr(transform, k)
f = op_cls(**v)
if hasattr(f, 'num_classes'):
f.num_classes = num_classes
self.base_transforms_cls.append(f)
self.weak_augs = weak_aug
self.weak_augs_cls = []
for t in self.weak_augs:
for k, v in t.items():
op_cls = getattr(transform, k)
f = op_cls(**v)
if hasattr(f, 'num_classes'):
f.num_classes = num_classes
self.weak_augs_cls.append(f)
self.strong_augs = strong_aug
self.strong_augs_cls = []
for t in self.strong_augs:
for k, v in t.items():
op_cls = getattr(transform, k)
f = op_cls(**v)
if hasattr(f, 'num_classes'):
f.num_classes = num_classes
self.strong_augs_cls.append(f)
def __call__(self, data):
for f in self.base_transforms_cls:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map sample transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
weak_data = deepcopy(data)
strong_data = deepcopy(data)
for f in self.weak_augs_cls:
try:
weak_data = f(weak_data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map weak aug [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
for f in self.strong_augs_cls:
try:
strong_data = f(strong_data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map strong aug [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
weak_data['strong_aug'] = strong_data
return weak_data
class BatchCompose_SSOD(Compose):
def __init__(self, transforms, num_classes=80, collate_batch=True):
super(BatchCompose_SSOD, self).__init__(transforms, num_classes)
self.collate_batch = collate_batch
def __call__(self, data):
# split strong_data from data(weak_data)
strong_data = []
for sample in data:
strong_data.append(sample['strong_aug'])
sample.pop('strong_aug')
for f in self.transforms_cls:
try:
data = f(data)
if 'BatchRandomResizeForSSOD' in f._id:
strong_data = f(strong_data, data[1])[0]
data = data[0]
else:
strong_data = f(strong_data)
except Exception as e:
stack_info = traceback.format_exc()
logger.warning("fail to map batch transform [{}] "
"with error: {} and stack:\n{}".format(
f, e, str(stack_info)))
raise e
# remove keys which are not needed by the model
extra_key = ['h', 'w', 'flipped']
for k in extra_key:
for sample in data:
if k in sample:
sample.pop(k)
for sample in strong_data:
if k in sample:
sample.pop(k)
# batch data; if a user-defined batch
# function is needed, use it here
if self.collate_batch:
batch_data = default_collate_fn(data)
strong_batch_data = default_collate_fn(strong_data)
return batch_data, strong_batch_data
else:
batch_data = {}
for k in data[0].keys():
tmp_data = []
for i in range(len(data)):
tmp_data.append(data[i][k])
if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
tmp_data = np.stack(tmp_data, axis=0)
batch_data[k] = tmp_data
strong_batch_data = {}
for k in strong_data[0].keys():
tmp_data = []
for i in range(len(strong_data)):
tmp_data.append(strong_data[i][k])
if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
tmp_data = np.stack(tmp_data, axis=0)
strong_batch_data[k] = tmp_data
return batch_data, strong_batch_data
class CombineSSODLoader(object):
def __init__(self, label_loader, unlabel_loader):
self.label_loader = label_loader
self.unlabel_loader = unlabel_loader
def __iter__(self):
while True:
try:
label_samples = next(self.label_loader_iter)
except:
self.label_loader_iter = iter(self.label_loader)
label_samples = next(self.label_loader_iter)
try:
unlabel_samples = next(self.unlabel_loader_iter)
except:
self.unlabel_loader_iter = iter(self.unlabel_loader)
unlabel_samples = next(self.unlabel_loader_iter)
yield (
label_samples[0], # sup weak
label_samples[1], # sup strong
unlabel_samples[0], # unsup weak
unlabel_samples[1] # unsup strong
)
def __call__(self):
return self.__iter__()
class BaseSemiDataLoader(object):
def __init__(self,
sample_transforms=[],
weak_aug=[],
strong_aug=[],
sup_batch_transforms=[],
unsup_batch_transforms=[],
sup_batch_size=1,
unsup_batch_size=1,
shuffle=True,
drop_last=True,
num_classes=80,
collate_batch=True,
use_shared_memory=False,
**kwargs):
# sup transforms
self._sample_transforms_label = Compose_SSOD(
sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
self._batch_transforms_label = BatchCompose_SSOD(
sup_batch_transforms, num_classes, collate_batch)
self.batch_size_label = sup_batch_size
# unsup transforms
self._sample_transforms_unlabel = Compose_SSOD(
sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
self._batch_transforms_unlabel = BatchCompose_SSOD(
unsup_batch_transforms, num_classes, collate_batch)
self.batch_size_unlabel = unsup_batch_size
# common
self.shuffle = shuffle
self.drop_last = drop_last
self.use_shared_memory = use_shared_memory
self.kwargs = kwargs
def __call__(self,
dataset_label,
dataset_unlabel,
worker_num,
batch_sampler_label=None,
batch_sampler_unlabel=None,
return_list=False):
# sup dataset
self.dataset_label = dataset_label
self.dataset_label.check_or_download_dataset()
self.dataset_label.parse_dataset()
self.dataset_label.set_transform(self._sample_transforms_label)
self.dataset_label.set_kwargs(**self.kwargs)
if batch_sampler_label is None:
self._batch_sampler_label = DistributedBatchSampler(
self.dataset_label,
batch_size=self.batch_size_label,
shuffle=self.shuffle,
drop_last=self.drop_last)
else:
self._batch_sampler_label = batch_sampler_label
# unsup dataset
self.dataset_unlabel = dataset_unlabel
self.dataset_unlabel.length = self.dataset_label.__len__()
self.dataset_unlabel.check_or_download_dataset()
self.dataset_unlabel.parse_dataset()
self.dataset_unlabel.set_transform(self._sample_transforms_unlabel)
self.dataset_unlabel.set_kwargs(**self.kwargs)
if batch_sampler_unlabel is None:
self._batch_sampler_unlabel = DistributedBatchSampler(
self.dataset_unlabel,
batch_size=self.batch_size_unlabel,
shuffle=self.shuffle,
drop_last=self.drop_last)
else:
self._batch_sampler_unlabel = batch_sampler_unlabel
# DataLoader does not start sub-processes on Windows and Mac
# systems, so shared memory is not needed there
use_shared_memory = self.use_shared_memory and \
sys.platform not in ['win32', 'darwin']
# check whether shared memory size is bigger than 1G(1024M)
if use_shared_memory:
shm_size = _get_shared_memory_size_in_M()
if shm_size is not None and shm_size < 1024.:
logger.warning("Shared memory size is less than 1G, "
"disable shared_memory in DataLoader")
use_shared_memory = False
self.dataloader_label = DataLoader(
dataset=self.dataset_label,
batch_sampler=self._batch_sampler_label,
collate_fn=self._batch_transforms_label,
num_workers=worker_num,
return_list=return_list,
use_shared_memory=use_shared_memory)
self.dataloader_unlabel = DataLoader(
dataset=self.dataset_unlabel,
batch_sampler=self._batch_sampler_unlabel,
collate_fn=self._batch_transforms_unlabel,
num_workers=worker_num,
return_list=return_list,
use_shared_memory=use_shared_memory)
self.dataloader = CombineSSODLoader(self.dataloader_label,
self.dataloader_unlabel)
self.loader = iter(self.dataloader)
return self
def __len__(self):
return len(self._batch_sampler_label)
def __iter__(self):
return self
def __next__(self):
return next(self.loader)
def next(self):
# python2 compatibility
return self.__next__()
@register
class SemiTrainReader(BaseSemiDataLoader):
__shared__ = ['num_classes']
def __init__(self,
sample_transforms=[],
weak_aug=[],
strong_aug=[],
sup_batch_transforms=[],
unsup_batch_transforms=[],
sup_batch_size=1,
unsup_batch_size=1,
shuffle=True,
drop_last=True,
num_classes=80,
collate_batch=True,
**kwargs):
super(SemiTrainReader, self).__init__(
sample_transforms, weak_aug, strong_aug, sup_batch_transforms,
unsup_batch_transforms, sup_batch_size, unsup_batch_size, shuffle,
drop_last, num_classes, collate_batch, **kwargs)
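For reference, a minimal sketch of the transform-config structure that Compose and BatchCompose expect: a list of single-key dicts mapping a registered op name in ppdet.data.transform to its constructor kwargs. The op names and kwargs below are illustrative assumptions, not taken from this commit.

# illustrative only: assumes 'Decode' and 'Resize' are ops registered
# in ppdet.data.transform with these kwargs
sample_transforms = [
    {'Decode': {}},
    {'Resize': {'target_size': [640, 640], 'keep_ratio': True}},
]
# Compose resolves each key via getattr(transform, name), instantiates the
# op with its kwargs, and propagates num_classes when the op defines it:
# compose = Compose(sample_transforms, num_classes=80)
# sample = compose({'im_file': 'demo.jpg'})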


@@ -0,0 +1,70 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
SIZE_UNIT = ['K', 'M', 'G', 'T']
SHM_QUERY_CMD = 'df -h'
SHM_KEY = 'shm'
SHM_DEFAULT_MOUNT = '/dev/shm'
# [ shared memory size check ]
# In detection models, image/target data occupies a lot of memory, and
# will occupy lots of shared memory in multi-process DataLoader, we use
# following code to get shared memory size and perform a size check to
# disable shared memory use if shared memory size is not enough.
# Shared memory getting process as follows:
# 1. use `df -h` get all mount info
# 2. pick up spaces whose mount info contains 'shm'
# 3. if there is only one 'shm' space, return its size
# 4. if there are multiple 'shm' spaces, try to find the default mount
#    directory '/dev/shm' of Linux-like systems, otherwise return the
#    biggest space size.
def _parse_size_in_M(size_str):
if size_str[-1] == 'B':
num, unit = size_str[:-2], size_str[-2]
else:
num, unit = size_str[:-1], size_str[-1]
assert unit in SIZE_UNIT, \
"unknown shm size unit {}".format(unit)
return float(num) * \
(1024 ** (SIZE_UNIT.index(unit) - 1))
def _get_shared_memory_size_in_M():
try:
df_infos = os.popen(SHM_QUERY_CMD).readlines()
except:
return None
else:
shm_infos = []
for df_info in df_infos:
info = df_info.strip()
if info.find(SHM_KEY) >= 0:
shm_infos.append(info.split())
if len(shm_infos) == 0:
return None
elif len(shm_infos) == 1:
return _parse_size_in_M(shm_infos[0][3])
else:
default_mount_infos = [
si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
]
if default_mount_infos:
return _parse_size_in_M(default_mount_infos[0][3])
else:
return max([_parse_size_in_M(si[3]) for si in shm_infos])
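Worked values for _parse_size_in_M, given the `df -h` style size strings it is meant to parse:

_parse_size_in_M('500M')  # -> 500.0
_parse_size_in_M('64G')   # -> 65536.0 (64 * 1024 M)
_parse_size_in_M('7.8G')  # -> 7987.2
# _get_shared_memory_size_in_M() returns None when `df -h` fails or no 'shm'
# mount is found; otherwise it reports /dev/shm (or the largest 'shm' mount)
# in megabytes.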


@@ -0,0 +1,33 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import coco
from . import voc
from . import widerface
from . import category
from . import keypoint_coco
from . import mot
from . import sniper_coco
from . import culane
from .coco import *
from .voc import *
from .widerface import *
from .category import *
from .keypoint_coco import *
from .mot import *
from .sniper_coco import SniperCOCODataSet
from .dataset import ImageFolder
from .pose3d_cmb import *
from .culane import *


@@ -0,0 +1,942 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from ppdet.data.source.voc import pascalvoc_label
from ppdet.data.source.widerface import widerface_label
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['get_categories']
def get_categories(metric_type, anno_file=None, arch=None):
"""
Get class id to category id map and category id
to category name map from annotation file.
Args:
metric_type (str): metric type, currently supports 'coco', 'voc', 'oid'
and 'widerface'.
anno_file (str): annotation file path
"""
if arch == 'keypoint_arch':
return (None, {'id': 'keypoint'})
if anno_file is None or (not os.path.isfile(anno_file)):
logger.warning(
"anno_file '{}' is None or not set or not exist, "
"please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
"otherwise the default categories will be used by metric_type.".
format(anno_file))
if metric_type.lower() == 'coco' or metric_type.lower(
) == 'rbox' or metric_type.lower() == 'snipercoco':
if anno_file and os.path.isfile(anno_file):
if anno_file.endswith('json'):
# lazy import pycocotools here
from pycocotools.coco import COCO
coco = COCO(anno_file)
cats = coco.loadCats(coco.getCatIds())
clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
catid2name = {cat['id']: cat['name'] for cat in cats}
elif anno_file.endswith('txt'):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background': cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
else:
raise ValueError("anno_file {} should be json or txt.".format(
anno_file))
return clsid2catid, catid2name
# anno file does not exist, load default categories of COCO17
else:
if metric_type.lower() == 'rbox':
logger.warning(
"metric_type: {}, load default categories of DOTA.".format(
metric_type))
return _dota_category()
logger.warning("metric_type: {}, load default categories of COCO.".
format(metric_type))
return _coco17_category()
elif metric_type.lower() == 'voc':
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file does not exist, load default categories of
# VOC (all 20 categories)
else:
logger.warning("metric_type: {}, load default categories of VOC.".
format(metric_type))
return _vocall_category()
elif metric_type.lower() == 'oid':
if anno_file and os.path.isfile(anno_file):
logger.warning("only default categories support for OID19")
return _oid19_category()
elif metric_type.lower() == 'widerface':
return _widerface_category()
elif metric_type.lower() in [
'keypointtopdowncocoeval', 'keypointtopdownmpiieval',
'keypointtopdowncocowholebadyhandeval'
]:
return (None, {'id': 'keypoint'})
elif metric_type.lower() == 'pose3deval':
return (None, {'id': 'pose3d'})
elif metric_type.lower() in ['mot', 'motdet', 'reid']:
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file does not exist, load default category 'pedestrian'.
else:
logger.warning(
"metric_type: {}, load default categories of pedestrian MOT.".
format(metric_type))
return _mot_category(category='pedestrian')
elif metric_type.lower() in ['kitti', 'bdd100kmot']:
return _mot_category(category='vehicle')
elif metric_type.lower() in ['mcmot']:
if anno_file and os.path.isfile(anno_file):
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] == 'background':
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# anno file does not exist, load default categories of VisDrone (all 10 categories)
else:
logger.warning(
"metric_type: {}, load default categories of VisDrone.".format(
metric_type))
return _visdrone_category()
else:
raise ValueError("unknown metric type {}".format(metric_type))
def _mot_category(category='pedestrian'):
"""
Get class id to category id map and category id
to category name map of mot dataset
"""
label_map = {category: 0}
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _coco17_category():
"""
Get class id to category id map and category id
to category name map of COCO2017 dataset
"""
clsid2catid = {
1: 1,
2: 2,
3: 3,
4: 4,
5: 5,
6: 6,
7: 7,
8: 8,
9: 9,
10: 10,
11: 11,
12: 13,
13: 14,
14: 15,
15: 16,
16: 17,
17: 18,
18: 19,
19: 20,
20: 21,
21: 22,
22: 23,
23: 24,
24: 25,
25: 27,
26: 28,
27: 31,
28: 32,
29: 33,
30: 34,
31: 35,
32: 36,
33: 37,
34: 38,
35: 39,
36: 40,
37: 41,
38: 42,
39: 43,
40: 44,
41: 46,
42: 47,
43: 48,
44: 49,
45: 50,
46: 51,
47: 52,
48: 53,
49: 54,
50: 55,
51: 56,
52: 57,
53: 58,
54: 59,
55: 60,
56: 61,
57: 62,
58: 63,
59: 64,
60: 65,
61: 67,
62: 70,
63: 72,
64: 73,
65: 74,
66: 75,
67: 76,
68: 77,
69: 78,
70: 79,
71: 80,
72: 81,
73: 82,
74: 84,
75: 85,
76: 86,
77: 87,
78: 88,
79: 89,
80: 90
}
catid2name = {
0: 'background',
1: 'person',
2: 'bicycle',
3: 'car',
4: 'motorcycle',
5: 'airplane',
6: 'bus',
7: 'train',
8: 'truck',
9: 'boat',
10: 'traffic light',
11: 'fire hydrant',
13: 'stop sign',
14: 'parking meter',
15: 'bench',
16: 'bird',
17: 'cat',
18: 'dog',
19: 'horse',
20: 'sheep',
21: 'cow',
22: 'elephant',
23: 'bear',
24: 'zebra',
25: 'giraffe',
27: 'backpack',
28: 'umbrella',
31: 'handbag',
32: 'tie',
33: 'suitcase',
34: 'frisbee',
35: 'skis',
36: 'snowboard',
37: 'sports ball',
38: 'kite',
39: 'baseball bat',
40: 'baseball glove',
41: 'skateboard',
42: 'surfboard',
43: 'tennis racket',
44: 'bottle',
46: 'wine glass',
47: 'cup',
48: 'fork',
49: 'knife',
50: 'spoon',
51: 'bowl',
52: 'banana',
53: 'apple',
54: 'sandwich',
55: 'orange',
56: 'broccoli',
57: 'carrot',
58: 'hot dog',
59: 'pizza',
60: 'donut',
61: 'cake',
62: 'chair',
63: 'couch',
64: 'potted plant',
65: 'bed',
67: 'dining table',
70: 'toilet',
72: 'tv',
73: 'laptop',
74: 'mouse',
75: 'remote',
76: 'keyboard',
77: 'cell phone',
78: 'microwave',
79: 'oven',
80: 'toaster',
81: 'sink',
82: 'refrigerator',
84: 'book',
85: 'clock',
86: 'vase',
87: 'scissors',
88: 'teddy bear',
89: 'hair drier',
90: 'toothbrush'
}
clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
catid2name.pop(0)
return clsid2catid, catid2name
def _dota_category():
"""
Get class id to category id map and category id
to category name map of dota dataset
"""
catid2name = {
0: 'background',
1: 'plane',
2: 'baseball-diamond',
3: 'bridge',
4: 'ground-track-field',
5: 'small-vehicle',
6: 'large-vehicle',
7: 'ship',
8: 'tennis-court',
9: 'basketball-court',
10: 'storage-tank',
11: 'soccer-ball-field',
12: 'roundabout',
13: 'harbor',
14: 'swimming-pool',
15: 'helicopter'
}
catid2name.pop(0)
clsid2catid = {i: i + 1 for i in range(len(catid2name))}
return clsid2catid, catid2name
def _vocall_category():
"""
Get class id to category id map and category id
to category name map of mixup voc dataset
"""
label_map = pascalvoc_label()
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _widerface_category():
label_map = widerface_label()
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def _oid19_category():
clsid2catid = {k: k + 1 for k in range(500)}
catid2name = {
0: "background",
1: "Infant bed",
2: "Rose",
3: "Flag",
4: "Flashlight",
5: "Sea turtle",
6: "Camera",
7: "Animal",
8: "Glove",
9: "Crocodile",
10: "Cattle",
11: "House",
12: "Guacamole",
13: "Penguin",
14: "Vehicle registration plate",
15: "Bench",
16: "Ladybug",
17: "Human nose",
18: "Watermelon",
19: "Flute",
20: "Butterfly",
21: "Washing machine",
22: "Raccoon",
23: "Segway",
24: "Taco",
25: "Jellyfish",
26: "Cake",
27: "Pen",
28: "Cannon",
29: "Bread",
30: "Tree",
31: "Shellfish",
32: "Bed",
33: "Hamster",
34: "Hat",
35: "Toaster",
36: "Sombrero",
37: "Tiara",
38: "Bowl",
39: "Dragonfly",
40: "Moths and butterflies",
41: "Antelope",
42: "Vegetable",
43: "Torch",
44: "Building",
45: "Power plugs and sockets",
46: "Blender",
47: "Billiard table",
48: "Cutting board",
49: "Bronze sculpture",
50: "Turtle",
51: "Broccoli",
52: "Tiger",
53: "Mirror",
54: "Bear",
55: "Zucchini",
56: "Dress",
57: "Volleyball",
58: "Guitar",
59: "Reptile",
60: "Golf cart",
61: "Tart",
62: "Fedora",
63: "Carnivore",
64: "Car",
65: "Lighthouse",
66: "Coffeemaker",
67: "Food processor",
68: "Truck",
69: "Bookcase",
70: "Surfboard",
71: "Footwear",
72: "Bench",
73: "Necklace",
74: "Flower",
75: "Radish",
76: "Marine mammal",
77: "Frying pan",
78: "Tap",
79: "Peach",
80: "Knife",
81: "Handbag",
82: "Laptop",
83: "Tent",
84: "Ambulance",
85: "Christmas tree",
86: "Eagle",
87: "Limousine",
88: "Kitchen & dining room table",
89: "Polar bear",
90: "Tower",
91: "Football",
92: "Willow",
93: "Human head",
94: "Stop sign",
95: "Banana",
96: "Mixer",
97: "Binoculars",
98: "Dessert",
99: "Bee",
100: "Chair",
101: "Wood-burning stove",
102: "Flowerpot",
103: "Beaker",
104: "Oyster",
105: "Woodpecker",
106: "Harp",
107: "Bathtub",
108: "Wall clock",
109: "Sports uniform",
110: "Rhinoceros",
111: "Beehive",
112: "Cupboard",
113: "Chicken",
114: "Man",
115: "Blue jay",
116: "Cucumber",
117: "Balloon",
118: "Kite",
119: "Fireplace",
120: "Lantern",
121: "Missile",
122: "Book",
123: "Spoon",
124: "Grapefruit",
125: "Squirrel",
126: "Orange",
127: "Coat",
128: "Punching bag",
129: "Zebra",
130: "Billboard",
131: "Bicycle",
132: "Door handle",
133: "Mechanical fan",
134: "Ring binder",
135: "Table",
136: "Parrot",
137: "Sock",
138: "Vase",
139: "Weapon",
140: "Shotgun",
141: "Glasses",
142: "Seahorse",
143: "Belt",
144: "Watercraft",
145: "Window",
146: "Giraffe",
147: "Lion",
148: "Tire",
149: "Vehicle",
150: "Canoe",
151: "Tie",
152: "Shelf",
153: "Picture frame",
154: "Printer",
155: "Human leg",
156: "Boat",
157: "Slow cooker",
158: "Croissant",
159: "Candle",
160: "Pancake",
161: "Pillow",
162: "Coin",
163: "Stretcher",
164: "Sandal",
165: "Woman",
166: "Stairs",
167: "Harpsichord",
168: "Stool",
169: "Bus",
170: "Suitcase",
171: "Human mouth",
172: "Juice",
173: "Skull",
174: "Door",
175: "Violin",
176: "Chopsticks",
177: "Digital clock",
178: "Sunflower",
179: "Leopard",
180: "Bell pepper",
181: "Harbor seal",
182: "Snake",
183: "Sewing machine",
184: "Goose",
185: "Helicopter",
186: "Seat belt",
187: "Coffee cup",
188: "Microwave oven",
189: "Hot dog",
190: "Countertop",
191: "Serving tray",
192: "Dog bed",
193: "Beer",
194: "Sunglasses",
195: "Golf ball",
196: "Waffle",
197: "Palm tree",
198: "Trumpet",
199: "Ruler",
200: "Helmet",
201: "Ladder",
202: "Office building",
203: "Tablet computer",
204: "Toilet paper",
205: "Pomegranate",
206: "Skirt",
207: "Gas stove",
208: "Cookie",
209: "Cart",
210: "Raven",
211: "Egg",
212: "Burrito",
213: "Goat",
214: "Kitchen knife",
215: "Skateboard",
216: "Salt and pepper shakers",
217: "Lynx",
218: "Boot",
219: "Platter",
220: "Ski",
221: "Swimwear",
222: "Swimming pool",
223: "Drinking straw",
224: "Wrench",
225: "Drum",
226: "Ant",
227: "Human ear",
228: "Headphones",
229: "Fountain",
230: "Bird",
231: "Jeans",
232: "Television",
233: "Crab",
234: "Microphone",
235: "Home appliance",
236: "Snowplow",
237: "Beetle",
238: "Artichoke",
239: "Jet ski",
240: "Stationary bicycle",
241: "Human hair",
242: "Brown bear",
243: "Starfish",
244: "Fork",
245: "Lobster",
246: "Corded phone",
247: "Drink",
248: "Saucer",
249: "Carrot",
250: "Insect",
251: "Clock",
252: "Castle",
253: "Tennis racket",
254: "Ceiling fan",
255: "Asparagus",
256: "Jaguar",
257: "Musical instrument",
258: "Train",
259: "Cat",
260: "Rifle",
261: "Dumbbell",
262: "Mobile phone",
263: "Taxi",
264: "Shower",
265: "Pitcher",
266: "Lemon",
267: "Invertebrate",
268: "Turkey",
269: "High heels",
270: "Bust",
271: "Elephant",
272: "Scarf",
273: "Barrel",
274: "Trombone",
275: "Pumpkin",
276: "Box",
277: "Tomato",
278: "Frog",
279: "Bidet",
280: "Human face",
281: "Houseplant",
282: "Van",
283: "Shark",
284: "Ice cream",
285: "Swim cap",
286: "Falcon",
287: "Ostrich",
288: "Handgun",
289: "Whiteboard",
290: "Lizard",
291: "Pasta",
292: "Snowmobile",
293: "Light bulb",
294: "Window blind",
295: "Muffin",
296: "Pretzel",
297: "Computer monitor",
298: "Horn",
299: "Furniture",
300: "Sandwich",
301: "Fox",
302: "Convenience store",
303: "Fish",
304: "Fruit",
305: "Earrings",
306: "Curtain",
307: "Grape",
308: "Sofa bed",
309: "Horse",
310: "Luggage and bags",
311: "Desk",
312: "Crutch",
313: "Bicycle helmet",
314: "Tick",
315: "Airplane",
316: "Canary",
317: "Spatula",
318: "Watch",
319: "Lily",
320: "Kitchen appliance",
321: "Filing cabinet",
322: "Aircraft",
323: "Cake stand",
324: "Candy",
325: "Sink",
326: "Mouse",
327: "Wine",
328: "Wheelchair",
329: "Goldfish",
330: "Refrigerator",
331: "French fries",
332: "Drawer",
333: "Treadmill",
334: "Picnic basket",
335: "Dice",
336: "Cabbage",
337: "Football helmet",
338: "Pig",
339: "Person",
340: "Shorts",
341: "Gondola",
342: "Honeycomb",
343: "Doughnut",
344: "Chest of drawers",
345: "Land vehicle",
346: "Bat",
347: "Monkey",
348: "Dagger",
349: "Tableware",
350: "Human foot",
351: "Mug",
352: "Alarm clock",
353: "Pressure cooker",
354: "Human hand",
355: "Tortoise",
356: "Baseball glove",
357: "Sword",
358: "Pear",
359: "Miniskirt",
360: "Traffic sign",
361: "Girl",
362: "Roller skates",
363: "Dinosaur",
364: "Porch",
365: "Human beard",
366: "Submarine sandwich",
367: "Screwdriver",
368: "Strawberry",
369: "Wine glass",
370: "Seafood",
371: "Racket",
372: "Wheel",
373: "Sea lion",
374: "Toy",
375: "Tea",
376: "Tennis ball",
377: "Waste container",
378: "Mule",
379: "Cricket ball",
380: "Pineapple",
381: "Coconut",
382: "Doll",
383: "Coffee table",
384: "Snowman",
385: "Lavender",
386: "Shrimp",
387: "Maple",
388: "Cowboy hat",
389: "Goggles",
390: "Rugby ball",
391: "Caterpillar",
392: "Poster",
393: "Rocket",
394: "Organ",
395: "Saxophone",
396: "Traffic light",
397: "Cocktail",
398: "Plastic bag",
399: "Squash",
400: "Mushroom",
401: "Hamburger",
402: "Light switch",
403: "Parachute",
404: "Teddy bear",
405: "Winter melon",
406: "Deer",
407: "Musical keyboard",
408: "Plumbing fixture",
409: "Scoreboard",
410: "Baseball bat",
411: "Envelope",
412: "Adhesive tape",
413: "Briefcase",
414: "Paddle",
415: "Bow and arrow",
416: "Telephone",
417: "Sheep",
418: "Jacket",
419: "Boy",
420: "Pizza",
421: "Otter",
422: "Office supplies",
423: "Couch",
424: "Cello",
425: "Bull",
426: "Camel",
427: "Ball",
428: "Duck",
429: "Whale",
430: "Shirt",
431: "Tank",
432: "Motorcycle",
433: "Accordion",
434: "Owl",
435: "Porcupine",
436: "Sun hat",
437: "Nail",
438: "Scissors",
439: "Swan",
440: "Lamp",
441: "Crown",
442: "Piano",
443: "Sculpture",
444: "Cheetah",
445: "Oboe",
446: "Tin can",
447: "Mango",
448: "Tripod",
449: "Oven",
450: "Mouse",
451: "Barge",
452: "Coffee",
453: "Snowboard",
454: "Common fig",
455: "Salad",
456: "Marine invertebrates",
457: "Umbrella",
458: "Kangaroo",
459: "Human arm",
460: "Measuring cup",
461: "Snail",
462: "Loveseat",
463: "Suit",
464: "Teapot",
465: "Bottle",
466: "Alpaca",
467: "Kettle",
468: "Trousers",
469: "Popcorn",
470: "Centipede",
471: "Spider",
472: "Sparrow",
473: "Plate",
474: "Bagel",
475: "Personal care",
476: "Apple",
477: "Brassiere",
478: "Bathroom cabinet",
479: "studio couch",
480: "Computer keyboard",
481: "Table tennis racket",
482: "Sushi",
483: "Cabinetry",
484: "Street light",
485: "Towel",
486: "Nightstand",
487: "Rabbit",
488: "Dolphin",
489: "Dog",
490: "Jug",
491: "Wok",
492: "Fire hydrant",
493: "Human eye",
494: "Skyscraper",
495: "Backpack",
496: "Potato",
497: "Paper towel",
498: "Lifejacket",
499: "Bicycle wheel",
500: "Toilet",
}
return clsid2catid, catid2name
def _visdrone_category():
clsid2catid = {i: i for i in range(10)}
catid2name = {
0: 'pedestrian',
1: 'people',
2: 'bicycle',
3: 'car',
4: 'van',
5: 'truck',
6: 'tricycle',
7: 'awning-tricycle',
8: 'bus',
9: 'motor'
}
return clsid2catid, catid2name

View File

@@ -0,0 +1,596 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import copy
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import numpy as np
from ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet', 'COCODetDataset'
]
@register
@serializable
class COCODataSet(DetDataset):
"""
Load dataset with COCO format.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): coco annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
load_crowd (bool): whether to load crowded ground-truth.
False as default
allow_empty (bool): whether to load empty entry. False as default
empty_ratio (float): the ratio of empty records to the total number of
records; if empty_ratio is outside [0., 1.), do not sample the
records and use all the empty entries. 1. as default
repeat (int): repeat times for dataset, used in benchmark.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=False,
empty_ratio=1.,
repeat=1):
super(COCODataSet, self).__init__(
dataset_dir,
image_dir,
anno_path,
data_fields,
sample_num,
repeat=repeat)
self.load_image_only = False
self.load_semantic = False
self.load_crowd = load_crowd
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
def _sample_empty(self, records, num):
# if empty_ratio is out of [0. ,1.), do not sample the records
if self.empty_ratio < 0. or self.empty_ratio >= 1.:
return records
import random
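# With `num` non-empty records kept, sampling num * r / (1 - r) empty records
# makes empty entries roughly the fraction r (= empty_ratio) of the final set.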
sample_num = min(
int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
records = random.sample(records, sample_num)
return records
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
assert anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path
from pycocotools.coco import COCO
coco = COCO(anno_path)
img_ids = coco.getImgIds()
img_ids.sort()
cat_ids = coco.getCatIds()
records = []
empty_records = []
ct = 0
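# Map the (possibly non-contiguous) COCO category ids to contiguous class ids
# starting from 0, and build the category-name -> class-id lookup.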
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
self.cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in self.catid2clsid.items()
})
if 'annotations' not in coco.dataset:
self.load_image_only = True
logger.warning('Annotation file: {} does not contain ground truth, '
'loading image information only.'.format(anno_path))
for img_id in img_ids:
img_anno = coco.loadImgs([img_id])[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
im_path = os.path.join(image_dir,
im_fname) if image_dir else im_fname
is_empty = False
if not os.path.exists(im_path):
logger.warning('Illegal image file: {}, and it will be '
'ignored'.format(im_path))
continue
if im_w < 0 or im_h < 0:
logger.warning('Illegal width: {} or height: {} in annotation, '
'and im_id: {} will be ignored'.format(
im_w, im_h, img_id))
continue
coco_rec = {
'im_file': im_path,
'im_id': np.array([img_id]),
'h': im_h,
'w': im_w,
} if 'image' in self.data_fields else {}
if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(
imgIds=[img_id], iscrowd=None if self.load_crowd else False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
is_rbox_anno = False
for inst in instances:
# check gt bbox
if inst.get('ignore', False):
continue
if 'bbox' not in inst.keys():
continue
else:
if not any(np.array(inst['bbox'])):
continue
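# COCO boxes are [x, y, w, h]; convert to [x1, y1, x2, y2] and keep only
# boxes with positive area and side lengths larger than eps.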
x1, y1, box_w, box_h = inst['bbox']
x2 = x1 + box_w
y2 = y1 + box_h
eps = 1e-5
if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
inst['clean_bbox'] = [
round(float(x), 3) for x in [x1, y1, x2, y2]
]
bboxes.append(inst)
else:
logger.warning(
'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
if num_bbox <= 0 and not self.allow_empty:
continue
elif num_bbox <= 0:
is_empty = True
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
gt_track_id = -np.ones((num_bbox, 1), dtype=np.int32)
has_segmentation = False
has_track_id = False
for i, box in enumerate(bboxes):
catid = box['category_id']
gt_class[i][0] = self.catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd']
# check RLE format
if 'segmentation' in box and box['iscrowd'] == 1:
gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
elif 'segmentation' in box and box['segmentation']:
if not np.array(
box['segmentation'],
dtype=object).size > 0 and not self.allow_empty:
bboxes.pop(i)
gt_poly.pop(i)
np.delete(is_crowd, i)
np.delete(gt_class, i)
np.delete(gt_bbox, i)
else:
gt_poly[i] = box['segmentation']
has_segmentation = True
if 'track_id' in box:
gt_track_id[i][0] = box['track_id']
has_track_id = True
if has_segmentation and not any(
gt_poly) and not self.allow_empty:
continue
gt_rec = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
}
if has_track_id:
gt_rec.update({'gt_track_id': gt_track_id})
for k, v in gt_rec.items():
if k in self.data_fields:
coco_rec[k] = v
# TODO: remove load_semantic
if self.load_semantic and 'semantic' in self.data_fields:
seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
'train2017', im_fname[:-3] + 'png')
coco_rec.update({'semantic': seg_path})
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_path, img_id, im_h, im_w))
if is_empty:
empty_records.append(coco_rec)
else:
records.append(coco_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert ct > 0, 'no coco record found in %s' % (anno_path)
logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
format(ct, len(img_ids) - ct, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs = records
@register
@serializable
class SlicedCOCODataSet(COCODataSet):
"""Sliced COCODataSet"""
def __init__(
self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=False,
empty_ratio=1.,
repeat=1,
sliced_size=[640, 640],
overlap_ratio=[0.25, 0.25], ):
super(SlicedCOCODataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
load_crowd=load_crowd,
allow_empty=allow_empty,
empty_ratio=empty_ratio,
repeat=repeat, )
self.sliced_size = sliced_size
self.overlap_ratio = overlap_ratio
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
assert anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path
from pycocotools.coco import COCO
coco = COCO(anno_path)
img_ids = coco.getImgIds()
img_ids.sort()
cat_ids = coco.getCatIds()
records = []
empty_records = []
ct = 0
ct_sub = 0
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
self.cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in self.catid2clsid.items()
})
if 'annotations' not in coco.dataset:
self.load_image_only = True
logger.warning('Annotation file: {} does not contain ground truth, '
'loading image information only.'.format(anno_path))
try:
import sahi
from sahi.slicing import slice_image
except Exception as e:
logger.error(
'sahi not found, please install sahi. '
'for example: `pip install sahi`, see https://github.com/obss/sahi.'
)
raise e
sub_img_ids = 0
for img_id in img_ids:
img_anno = coco.loadImgs([img_id])[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
im_path = os.path.join(image_dir,
im_fname) if image_dir else im_fname
is_empty = False
if not os.path.exists(im_path):
logger.warning('Illegal image file: {}, and it will be '
'ignored'.format(im_path))
continue
if im_w < 0 or im_h < 0:
logger.warning('Illegal width: {} or height: {} in annotation, '
'and im_id: {} will be ignored'.format(
im_w, im_h, img_id))
continue
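# Slice each image into overlapping patches with sahi; every patch record
# keeps its top-left offset ('st_pix') so detections on the patch can be
# mapped back to the original image.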
slice_image_result = sahi.slicing.slice_image(
image=im_path,
slice_height=self.sliced_size[0],
slice_width=self.sliced_size[1],
overlap_height_ratio=self.overlap_ratio[0],
overlap_width_ratio=self.overlap_ratio[1])
sub_img_num = len(slice_image_result)
for _ind in range(sub_img_num):
im = slice_image_result.images[_ind]
coco_rec = {
'image': im,
'im_id': np.array([sub_img_ids + _ind]),
'h': im.shape[0],
'w': im.shape[1],
'ori_im_id': np.array([img_id]),
'st_pix': np.array(
slice_image_result.starting_pixels[_ind],
dtype=np.float32),
'is_last': 1 if _ind == sub_img_num - 1 else 0,
} if 'image' in self.data_fields else {}
records.append(coco_rec)
ct_sub += sub_img_num
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert ct > 0, 'no coco record found in %s' % (anno_path)
logger.info('{} samples sliced into {} sub_samples in file {}'.format(
ct, ct_sub, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs = records
@register
@serializable
class SemiCOCODataSet(COCODataSet):
"""Semi-COCODataSet used for supervised and unsupervised dataSet"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=False,
empty_ratio=1.,
repeat=1,
supervised=True):
super(SemiCOCODataSet, self).__init__(
dataset_dir, image_dir, anno_path, data_fields, sample_num,
load_crowd, allow_empty, empty_ratio, repeat)
self.supervised = supervised
self.length = -1  # default: -1 means all
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
assert anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path
from pycocotools.coco import COCO
coco = COCO(anno_path)
img_ids = coco.getImgIds()
img_ids.sort()
cat_ids = coco.getCatIds()
records = []
empty_records = []
ct = 0
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
self.cname2cid = dict({
coco.loadCats(catid)[0]['name']: clsid
for catid, clsid in self.catid2clsid.items()
})
if 'annotations' not in coco.dataset or not self.supervised:
self.load_image_only = True
logger.warning('Annotation file: {} does not contain ground truth, '
'loading image information only.'.format(anno_path))
for img_id in img_ids:
img_anno = coco.loadImgs([img_id])[0]
im_fname = img_anno['file_name']
im_w = float(img_anno['width'])
im_h = float(img_anno['height'])
im_path = os.path.join(image_dir,
im_fname) if image_dir else im_fname
is_empty = False
if not os.path.exists(im_path):
logger.warning('Illegal image file: {}, and it will be '
'ignored'.format(im_path))
continue
if im_w < 0 or im_h < 0:
logger.warning('Illegal width: {} or height: {} in annotation, '
'and im_id: {} will be ignored'.format(
im_w, im_h, img_id))
continue
coco_rec = {
'im_file': im_path,
'im_id': np.array([img_id]),
'h': im_h,
'w': im_w,
} if 'image' in self.data_fields else {}
if not self.load_image_only:
ins_anno_ids = coco.getAnnIds(
imgIds=[img_id], iscrowd=None if self.load_crowd else False)
instances = coco.loadAnns(ins_anno_ids)
bboxes = []
is_rbox_anno = False
for inst in instances:
# check gt bbox
if inst.get('ignore', False):
continue
if 'bbox' not in inst.keys():
continue
else:
if not any(np.array(inst['bbox'])):
continue
x1, y1, box_w, box_h = inst['bbox']
x2 = x1 + box_w
y2 = y1 + box_h
eps = 1e-5
if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
inst['clean_bbox'] = [
round(float(x), 3) for x in [x1, y1, x2, y2]
]
bboxes.append(inst)
else:
logger.warning(
'Found an invalid bbox in annotations: im_id: {}, '
'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
img_id, float(inst['area']), x1, y1, x2, y2))
num_bbox = len(bboxes)
if num_bbox <= 0 and not self.allow_empty:
continue
elif num_bbox <= 0:
is_empty = True
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = [None] * num_bbox
has_segmentation = False
for i, box in enumerate(bboxes):
catid = box['category_id']
gt_class[i][0] = self.catid2clsid[catid]
gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd']
# check RLE format
if 'segmentation' in box and box['iscrowd'] == 1:
gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
elif 'segmentation' in box and box['segmentation']:
if not np.array(box['segmentation']
).size > 0 and not self.allow_empty:
bboxes.pop(i)
gt_poly.pop(i)
np.delete(is_crowd, i)
np.delete(gt_class, i)
np.delete(gt_bbox, i)
else:
gt_poly[i] = box['segmentation']
has_segmentation = True
if has_segmentation and not any(
gt_poly) and not self.allow_empty:
continue
gt_rec = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_poly': gt_poly,
}
for k, v in gt_rec.items():
if k in self.data_fields:
coco_rec[k] = v
# TODO: remove load_semantic
if self.load_semantic and 'semantic' in self.data_fields:
seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
'train2017', im_fname[:-3] + 'png')
coco_rec.update({'semantic': seg_path})
logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
im_path, img_id, im_h, im_w))
if is_empty:
empty_records.append(coco_rec)
else:
records.append(coco_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert ct > 0, 'no coco record found in %s' % (anno_path)
logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
format(ct, len(img_ids) - ct, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs = records
if self.supervised:
logger.info(f'Use {len(self.roidbs)} sup_samples data as LABELED')
else:
if self.length > 0:  # unsup length will be decided by sup length
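# Sample unlabeled records with replacement until the unsupervised set
# matches the externally decided length.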
all_roidbs = self.roidbs.copy()
selected_idxs = [
np.random.choice(len(all_roidbs))
for _ in range(self.length)
]
self.roidbs = [all_roidbs[i] for i in selected_idxs]
logger.info(
f'Use {len(self.roidbs)} unsup_samples data as UNLABELED')
def __getitem__(self, idx):
n = len(self.roidbs)
if self.repeat > 1:
idx %= n
# data batch
roidb = copy.deepcopy(self.roidbs[idx])
if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
roidb = [roidb, ] + [
copy.deepcopy(self.roidbs[np.random.randint(n)])
for _ in range(4)
]
if isinstance(roidb, Sequence):
for r in roidb:
r['curr_iter'] = self._curr_iter
else:
roidb['curr_iter'] = self._curr_iter
self._curr_iter += 1
return self.transform(roidb)
# for PaddleX
@register
@serializable
class COCODetDataset(COCODataSet):
pass

View File

@@ -0,0 +1,206 @@
from ppdet.core.workspace import register, serializable
import cv2
import os
import tarfile
import numpy as np
import os.path as osp
from ppdet.data.source.dataset import DetDataset
from imgaug.augmentables.lines import LineStringsOnImage
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
from ppdet.data.culane_utils import lane_to_linestrings
import pickle as pkl
from ppdet.utils.logger import setup_logger
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from .dataset import DetDataset, _make_dataset, _is_valid_file
from ppdet.utils.download import download_dataset
logger = setup_logger(__name__)
@register
@serializable
class CULaneDataSet(DetDataset):
def __init__(
self,
dataset_dir,
cut_height,
list_path,
split='train',
data_fields=['image'],
video_file=None,
frame_rate=-1, ):
super(CULaneDataSet, self).__init__(
dataset_dir=dataset_dir,
cut_height=cut_height,
split=split,
data_fields=data_fields)
self.dataset_dir = dataset_dir
self.list_path = osp.join(dataset_dir, list_path)
self.cut_height = cut_height
self.data_fields = data_fields
self.split = split
self.training = 'train' in split
self.data_infos = []
self.video_file = video_file
self.frame_rate = frame_rate
self._imid2path = {}
self.predict_dir = None
def __len__(self):
return len(self.data_infos)
def check_or_download_dataset(self):
if not osp.exists(self.dataset_dir):
download_dataset("dataset", dataset="culane")
# extract .tar files in self.dataset_dir
for fname in os.listdir(self.dataset_dir):
logger.info("Decompressing {}...".format(fname))
# ignore .* files
if fname.startswith('.'):
continue
if fname.find('.tar.gz') >= 0:
with tarfile.open(osp.join(self.dataset_dir, fname)) as tf:
tf.extractall(path=self.dataset_dir)
logger.info("Dataset files are ready.")
def parse_dataset(self):
logger.info('Loading CULane annotations...')
if self.predict_dir is not None:
logger.info('switch to predict mode')
return
# Waiting for the dataset to load is tedious, let's cache it
os.makedirs('cache', exist_ok=True)
cache_path = 'cache/culane_paddle_{}.pkl'.format(self.split)
if os.path.exists(cache_path):
with open(cache_path, 'rb') as cache_file:
self.data_infos = pkl.load(cache_file)
self.max_lanes = max(
len(anno['lanes']) for anno in self.data_infos)
return
with open(self.list_path) as list_file:
for line in list_file:
infos = self.load_annotation(line.split())
self.data_infos.append(infos)
# cache data infos to file
with open(cache_path, 'wb') as cache_file:
pkl.dump(self.data_infos, cache_file)
def load_annotation(self, line):
infos = {}
img_line = line[0]
img_line = img_line[1 if img_line[0] == '/' else 0::]
img_path = os.path.join(self.dataset_dir, img_line)
infos['img_name'] = img_line
infos['img_path'] = img_path
if len(line) > 1:
mask_line = line[1]
mask_line = mask_line[1 if mask_line[0] == '/' else 0::]
mask_path = os.path.join(self.dataset_dir, mask_line)
infos['mask_path'] = mask_path
if len(line) > 2:
exist_list = [int(l) for l in line[2:]]
infos['lane_exist'] = np.array(exist_list)
anno_path = img_path[:
-3] + 'lines.txt'  # remove suffix 'jpg' and add 'lines.txt'
with open(anno_path, 'r') as anno_file:
data = [
list(map(float, line.split())) for line in anno_file.readlines()
]
lanes = [[(lane[i], lane[i + 1]) for i in range(0, len(lane), 2)
if lane[i] >= 0 and lane[i + 1] >= 0] for lane in data]
lanes = [list(set(lane)) for lane in lanes] # remove duplicated points
lanes = [lane for lane in lanes
if len(lane) > 2]  # remove lanes with 2 or fewer points
lanes = [sorted(
lane, key=lambda x: x[1]) for lane in lanes] # sort by y
infos['lanes'] = lanes
return infos
def set_images(self, images):
self.predict_dir = images
self.data_infos = self._load_images()
def _find_images(self):
predict_dir = self.predict_dir
if not isinstance(predict_dir, Sequence):
predict_dir = [predict_dir]
images = []
for im_dir in predict_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.predict_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._find_images()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {
'im_id': np.array([ct]),
"img_path": os.path.abspath(image),
"img_name": os.path.basename(image),
"lanes": []
}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def __getitem__(self, idx):
data_info = self.data_infos[idx]
img = cv2.imread(data_info['img_path'])
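# Crop away the top cut_height rows; the mask and lane y-coordinates
# below are shifted by the same amount so annotations stay aligned.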
img = img[self.cut_height:, :, :]
sample = data_info.copy()
sample.update({'image': img})
img_org = sample['image']
if self.training:
label = cv2.imread(sample['mask_path'], cv2.IMREAD_UNCHANGED)
if len(label.shape) > 2:
label = label[:, :, 0]
label = label.squeeze()
label = label[self.cut_height:, :]
sample.update({'mask': label})
if self.cut_height != 0:
new_lanes = []
for i in sample['lanes']:
lanes = []
for p in i:
lanes.append((p[0], p[1] - self.cut_height))
new_lanes.append(lanes)
sample.update({'lanes': new_lanes})
sample['mask'] = SegmentationMapsOnImage(
sample['mask'], shape=img_org.shape)
sample['full_img_path'] = data_info['img_path']
sample['img_name'] = data_info['img_name']
sample['im_id'] = np.array([idx])
sample['image'] = sample['image'].copy().astype(np.uint8)
sample['lanes'] = lane_to_linestrings(sample['lanes'])
sample['lanes'] = LineStringsOnImage(
sample['lanes'], shape=img_org.shape)
sample['seg'] = np.zeros(img_org.shape)
return sample

View File

@@ -0,0 +1,307 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import copy
import numpy as np
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from paddle.io import Dataset
from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path
from ppdet.data import source
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@serializable
class DetDataset(Dataset):
"""
Load detection dataset.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
use_default_label (bool): whether to load default label list.
repeat (int): repeat times for dataset, used in benchmark.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
use_default_label=None,
repeat=1,
**kwargs):
super(DetDataset, self).__init__()
self.dataset_dir = dataset_dir if dataset_dir is not None else ''
self.anno_path = anno_path
self.image_dir = image_dir if image_dir is not None else ''
self.data_fields = data_fields
self.sample_num = sample_num
self.use_default_label = use_default_label
self.repeat = repeat
self._epoch = 0
self._curr_iter = 0
def __len__(self, ):
return len(self.roidbs) * self.repeat
def __call__(self, *args, **kwargs):
return self
def __getitem__(self, idx):
n = len(self.roidbs)
if self.repeat > 1:
idx %= n
# data batch
roidb = copy.deepcopy(self.roidbs[idx])
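# Mixup/cutmix/mosaic are active while the epoch is below the corresponding
# *_epoch threshold (0 keeps them always on, -1 disables them): mixup and
# cutmix pair one extra random sample, mosaic adds four, and the CenterTrack
# branch pairs the previous frame.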
if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
idx = np.random.randint(n)
roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
roidb = [roidb, ] + [
copy.deepcopy(self.roidbs[np.random.randint(n)])
for _ in range(4)
]
elif self.pre_img_epoch == 0 or self._epoch < self.pre_img_epoch:
# Add previous image as input, only used in CenterTrack
idx_pre_img = idx - 1
if idx_pre_img < 0:
idx_pre_img = idx + 1
roidb = [roidb, ] + [copy.deepcopy(self.roidbs[idx_pre_img])]
if isinstance(roidb, Sequence):
for r in roidb:
r['curr_iter'] = self._curr_iter
else:
roidb['curr_iter'] = self._curr_iter
self._curr_iter += 1
return self.transform(roidb)
def check_or_download_dataset(self):
self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
self.image_dir)
def set_kwargs(self, **kwargs):
self.mixup_epoch = kwargs.get('mixup_epoch', -1)
self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
self.pre_img_epoch = kwargs.get('pre_img_epoch', -1)
def set_transform(self, transform):
self.transform = transform
def set_epoch(self, epoch_id):
self._epoch = epoch_id
def parse_dataset(self, ):
raise NotImplementedError(
"Need to implement parse_dataset method of Dataset")
def get_anno(self):
if self.anno_path is None:
return
return os.path.join(self.dataset_dir, self.anno_path)
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
return f.lower().endswith(extensions)
def _make_dataset(dir):
dir = os.path.expanduser(dir)
if not os.path.isdir(dir):
raise ValueError('{} should be a dir'.format(dir))
images = []
for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
for fname in sorted(fnames):
path = os.path.join(root, fname)
if _is_valid_file(path):
images.append(path)
return images
@register
@serializable
class ImageFolder(DetDataset):
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
sample_num=-1,
use_default_label=None,
**kwargs):
super(ImageFolder, self).__init__(
dataset_dir,
image_dir,
anno_path,
sample_num=sample_num,
use_default_label=use_default_label)
self._imid2path = {}
self.roidbs = None
self.sample_num = sample_num
def check_or_download_dataset(self):
return
def get_anno(self):
if self.anno_path is None:
return
if self.dataset_dir:
return os.path.join(self.dataset_dir, self.anno_path)
else:
return self.anno_path
def parse_dataset(self, ):
if not self.roidbs:
self.roidbs = self._load_images()
def _parse(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
def set_slice_images(self,
images,
slice_size=[640, 640],
overlap_ratio=[0.25, 0.25]):
self.image_dir = images
ori_records = self._load_images()
try:
import sahi
from sahi.slicing import slice_image
except Exception as e:
logger.error(
'sahi not found, please install sahi. '
'for example: `pip install sahi`, see https://github.com/obss/sahi.'
)
raise e
sub_img_ids = 0
ct = 0
ct_sub = 0
records = []
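# Slice every input image into overlapping patches for inference; each patch
# record stores its offset ('st_pix') and an 'is_last' flag so per-patch
# detections can later be merged back into full-image results.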
for i, ori_rec in enumerate(ori_records):
im_path = ori_rec['im_file']
slice_image_result = sahi.slicing.slice_image(
image=im_path,
slice_height=slice_size[0],
slice_width=slice_size[1],
overlap_height_ratio=overlap_ratio[0],
overlap_width_ratio=overlap_ratio[1])
sub_img_num = len(slice_image_result)
for _ind in range(sub_img_num):
im = slice_image_result.images[_ind]
rec = {
'image': im,
'im_id': np.array([sub_img_ids + _ind]),
'h': im.shape[0],
'w': im.shape[1],
'ori_im_id': np.array([ori_rec['im_id'][0]]),
'st_pix': np.array(
slice_image_result.starting_pixels[_ind],
dtype=np.float32),
'is_last': 1 if _ind == sub_img_num - 1 else 0,
} if 'image' in self.data_fields else {}
records.append(rec)
ct_sub += sub_img_num
ct += 1
logger.info('{} samples sliced into {} sub_samples.'.format(ct,
ct_sub))
self.roidbs = records
def get_label_list(self):
# Only VOC dataset needs label list in ImageFolder
return self.anno_path
@register
class CommonDataset(object):
def __init__(self, **dataset_args):
super(CommonDataset, self).__init__()
dataset_args = copy.deepcopy(dataset_args)
type = dataset_args.pop("name")
self.dataset = getattr(source, type)(**dataset_args)
def __call__(self):
return self.dataset
@register
class TrainDataset(CommonDataset):
pass
@register
class EvalMOTDataset(CommonDataset):
pass
@register
class TestMOTDataset(CommonDataset):
pass
@register
class EvalDataset(CommonDataset):
pass
@register
class TestDataset(CommonDataset):
pass

View File

@@ -0,0 +1,845 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on https://github.com/open-mmlab/mmpose
"""
import os
import cv2
import numpy as np
import json
import copy
import pycocotools
from pycocotools.coco import COCO
from .dataset import DetDataset
from ppdet.core.workspace import register, serializable
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
"""Base class for bottom-up datasets.
All datasets should subclass it.
All subclasses should overwrite:
Methods:`_get_imganno`
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False):
super().__init__(dataset_dir, image_dir, anno_path)
self.image_info = {}
self.ann_info = {}
self.img_prefix = os.path.join(dataset_dir, image_dir)
self.transform = transform
self.test_mode = test_mode
self.ann_info['num_joints'] = num_joints
self.img_ids = []
def parse_dataset(self):
pass
def __len__(self):
"""Get dataset length."""
return len(self.img_ids)
def _get_imganno(self, idx):
"""Get anno for a single image."""
raise NotImplementedError
def __getitem__(self, idx):
"""Prepare image for training given the index."""
records = copy.deepcopy(self._get_imganno(idx))
records['image'] = cv2.imread(records['image_file'])
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
if 'mask' in records:
records['mask'] = (records['mask'] + 0).astype('uint8')
records = self.transform(records)
return records
def parse_dataset(self):
return
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
"""COCO dataset for bottom-up pose estimation.
The dataset loads raw features and applies specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes::
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False,
return_mask=True,
return_bbox=True,
return_area=True,
return_class=True):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform, shard, test_mode)
self.ann_file = os.path.join(dataset_dir, anno_path)
self.shard = shard
self.test_mode = test_mode
self.return_mask = return_mask
self.return_bbox = return_bbox
self.return_area = return_area
self.return_class = return_class
def parse_dataset(self):
self.coco = COCO(self.ann_file)
self.img_ids = self.coco.getImgIds()
if not self.test_mode:
self.img_ids_tmp = []
for img_id in self.img_ids:
ann_ids = self.coco.getAnnIds(imgIds=img_id)
anno = self.coco.loadAnns(ann_ids)
anno = [obj for obj in anno if obj['iscrowd'] == 0]
if len(anno) == 0:
continue
self.img_ids_tmp.append(img_id)
self.img_ids = self.img_ids_tmp
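# Split the image ids evenly across distributed workers: this shard keeps
# the block [rank * blocknum, (rank + 1) * blocknum).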
blocknum = int(len(self.img_ids) / self.shard[1])
self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
self.shard[0] + 1))]
self.num_images = len(self.img_ids)
self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
self.dataset_name = 'coco'
cat_ids = self.coco.getCatIds()
self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
print('=> num_images: {}'.format(self.num_images))
@staticmethod
def _get_mapping_id_name(imgs):
"""
Args:
imgs (dict): dict of image info.
Returns:
tuple: Image name & id mapping dicts.
- id2name (dict): Mapping image id to name.
- name2id (dict): Mapping image name to id.
"""
id2name = {}
name2id = {}
for image_id, image in imgs.items():
file_name = image['file_name']
id2name[image_id] = file_name
name2id[file_name] = image_id
return id2name, name2id
def _get_imganno(self, idx):
"""Get anno for a single image.
Args:
idx (int): image idx
Returns:
dict: info for model training
"""
coco = self.coco
img_id = self.img_ids[idx]
ann_ids = coco.getAnnIds(imgIds=img_id)
anno = coco.loadAnns(ann_ids)
anno = [
obj for obj in anno
if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
]
db_rec = {}
joints, orgsize = self._get_joints(anno, idx)
db_rec['gt_joints'] = joints
db_rec['im_shape'] = orgsize
if self.return_bbox:
db_rec['gt_bbox'] = self._get_bboxs(anno, idx)
if self.return_class:
db_rec['gt_class'] = self._get_labels(anno, idx)
if self.return_area:
db_rec['gt_areas'] = self._get_areas(anno, idx)
if self.return_mask:
db_rec['mask'] = self._get_mask(anno, idx)
db_rec['im_id'] = img_id
db_rec['image_file'] = os.path.join(self.img_prefix,
self.id2name[img_id])
return db_rec
def _get_joints(self, anno, idx):
"""Get joints for all people in an image."""
num_people = len(anno)
joints = np.zeros(
(num_people, self.ann_info['num_joints'], 3), dtype=np.float32)
for i, obj in enumerate(anno):
joints[i, :self.ann_info['num_joints'], :3] = \
np.array(obj['keypoints']).reshape([-1, 3])
img_info = self.coco.loadImgs(self.img_ids[idx])[0]
orgsize = np.array([img_info['height'], img_info['width'], 1])
return joints, orgsize
def _get_bboxs(self, anno, idx):
num_people = len(anno)
gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)
for idx, obj in enumerate(anno):
if 'bbox' in obj:
gt_bboxes[idx, :] = obj['bbox']
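# COCO boxes are stored as [x, y, w, h]; convert them to [x1, y1, x2, y2].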
gt_bboxes[:, 2] += gt_bboxes[:, 0]
gt_bboxes[:, 3] += gt_bboxes[:, 1]
return gt_bboxes
def _get_labels(self, anno, idx):
num_people = len(anno)
gt_labels = np.zeros((num_people, 1), dtype=np.float32)
for idx, obj in enumerate(anno):
if 'category_id' in obj:
catid = obj['category_id']
gt_labels[idx, 0] = self.catid2clsid[catid]
return gt_labels
def _get_areas(self, anno, idx):
num_people = len(anno)
gt_areas = np.zeros((num_people, ), dtype=np.float32)
for idx, obj in enumerate(anno):
if 'area' in obj:
gt_areas[idx, ] = obj['area']
return gt_areas
def _get_mask(self, anno, idx):
"""Get ignore masks to mask out losses."""
coco = self.coco
img_info = coco.loadImgs(self.img_ids[idx])[0]
m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
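# Accumulate masks of crowd regions and of people annotated without
# keypoints; those pixels become ignore regions (returned as False below).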
for obj in anno:
if 'segmentation' in obj:
if obj['iscrowd']:
rle = pycocotools.mask.frPyObjects(obj['segmentation'],
img_info['height'],
img_info['width'])
m += pycocotools.mask.decode(rle)
elif obj['num_keypoints'] == 0:
rles = pycocotools.mask.frPyObjects(obj['segmentation'],
img_info['height'],
img_info['width'])
for rle in rles:
m += pycocotools.mask.decode(rle)
return m < 0.5
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
"""CrowdPose dataset for bottom-up pose estimation.
The dataset loads raw features and applies specified transforms
to return a dict containing the image tensors and other information.
CrowdPose keypoint indexes::
0: 'left_shoulder',
1: 'right_shoulder',
2: 'left_elbow',
3: 'right_elbow',
4: 'left_wrist',
5: 'right_wrist',
6: 'left_hip',
7: 'right_hip',
8: 'left_knee',
9: 'right_knee',
10: 'left_ankle',
11: 'right_ankle',
12: 'top_head',
13: 'neck'
Args:
dataset_dir (str): Root path to the dataset.
anno_path (str): Relative path to the annotation file.
image_dir (str): Path to a directory where images are held.
Default: None.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
shard (list): [rank, worldsize], the distributed env params
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[],
shard=[0, 1],
test_mode=False):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform, shard, test_mode)
self.ann_file = os.path.join(dataset_dir, anno_path)
self.shard = shard
self.test_mode = test_mode
def parse_dataset(self):
self.coco = COCO(self.ann_file)
self.img_ids = self.coco.getImgIds()
if not self.test_mode:
self.img_ids = [
img_id for img_id in self.img_ids
if len(self.coco.getAnnIds(
imgIds=img_id, iscrowd=None)) > 0
]
blocknum = int(len(self.img_ids) / self.shard[1])
self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
self.shard[0] + 1))]
self.num_images = len(self.img_ids)
self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
self.dataset_name = 'crowdpose'
print('=> num_images: {}'.format(self.num_images))
@serializable
class KeypointTopDownBaseDataset(DetDataset):
"""Base class for top_down datasets.
All datasets should subclass it.
All subclasses should overwrite:
Methods:`_get_db`
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): keypoint numbers
transform (composed(operators)): A sequence of data transforms.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[]):
super().__init__(dataset_dir, image_dir, anno_path)
self.image_info = {}
self.ann_info = {}
self.img_prefix = os.path.join(dataset_dir, image_dir)
self.transform = transform
self.ann_info['num_joints'] = num_joints
self.db = []
def __len__(self):
"""Get dataset length."""
return len(self.db)
def _get_db(self):
"""Get a sample"""
raise NotImplementedError
def __getitem__(self, idx):
"""Prepare sample for training given the index."""
records = copy.deepcopy(self.db[idx])
records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
cv2.IMREAD_IGNORE_ORIENTATION)
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
records['score'] = records['score'] if 'score' in records else 1
records = self.transform(records)
# print('records', records)
return records
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
"""COCO dataset for top-down pose estimation.
The dataset loads raw features and applies specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes:
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): Keypoint numbers
trainsize (list):[w, h] Image target size
transform (composed(operators)): A sequence of data transforms.
bbox_file (str): Path to a detection bbox file
Default: None.
use_gt_bbox (bool): Whether to use ground truth bbox
Default: True.
pixel_std (int): The pixel std of the scale
Default: 200.
image_thre (float): The threshold to filter the detection box
Default: 0.0.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
trainsize,
transform=[],
bbox_file=None,
use_gt_bbox=True,
pixel_std=200,
image_thre=0.0,
center_scale=None):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
self.bbox_file = bbox_file
self.use_gt_bbox = use_gt_bbox
self.trainsize = trainsize
self.pixel_std = pixel_std
self.image_thre = image_thre
self.center_scale = center_scale
self.dataset_name = 'coco'
def parse_dataset(self):
if self.use_gt_bbox:
self.db = self._load_coco_keypoint_annotations()
else:
self.db = self._load_coco_person_detection_results()
def _load_coco_keypoint_annotations(self):
coco = COCO(self.get_anno())
img_ids = coco.getImgIds()
gt_db = []
for index in img_ids:
im_ann = coco.loadImgs(index)[0]
width = im_ann['width']
height = im_ann['height']
file_name = im_ann['file_name']
im_id = int(im_ann["id"])
annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
objs = coco.loadAnns(annIds)
valid_objs = []
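# Clip each box to the image bounds, store it as [x1, y1, w, h], and drop
# degenerate boxes with non-positive area.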
for obj in objs:
x, y, w, h = obj['bbox']
x1 = np.max((0, x))
y1 = np.max((0, y))
x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
valid_objs.append(obj)
objs = valid_objs
rec = []
for obj in objs:
if max(obj['keypoints']) == 0:
continue
joints = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
for ipt in range(self.ann_info['num_joints']):
joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
joints[ipt, 2] = 0
t_vis = obj['keypoints'][ipt * 3 + 2]
if t_vis > 1:
t_vis = 1
joints_vis[ipt, 0] = t_vis
joints_vis[ipt, 1] = t_vis
joints_vis[ipt, 2] = 0
center, scale = self._box2cs(obj['clean_bbox'][:4])
rec.append({
'image_file': os.path.join(self.img_prefix, file_name),
'center': center,
'scale': scale,
'gt_joints': joints,
'joints_vis': joints_vis,
'im_id': im_id,
})
gt_db.extend(rec)
return gt_db
def _box2cs(self, box):
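# Convert an [x, y, w, h] box into a (center, scale) pair: the box is padded
# to the training aspect ratio, the scale is normalized by pixel_std,
# optionally jittered by center_scale, and enlarged by 1.25 to keep some
# context around the person.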
x, y, w, h = box[:4]
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
if self.center_scale is not None and np.random.rand() < 0.3:
center += self.center_scale * (np.random.rand(2) - 0.5) * [w, h]
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
w = h * aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
if center[0] != -1:
scale = scale * 1.25
return center, scale
def _load_coco_person_detection_results(self):
all_boxes = None
bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
with open(bbox_file_path, 'r') as f:
all_boxes = json.load(f)
if not all_boxes:
print('=> Failed to load %s!' % bbox_file_path)
return None
kpt_db = []
for n_img in range(0, len(all_boxes)):
det_res = all_boxes[n_img]
if det_res['category_id'] != 1:
continue
file_name = det_res[
'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
'image_id']
img_name = os.path.join(self.img_prefix, file_name)
box = det_res['bbox']
score = det_res['score']
im_id = int(det_res['image_id'])
if score < self.image_thre:
continue
center, scale = self._box2cs(box)
joints = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.ones(
(self.ann_info['num_joints'], 3), dtype=np.float32)
kpt_db.append({
'image_file': img_name,
'im_id': im_id,
'center': center,
'scale': scale,
'score': score,
'gt_joints': joints,
'joints_vis': joints_vis,
})
return kpt_db
@register
@serializable
class KeypointTopDownCocoWholeBodyHandDataset(KeypointTopDownBaseDataset):
"""CocoWholeBody dataset for top-down hand pose estimation.
The dataset loads raw features and applies specified transforms
to return a dict containing the image tensors and other information.
COCO-WholeBody Hand keypoint indexes:
0: 'wrist',
1: 'thumb1',
2: 'thumb2',
3: 'thumb3',
4: 'thumb4',
5: 'forefinger1',
6: 'forefinger2',
7: 'forefinger3',
8: 'forefinger4',
9: 'middle_finger1',
10: 'middle_finger2',
11: 'middle_finger3',
12: 'middle_finger4',
13: 'ring_finger1',
14: 'ring_finger2',
15: 'ring_finger3',
16: 'ring_finger4',
17: 'pinky_finger1',
18: 'pinky_finger2',
19: 'pinky_finger3',
20: 'pinky_finger4'
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): Keypoint numbers
trainsize (list):[w, h] Image target size
transform (composed(operators)): A sequence of data transforms.
pixel_std (int): The pixel std of the scale
Default: 200.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
trainsize,
transform=[],
pixel_std=200):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
self.trainsize = trainsize
self.pixel_std = pixel_std
self.dataset_name = 'coco_wholebady_hand'
def _box2cs(self, box):
x, y, w, h = box[:4]
center = np.zeros((2), dtype=np.float32)
center[0] = x + w * 0.5
center[1] = y + h * 0.5
aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
w = h * aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
if center[0] != -1:
scale = scale * 1.25
return center, scale
def parse_dataset(self):
gt_db = []
num_joints = self.ann_info['num_joints']
coco = COCO(self.get_anno())
img_ids = list(coco.imgs.keys())
for img_id in img_ids:
im_ann = coco.loadImgs(img_id)[0]
image_file = os.path.join(self.img_prefix, im_ann['file_name'])
im_id = int(im_ann["id"])
ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
objs = coco.loadAnns(ann_ids)
for obj in objs:
for type in ['left', 'right']:
if (obj[f'{type}hand_valid'] and
max(obj[f'{type}hand_kpts']) > 0):
joints = np.zeros((num_joints, 3), dtype=np.float32)
joints_vis = np.zeros((num_joints, 3), dtype=np.float32)
keypoints = np.array(obj[f'{type}hand_kpts'])
keypoints = keypoints.reshape(-1, 3)
joints[:, :2] = keypoints[:, :2]
joints_vis[:, :2] = np.minimum(1, keypoints[:, 2:3])
center, scale = self._box2cs(obj[f'{type}hand_box'][:4])
gt_db.append({
'image_file': image_file,
'center': center,
'scale': scale,
'gt_joints': joints,
'joints_vis': joints_vis,
'im_id': im_id,
})
self.db = gt_db
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
"""MPII dataset for topdown pose estimation.
The dataset loads raw features and applies specified transforms
to return a dict containing the image tensors and other information.
MPII keypoint indexes::
0: 'right_ankle',
1: 'right_knee',
2: 'right_hip',
3: 'left_hip',
4: 'left_knee',
5: 'left_ankle',
6: 'pelvis',
7: 'thorax',
8: 'upper_neck',
9: 'head_top',
10: 'right_wrist',
11: 'right_elbow',
12: 'right_shoulder',
13: 'left_shoulder',
14: 'left_elbow',
15: 'left_wrist',
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
anno_path (str): Relative path to the annotation file.
num_joints (int): Keypoint numbers
trainsize (list):[w, h] Image target size
transform (composed(operators)): A sequence of data transforms.
"""
def __init__(self,
dataset_dir,
image_dir,
anno_path,
num_joints,
transform=[]):
super().__init__(dataset_dir, image_dir, anno_path, num_joints,
transform)
self.dataset_name = 'mpii'
def parse_dataset(self):
with open(self.get_anno()) as anno_file:
anno = json.load(anno_file)
gt_db = []
for a in anno:
image_name = a['image']
im_id = a['image_id'] if 'image_id' in a else int(
os.path.splitext(image_name)[0])
c = np.array(a['center'], dtype=np.float32)
s = np.array([a['scale'], a['scale']], dtype=np.float32)
# Adjust center/scale slightly to avoid cropping limbs
if c[0] != -1:
c[1] = c[1] + 15 * s[1]
s = s * 1.25
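# MPII annotations use 1-based (Matlab-style) indices; shift to 0-based.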
c = c - 1
joints = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
joints_vis = np.zeros(
(self.ann_info['num_joints'], 3), dtype=np.float32)
if 'gt_joints' in a:
joints_ = np.array(a['gt_joints'])
joints_[:, 0:2] = joints_[:, 0:2] - 1
joints_vis_ = np.array(a['joints_vis'])
assert len(joints_) == self.ann_info[
'num_joints'], 'joint num diff: {} vs {}'.format(
len(joints_), self.ann_info['num_joints'])
joints[:, 0:2] = joints_[:, 0:2]
joints_vis[:, 0] = joints_vis_[:]
joints_vis[:, 1] = joints_vis_[:]
gt_db.append({
'image_file': os.path.join(self.img_prefix, image_name),
'im_id': im_id,
'center': c,
'scale': s,
'gt_joints': joints,
'joints_vis': joints_vis
})
print("number length: {}".format(len(gt_db)))
self.db = gt_db

View File

@@ -0,0 +1,638 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import cv2
import glob
import numpy as np
from collections import OrderedDict, defaultdict
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from .dataset import DetDataset, _make_dataset, _is_valid_file
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class MOTDataSet(DetDataset):
"""
Load dataset with MOT format, only support single class MOT.
Args:
dataset_dir (str): root directory for dataset.
image_lists (str|list): MOT data image lists, for a multi-source MOT dataset.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
repeat (int): repeat times for dataset, use in benchmark.
Notes:
MOT datasets root directory following this:
dataset/mot
|——————image_lists
| |——————caltech.train
| |——————caltech.val
| |——————mot16.train
| |——————mot17.train
| ......
|——————Caltech
|——————MOT17
|——————......
All the MOT datasets have the following structure:
Caltech
|——————images
| └——————00001.jpg
| |—————— ...
| └——————0000N.jpg
└——————labels_with_ids
└——————00001.txt
|—————— ...
└——————0000N.txt
or
MOT17
|——————images
| └——————train
| └——————test
└——————labels_with_ids
└——————train
"""
def __init__(self,
dataset_dir=None,
image_lists=[],
data_fields=['image'],
sample_num=-1,
repeat=1):
super(MOTDataSet, self).__init__(
dataset_dir=dataset_dir,
data_fields=data_fields,
sample_num=sample_num,
repeat=repeat)
self.dataset_dir = dataset_dir
self.image_lists = image_lists
if isinstance(self.image_lists, str):
self.image_lists = [self.image_lists]
self.roidbs = None
self.cname2cid = None
def get_anno(self):
if self.image_lists == []:
return
# only used to get categories and metric
# only check the first sub-dataset; the label_list of all sub-datasets should be the same.
first_mot_data = self.image_lists[0].split('.')[0]
anno_file = os.path.join(self.dataset_dir, first_mot_data,
'label_list.txt')
return anno_file
def parse_dataset(self):
self.img_files = OrderedDict()
self.img_start_index = OrderedDict()
self.label_files = OrderedDict()
self.tid_num = OrderedDict()
self.tid_start_index = OrderedDict()
img_index = 0
for data_name in self.image_lists:
# check every data image list
image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
assert os.path.isdir(image_lists_dir), \
"The {} is not a directory.".format(image_lists_dir)
list_path = os.path.join(image_lists_dir, data_name)
assert os.path.exists(list_path), \
"The list path {} does not exist.".format(list_path)
# record img_files, filter out empty ones
with open(list_path, 'r') as file:
self.img_files[data_name] = file.readlines()
self.img_files[data_name] = [
os.path.join(self.dataset_dir, x.strip())
for x in self.img_files[data_name]
]
self.img_files[data_name] = list(
filter(lambda x: len(x) > 0, self.img_files[data_name]))
self.img_start_index[data_name] = img_index
img_index += len(self.img_files[data_name])
# record label_files
self.label_files[data_name] = [
x.replace('images', 'labels_with_ids').replace(
'.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files[data_name]
]
for data_name, label_paths in self.label_files.items():
max_index = -1
for lp in label_paths:
lb = np.loadtxt(lp)
if len(lb) < 1:
continue
if len(lb.shape) < 2:
img_max = lb[1]
else:
img_max = np.max(lb[:, 1])
if img_max > max_index:
max_index = img_max
self.tid_num[data_name] = int(max_index + 1)
last_index = 0
for i, (k, v) in enumerate(self.tid_num.items()):
self.tid_start_index[k] = last_index
last_index += v
self.num_identities_dict = defaultdict(int)
self.num_identities_dict[0] = int(last_index + 1) # single class
self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
self.total_imgs = sum(self.num_imgs_each_data)
logger.info('MOT dataset summary: ')
logger.info(self.tid_num)
logger.info('Total images: {}'.format(self.total_imgs))
logger.info('Image start index: {}'.format(self.img_start_index))
logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
logger.info('Identity start index: {}'.format(self.tid_start_index))
records = []
cname2cid = mot_label()
for img_index in range(self.total_imgs):
for i, (k, v) in enumerate(self.img_start_index.items()):
if img_index >= v:
data_name = list(self.label_files.keys())[i]
start_index = v
img_file = self.img_files[data_name][img_index - start_index]
lbl_file = self.label_files[data_name][img_index - start_index]
if not os.path.exists(img_file):
logger.warning('Illegal image file: {}, and it will be ignored'.
format(img_file))
continue
if not os.path.isfile(lbl_file):
logger.warning('Illegal label file: {}, and it will be ignored'.
format(lbl_file))
continue
labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
# each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
cx, cy = labels[:, 2], labels[:, 3]
w, h = labels[:, 4], labels[:, 5]
gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
gt_class = labels[:, 0:1].astype('int32')
gt_score = np.ones((len(labels), 1)).astype('float32')
gt_ide = labels[:, 1:2].astype('int32')
for i, _ in enumerate(gt_ide):
if gt_ide[i] > -1:
gt_ide[i] += self.tid_start_index[data_name]
mot_rec = {
'im_file': img_file,
'im_id': img_index,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_ide': gt_ide,
}
for k, v in gt_rec.items():
if k in self.data_fields:
mot_rec[k] = v
records.append(mot_rec)
if self.sample_num > 0 and img_index >= self.sample_num:
break
assert len(records) > 0, 'not found any mot record in %s' % (
self.image_lists)
self.roidbs, self.cname2cid = records, cname2cid
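# A minimal usage sketch (not part of the original file); the paths below are
# assumptions that follow the directory layout documented in MOTDataSet above.
def _mot_dataset_usage_sketch():
    dataset = MOTDataSet(
        dataset_dir='dataset/mot',
        image_lists=['mot17.train'],
        data_fields=['image', 'gt_bbox', 'gt_class', 'gt_ide'])
    dataset.parse_dataset()
    # number of chip records and total single-class identities
    return len(dataset.roidbs), dataset.num_identities_dict[0]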
@register
@serializable
class MCMOTDataSet(DetDataset):
"""
Load dataset with MOT format, support multi-class MOT.
Args:
dataset_dir (str): root directory for dataset.
image_lists (list(str)): MCMOT data image lists, for a multi-source MCMOT dataset.
data_fields (list): key name of data dictionary, at least have 'image'.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
sample_num (int): number of samples to load, -1 means all.
Notes:
MCMOT datasets root directory following this:
dataset/mot
|——————image_lists
| |——————visdrone_mcmot.train
| |——————visdrone_mcmot.val
visdrone_mcmot
|——————images
| └——————train
| └——————val
└——————labels_with_ids
└——————train
"""
def __init__(self,
dataset_dir=None,
image_lists=[],
data_fields=['image'],
label_list=None,
sample_num=-1):
super(MCMOTDataSet, self).__init__(
dataset_dir=dataset_dir,
data_fields=data_fields,
sample_num=sample_num)
self.dataset_dir = dataset_dir
self.image_lists = image_lists
if isinstance(self.image_lists, str):
self.image_lists = [self.image_lists]
self.label_list = label_list
self.roidbs = None
self.cname2cid = None
def get_anno(self):
if self.image_lists == []:
return
# only used to get categories and metric
# only check the first sub-dataset; the label_list of all sub-datasets should be the same.
first_mot_data = self.image_lists[0].split('.')[0]
anno_file = os.path.join(self.dataset_dir, first_mot_data,
'label_list.txt')
return anno_file
def parse_dataset(self):
self.img_files = OrderedDict()
self.img_start_index = OrderedDict()
self.label_files = OrderedDict()
self.tid_num = OrderedDict()
self.tid_start_idx_of_cls_ids = defaultdict(dict) # for MCMOT
img_index = 0
for data_name in self.image_lists:
# check every data image list
image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
assert os.path.isdir(image_lists_dir), \
"The {} is not a directory.".format(image_lists_dir)
list_path = os.path.join(image_lists_dir, data_name)
assert os.path.exists(list_path), \
"The list path {} does not exist.".format(list_path)
# record img_files, filter out empty ones
with open(list_path, 'r') as file:
self.img_files[data_name] = file.readlines()
self.img_files[data_name] = [
os.path.join(self.dataset_dir, x.strip())
for x in self.img_files[data_name]
]
self.img_files[data_name] = list(
filter(lambda x: len(x) > 0, self.img_files[data_name]))
self.img_start_index[data_name] = img_index
img_index += len(self.img_files[data_name])
# record label_files
self.label_files[data_name] = [
x.replace('images', 'labels_with_ids').replace(
'.png', '.txt').replace('.jpg', '.txt')
for x in self.img_files[data_name]
]
for data_name, label_paths in self.label_files.items():
# using max_ids_dict rather than max_index
max_ids_dict = defaultdict(int)
for lp in label_paths:
lb = np.loadtxt(lp)
if len(lb) < 1:
continue
lb = lb.reshape(-1, 6)
for item in lb:
if item[1] > max_ids_dict[int(item[0])]:
# item[0]: cls_id
# item[1]: track id
max_ids_dict[int(item[0])] = int(item[1])
# track id number
self.tid_num[data_name] = max_ids_dict
last_idx_dict = defaultdict(int)
for i, (k, v) in enumerate(self.tid_num.items()): # each sub dataset
for cls_id, id_num in v.items(): # v is a max_ids_dict
self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[cls_id]
last_idx_dict[cls_id] += id_num
self.num_identities_dict = defaultdict(int)
for k, v in last_idx_dict.items():
self.num_identities_dict[k] = int(v) # total ids of each category
self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
self.total_imgs = sum(self.num_imgs_each_data)
# cname2cid and cid2cname
cname2cid = {}
if self.label_list is not None:
# if a label_list is used for a multi-source mixed dataset,
# make sure the label_list exists in the first sub_dataset at least.
sub_dataset = self.image_lists[0].split('.')[0]
label_path = os.path.join(self.dataset_dir, sub_dataset,
self.label_list)
if not os.path.exists(label_path):
logger.info(
"Note: label_list {} does not exists, use VisDrone 10 classes labels as default.".
format(label_path))
cname2cid = visdrone_mcmot_label()
else:
with open(label_path, 'r') as fr:
label_id = 0
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = visdrone_mcmot_label()
cid2cname = dict([(v, k) for (k, v) in cname2cid.items()])
logger.info('MCMOT dataset summary: ')
logger.info(self.tid_num)
logger.info('Total images: {}'.format(self.total_imgs))
logger.info('Image start index: {}'.format(self.img_start_index))
logger.info('Total identities of each category: ')
num_identities_dict = sorted(
self.num_identities_dict.items(), key=lambda x: x[0])
total_IDs_all_cats = 0
for (k, v) in num_identities_dict:
logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
v))
total_IDs_all_cats += v
logger.info('Total identities of all categories: {}'.format(
total_IDs_all_cats))
logger.info('Identity start index of each category: ')
for k, v in self.tid_start_idx_of_cls_ids.items():
sorted_v = sorted(v.items(), key=lambda x: x[0])
for (cls_id, start_idx) in sorted_v:
logger.info('Start index of dataset {} category {:d} is {:d}'
.format(k, cls_id, start_idx))
records = []
for img_index in range(self.total_imgs):
for i, (k, v) in enumerate(self.img_start_index.items()):
if img_index >= v:
data_name = list(self.label_files.keys())[i]
start_index = v
img_file = self.img_files[data_name][img_index - start_index]
lbl_file = self.label_files[data_name][img_index - start_index]
if not os.path.exists(img_file):
logger.warning('Illegal image file: {}, and it will be ignored'.
format(img_file))
continue
if not os.path.isfile(lbl_file):
logger.warning('Illegal label file: {}, and it will be ignored'.
format(lbl_file))
continue
labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
# each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
cx, cy = labels[:, 2], labels[:, 3]
w, h = labels[:, 4], labels[:, 5]
gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
gt_class = labels[:, 0:1].astype('int32')
gt_score = np.ones((len(labels), 1)).astype('float32')
gt_ide = labels[:, 1:2].astype('int32')
for i, _ in enumerate(gt_ide):
if gt_ide[i] > -1:
cls_id = int(gt_class[i])
start_idx = self.tid_start_idx_of_cls_ids[data_name][cls_id]
gt_ide[i] += start_idx
mot_rec = {
'im_file': img_file,
'im_id': img_index,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'gt_ide': gt_ide,
}
for k, v in gt_rec.items():
if k in self.data_fields:
mot_rec[k] = v
records.append(mot_rec)
if self.sample_num > 0 and img_index >= self.sample_num:
break
assert len(records) > 0, 'not found any mot record in %s' % (
self.image_lists)
self.roidbs, self.cname2cid = records, cname2cid
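# Illustrative example of the per-class identity offset (assumed numbers): if
# sub-dataset A has max ids {0: 10, 3: 5} and sub-dataset B has {0: 7}, then
# tid_start_idx_of_cls_ids is {A: {0: 0, 3: 0}, B: {0: 10}}, so a class-0 track
# id t read from B is stored as t + 10 and identities stay unique per category
# across sub-datasets.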
@register
@serializable
class MOTImageFolder(DetDataset):
"""
Load MOT dataset in MOT format from an image folder or a video.
Args:
video_file (str): path of the video file, default ''.
frame_rate (int): frame rate of the video; if not set, it is read via cv2.VideoCapture.
dataset_dir (str): root directory for dataset.
keep_ori_im (bool): whether to keep original image, default False.
Set True when used during MOT model inference while saving
images or video, or used in DeepSORT.
"""
def __init__(self,
video_file=None,
frame_rate=-1,
dataset_dir=None,
data_root=None,
image_dir=None,
sample_num=-1,
keep_ori_im=False,
anno_path=None,
**kwargs):
super(MOTImageFolder, self).__init__(
dataset_dir, image_dir, sample_num=sample_num)
self.video_file = video_file
self.data_root = data_root
self.keep_ori_im = keep_ori_im
self._imid2path = {}
self.roidbs = None
self.frame_rate = frame_rate
self.anno_path = anno_path
def check_or_download_dataset(self):
return
def parse_dataset(self, ):
if not self.roidbs:
if self.video_file is None:
self.frame_rate = 30 # default frame rate when inferring on an image folder
self.roidbs = self._load_images()
else:
self.roidbs = self._load_video_images()
def _load_video_images(self):
if self.frame_rate == -1:
# if frame_rate is not set for video, use cv2.VideoCapture
cap = cv2.VideoCapture(self.video_file)
self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
extension = self.video_file.split('.')[-1]
output_path = self.video_file.replace('.{}'.format(extension), '')
frames_path = video2frames(self.video_file, output_path,
self.frame_rate)
self.video_frames = sorted(
glob.glob(os.path.join(frames_path, '*.png')))
self.video_length = len(self.video_frames)
logger.info('Length of the video: {:d} frames.'.format(
self.video_length))
ct = 0
records = []
for image in self.video_frames:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
if self.keep_ori_im:
rec.update({'keep_ori_im': 1})
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def _find_images(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._find_images()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
rec = {'im_id': np.array([ct]), 'im_file': image}
if self.keep_ori_im:
rec.update({'keep_ori_im': 1})
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self.image_dir = images
self.roidbs = self._load_images()
def set_video(self, video_file, frame_rate):
# update video_file and frame_rate by command line of tools/infer_mot.py
self.video_file = video_file
self.frame_rate = frame_rate
assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
"wrong or unsupported file format: {}".format(self.video_file)
self.roidbs = self._load_video_images()
def get_anno(self):
return self.anno_path
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
return f.lower().endswith(extensions)
def video2frames(video_path, outpath, frame_rate, **kargs):
def _dict2str(kargs):
cmd_str = ''
for k, v in kargs.items():
cmd_str += (' ' + str(k) + ' ' + str(v))
return cmd_str
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = os.path.basename(video_path).split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = os.path.join(out_full_path, '%08d.png')
cmd = ffmpeg + [
' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
]
cmd = ''.join(cmd) + _dict2str(kargs)
if os.system(cmd) != 0:
raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
sys.exit(-1)
sys.stdout.flush()
return out_full_path
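# Usage sketch (hypothetical paths; requires ffmpeg on PATH):
#   frames_dir = video2frames('demo/test.mp4', 'output', frame_rate=30)
# frames are then written as output/test/00000001.png, 00000002.png, ...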
def mot_label():
labels_map = {'person': 0}
return labels_map
def visdrone_mcmot_label():
labels_map = {
'pedestrian': 0,
'people': 1,
'bicycle': 2,
'car': 3,
'van': 4,
'truck': 5,
'tricycle': 6,
'awning-tricycle': 7,
'bus': 8,
'motor': 9,
}
return labels_map

View File

@@ -0,0 +1,380 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import numpy as np
import json
import copy
import pycocotools
from pycocotools.coco import COCO
from .dataset import DetDataset
from ppdet.core.workspace import register, serializable
from paddle.io import Dataset
@serializable
class Pose3DDataset(DetDataset):
"""Pose3D Dataset class.
Args:
dataset_dir (str): Root path to the dataset.
anno_list (list of str): each element is a relative path to an annotation file.
image_dirs (list of str): each element is a relative path to a directory where images are held.
transform (composed(operators)): A sequence of data transforms.
test_mode (bool): Store True when building test or
validation dataset. Default: False.
24 joints order:
0-2: 'R_Ankle', 'R_Knee', 'R_Hip',
3-5:'L_Hip', 'L_Knee', 'L_Ankle',
6-8:'R_Wrist', 'R_Elbow', 'R_Shoulder',
9-11:'L_Shoulder','L_Elbow','L_Wrist',
12-14:'Neck','Top_of_Head','Pelvis',
15-18:'Thorax','Spine','Jaw','Head',
19-23:'Nose','L_Eye','R_Eye','L_Ear','R_Ear'
"""
def __init__(self,
dataset_dir,
image_dirs,
anno_list,
transform=[],
num_joints=24,
test_mode=False):
super().__init__(dataset_dir, image_dirs, anno_list)
self.image_info = {}
self.ann_info = {}
self.num_joints = num_joints
self.transform = transform
self.test_mode = test_mode
self.img_ids = []
self.dataset_dir = dataset_dir
self.image_dirs = image_dirs
self.anno_list = anno_list
def get_mask(self, mvm_percent=0.3):
num_joints = self.num_joints
mjm_mask = np.ones((num_joints, 1)).astype(np.float32)
if self.test_mode == False:
pb = np.random.random_sample()
masked_num = int(
pb * mvm_percent *
num_joints) # at most x% of the joints could be masked
indices = np.random.choice(
np.arange(num_joints), replace=False, size=masked_num)
mjm_mask[indices, :] = 0.0
# return mjm_mask
num_joints = 10
mvm_mask = np.ones((num_joints, 1)).astype(np.float32)
if self.test_mode == False:
num_vertices = num_joints
pb = np.random.random_sample()
masked_num = int(
pb * mvm_percent *
num_vertices) # at most x% of the vertices could be masked
indices = np.random.choice(
np.arange(num_vertices), replace=False, size=masked_num)
mvm_mask[indices, :] = 0.0
mjm_mask = np.concatenate([mjm_mask, mvm_mask], axis=0)
return mjm_mask
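# For example, with num_joints = 24 and mvm_percent = 0.3, at most
# int(0.3 * 24) = 7 joint rows can be zeroed for a training sample, and the
# returned mask has shape (24 + 10, 1) once the vertex mask is concatenated.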
def filterjoints(self, x):
if self.num_joints == 24:
return x
elif self.num_joints == 14:
return x[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18], :]
elif self.num_joints == 17:
return x[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 19], :]
else:
raise ValueError(
"unsupported joint numbers, only [24 or 17 or 14] is supported!")
def parse_dataset(self):
print("Loading annotations..., please wait")
self.annos = []
im_id = 0
self.human36m_num = 0
for idx, annof in enumerate(self.anno_list):
img_prefix = os.path.join(self.dataset_dir, self.image_dirs[idx])
dataf = os.path.join(self.dataset_dir, annof)
with open(dataf, 'r') as rf:
anno_data = json.load(rf)
annos = anno_data['data']
new_annos = []
print("{} has annos numbers: {}".format(dataf, len(annos)))
for anno in annos:
new_anno = {}
new_anno['im_id'] = im_id
im_id += 1
imagename = anno['imageName']
if imagename.startswith("COCO_train2014_"):
imagename = imagename[len("COCO_train2014_"):]
elif imagename.startswith("COCO_val2014_"):
imagename = imagename[len("COCO_val2014_"):]
imagename = os.path.join(img_prefix, imagename)
if not os.path.exists(imagename):
if "train2017" in imagename:
imagename = imagename.replace("train2017",
"val2017")
if not os.path.exists(imagename):
print("cannot find imagepath:{}".format(
imagename))
continue
else:
print("cannot find imagepath:{}".format(imagename))
continue
new_anno['imageName'] = imagename
if 'human3.6m' in imagename:
self.human36m_num += 1
new_anno['bbox_center'] = anno['bbox_center']
new_anno['bbox_scale'] = anno['bbox_scale']
new_anno['joints_2d'] = np.array(anno[
'gt_keypoint_2d']).astype(np.float32)
if new_anno['joints_2d'].shape[0] == 49:
# if the joints_2d is in SPIN format (which is generated by EFT), keep the last 24 public joints
# for details please refer to: https://github.com/nkolot/SPIN/blob/master/constants.py
new_anno['joints_2d'] = new_anno['joints_2d'][25:]
new_anno['joints_3d'] = np.array(anno[
'pose3d'])[:, :3].astype(np.float32)
new_anno['mjm_mask'] = self.get_mask()
if not 'has_3d_joints' in anno:
new_anno['has_3d_joints'] = int(1)
new_anno['has_2d_joints'] = int(1)
else:
new_anno['has_3d_joints'] = int(anno['has_3d_joints'])
new_anno['has_2d_joints'] = int(anno['has_2d_joints'])
new_anno['joints_2d'] = self.filterjoints(new_anno[
'joints_2d'])
self.annos.append(new_anno)
del annos
def get_temp_num(self):
"""get temporal data number, like human3.6m"""
return self.human36m_num
def __len__(self):
"""Get dataset length."""
return len(self.annos)
def _get_imganno(self, idx):
"""Get anno for a single image."""
return self.annos[idx]
def __getitem__(self, idx):
"""Prepare image for training given the index."""
records = copy.deepcopy(self._get_imganno(idx))
imgpath = records['imageName']
assert os.path.exists(imgpath), "cannot find image {}".format(imgpath)
records['image'] = cv2.imread(imgpath)
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
records = self.transform(records)
return records
def check_or_download_dataset(self):
alldatafind = True
for image_dir in self.image_dirs:
image_dir = os.path.join(self.dataset_dir, image_dir)
if not os.path.isdir(image_dir):
print("dataset [{}] is not found".format(image_dir))
alldatafind = False
if not alldatafind:
raise ValueError(
"Some dataset is not valid and cannot download automatically now, please prepare the dataset first"
)
@register
@serializable
class Keypoint3DMultiFramesDataset(Dataset):
"""24 keypoints 3D dataset for pose estimation.
each item is a list of images
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
Args:
dataset_dir (str): Root path to the dataset.
image_dir (str): Path to a directory where images are held.
"""
def __init__(
self,
dataset_dir, # root directory of the dataset
image_dir, # directory holding the images
p3d_dir, # directory holding the 3D keypoint files
json_path,
img_size, # target image size after resizing
num_frames, # length of each frame sequence
anno_path=None, ):
self.dataset_dir = dataset_dir
self.image_dir = image_dir
self.p3d_dir = p3d_dir
self.json_path = json_path
self.img_size = img_size
self.num_frames = num_frames
self.anno_path = anno_path
self.data_labels, self.mf_inds = self._generate_multi_frames_list()
def _generate_multi_frames_list(self):
act_list = os.listdir(self.dataset_dir) # list of actions
count = 0
mf_list = []
annos_dict = {'images': [], 'annotations': [], 'act_inds': []}
for act in act_list: # generate frame sequences for each action
if '.' in act:
continue
json_path = os.path.join(self.dataset_dir, act, self.json_path)
with open(json_path, 'r') as j:
annos = json.load(j)
length = len(annos['images'])
for k, v in annos.items():
if k in annos_dict:
annos_dict[k].extend(v)
annos_dict['act_inds'].extend([act] * length)
mf = [[i + j + count for j in range(self.num_frames)]
for i in range(0, length - self.num_frames + 1)]
mf_list.extend(mf)
count += length
print("total data number:", len(mf_list))
return annos_dict, mf_list
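# For example, with num_frames = 6 and an action of 8 frames starting at
# global index 100, the generated windows are [100..105], [101..106] and
# [102..107].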
def __call__(self, *args, **kwargs):
return self
def __getitem__(self, index): # fetch one continuous frame sequence
inds = self.mf_inds[
index] # e.g. [568, 569, 570, 571, 572, 573], with length num_frames
images = self.data_labels['images'] # all images
annots = self.data_labels['annotations'] # all annots
act = self.data_labels['act_inds'][inds[0]] # action name (folder name)
kps3d_list = []
kps3d_vis_list = []
names = []
h, w = 0, 0
for ind in inds: # one image
height = float(images[ind]['height'])
width = float(images[ind]['width'])
name = images[ind]['file_name'] # image file name, with extension
kps3d_name = name.split('.')[0] + '.obj'
kps3d_path = os.path.join(self.dataset_dir, act, self.p3d_dir,
kps3d_name)
joints, joints_vis = self.kps3d_process(kps3d_path)
joints_vis = np.array(joints_vis, dtype=np.float32)
kps3d_list.append(joints)
kps3d_vis_list.append(joints_vis)
names.append(name)
kps3d = np.array(kps3d_list) # (6, 24, 3),(num_frames, joints_num, 3)
kps3d_vis = np.array(kps3d_vis_list)
# read image
imgs = []
for name in names:
img_path = os.path.join(self.dataset_dir, act, self.image_dir, name)
image = cv2.imread(img_path, cv2.IMREAD_COLOR |
cv2.IMREAD_IGNORE_ORIENTATION)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
imgs.append(np.expand_dims(image, axis=0))
imgs = np.concatenate(imgs, axis=0)
imgs = imgs.astype(
np.float32) # (6, 1080, 1920, 3),(num_frames, h, w, c)
# attention: images and annotations are mirrored at this point
records = {
'kps3d': kps3d,
'kps3d_vis': kps3d_vis,
"image": imgs,
'act': act,
'names': names,
'im_id': index
}
return self.transform(records)
def kps3d_process(self, kps3d_path):
count = 0
kps = []
kps_vis = []
with open(kps3d_path, 'r') as f:
lines = f.readlines()
for line in lines:
if line[0] == 'v':
kps.append([])
line = line.strip('\n').split(' ')[1:]
for kp in line:
kps[-1].append(float(kp))
count += 1
kps_vis.append([1, 1, 1])
kps = np.array(kps) # all parsed vertices (e.g. 523 of them), shape (N, 3)
kps_vis = np.array(kps_vis)
kps *= 10 # scale points
kps -= kps[[0], :] # set root point to zero
kps = np.concatenate((kps[0:23], kps[[37]]), axis=0) # 24,3
kps *= 10
kps_vis = np.concatenate((kps_vis[0:23], kps_vis[[37]]), axis=0) # 24,3
return kps, kps_vis
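# Sketch of the expected .obj content (illustrative): only vertex lines such as
#   v 0.012 -0.345 1.208
# are read; the 24 keypoints kept afterwards are vertices 0-22 plus vertex 37.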
def __len__(self):
return len(self.mf_inds)
def get_anno(self):
if self.anno_path is None:
return
return os.path.join(self.dataset_dir, self.anno_path)
def check_or_download_dataset(self):
return
def parse_dataset(self, ):
return
def set_transform(self, transform):
self.transform = transform
def set_epoch(self, epoch_id):
self._epoch = epoch_id
def set_kwargs(self, **kwargs):
self.mixup_epoch = kwargs.get('mixup_epoch', -1)
self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)

View File

@@ -0,0 +1,194 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import json
import copy
import numpy as np
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from ppdet.core.workspace import register, serializable
from ppdet.data.crop_utils.annotation_cropper import AnnoCropper
from .coco import COCODataSet
from .dataset import _make_dataset, _is_valid_file
from ppdet.utils.logger import setup_logger
logger = setup_logger('sniper_coco_dataset')
@register
@serializable
class SniperCOCODataSet(COCODataSet):
"""SniperCOCODataSet"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
proposals_file=None,
data_fields=['image'],
sample_num=-1,
load_crowd=False,
allow_empty=True,
empty_ratio=1.,
is_trainset=True,
image_target_sizes=[2000, 1000],
valid_box_ratio_ranges=[[-1, 0.1],[0.08, -1]],
chip_target_size=500,
chip_target_stride=200,
use_neg_chip=False,
max_neg_num_per_im=8,
max_per_img=-1,
nms_thresh=0.5):
super(SniperCOCODataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
load_crowd=load_crowd,
allow_empty=allow_empty,
empty_ratio=empty_ratio
)
self.proposals_file = proposals_file
self.proposals = None
self.anno_cropper = None
self.is_trainset = is_trainset
self.image_target_sizes = image_target_sizes
self.valid_box_ratio_ranges = valid_box_ratio_ranges
self.chip_target_size = chip_target_size
self.chip_target_stride = chip_target_stride
self.use_neg_chip = use_neg_chip
self.max_neg_num_per_im = max_neg_num_per_im
self.max_per_img = max_per_img
self.nms_thresh = nms_thresh
def parse_dataset(self):
if not hasattr(self, "roidbs"):
super(SniperCOCODataSet, self).parse_dataset()
if self.is_trainset:
self._parse_proposals()
self._merge_anno_proposals()
self.ori_roidbs = copy.deepcopy(self.roidbs)
self.init_anno_cropper()
self.roidbs = self.generate_chips_roidbs(self.roidbs, self.is_trainset)
def set_proposals_file(self, file_path):
self.proposals_file = file_path
def init_anno_cropper(self):
logger.info("Init AnnoCropper...")
self.anno_cropper = AnnoCropper(
image_target_sizes=self.image_target_sizes,
valid_box_ratio_ranges=self.valid_box_ratio_ranges,
chip_target_size=self.chip_target_size,
chip_target_stride=self.chip_target_stride,
use_neg_chip=self.use_neg_chip,
max_neg_num_per_im=self.max_neg_num_per_im,
max_per_img=self.max_per_img,
nms_thresh=self.nms_thresh
)
def generate_chips_roidbs(self, roidbs, is_trainset):
if is_trainset:
roidbs = self.anno_cropper.crop_anno_records(roidbs)
else:
roidbs = self.anno_cropper.crop_infer_anno_records(roidbs)
return roidbs
def _parse_proposals(self):
if self.proposals_file:
self.proposals = {}
logger.info("Parse proposals file:{}".format(self.proposals_file))
with open(self.proposals_file, 'r') as f:
proposals = json.load(f)
for prop in proposals:
image_id = prop["image_id"]
if image_id not in self.proposals:
self.proposals[image_id] = []
x, y, w, h = prop["bbox"]
self.proposals[image_id].append([x, y, x + w, y + h])
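# The proposals file is expected to be a JSON list of dicts carrying at least
# "image_id" and an [x, y, w, h] "bbox" (illustrative entry, not a real file):
#   [{"image_id": 42, "bbox": [10.0, 20.0, 100.0, 50.0]}, ...]
# each bbox is converted to [x1, y1, x2, y2] above.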
def _merge_anno_proposals(self):
assert self.roidbs
if self.proposals and len(self.proposals.keys()) > 0:
logger.info("merge proposals to annos")
for id, record in enumerate(self.roidbs):
image_id = int(record["im_id"])
if image_id not in self.proposals.keys():
logger.info("image id :{} no proposals".format(image_id))
record["proposals"] = np.array(self.proposals.get(image_id, []), dtype=np.float32)
self.roidbs[id] = record
def get_ori_roidbs(self):
if not hasattr(self, "ori_roidbs"):
return None
return self.ori_roidbs
def get_roidbs(self):
if not hasattr(self, "roidbs"):
self.parse_dataset()
return self.roidbs
def set_roidbs(self, roidbs):
self.roidbs = roidbs
def check_or_download_dataset(self):
return
def _parse(self):
image_dir = self.image_dir
if not isinstance(image_dir, Sequence):
image_dir = [image_dir]
images = []
for im_dir in image_dir:
if os.path.isdir(im_dir):
im_dir = os.path.join(self.dataset_dir, im_dir)
images.extend(_make_dataset(im_dir))
elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
images.append(im_dir)
return images
def _load_images(self):
images = self._parse()
ct = 0
records = []
for image in images:
assert image != '' and os.path.isfile(image), \
"Image {} not found".format(image)
if self.sample_num > 0 and ct >= self.sample_num:
break
im = cv2.imread(image)
h, w, c = im.shape
rec = {'im_id': np.array([ct]), 'im_file': image, "h": h, "w": w}
self._imid2path[ct] = image
ct += 1
records.append(rec)
assert len(records) > 0, "No image file found"
return records
def get_imid2path(self):
return self._imid2path
def set_images(self, images):
self._imid2path = {}
self.image_dir = images
self.roidbs = self._load_images()

View File

@@ -0,0 +1,234 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import xml.etree.ElementTree as ET
from ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class VOCDataSet(DetDataset):
"""
Load dataset with PascalVOC format.
Notes:
`anno_path` must contains xml file and image file path for annotations.
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): voc annotation file path.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
label_list (str): if use_default_label is False, will load
mapping between category and class index.
allow_empty (bool): whether to load empty entry. False as default
empty_ratio (float): the ratio of empty records to total
records; if empty_ratio is out of [0., 1.), do not sample the
records and use all the empty entries. 1. as default
repeat (int): repeat times for dataset, use in benchmark.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
label_list=None,
allow_empty=False,
empty_ratio=1.,
repeat=1):
super(VOCDataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
repeat=repeat)
self.label_list = label_list
self.allow_empty = allow_empty
self.empty_ratio = empty_ratio
def _sample_empty(self, records, num):
# if empty_ratio is out of [0. ,1.), do not sample the records
if self.empty_ratio < 0. or self.empty_ratio >= 1.:
return records
import random
sample_num = min(
int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
records = random.sample(records, sample_num)
return records
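# For example, with empty_ratio = 0.2 and num = 100 non-empty records, at most
# int(100 * 0.2 / 0.8) = 25 empty records are kept (fewer if not that many
# exist), so empty entries make up roughly 20% of the final set.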
def parse_dataset(self, ):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
# mapping category name to class id
# first_class:0, second_class:1, ...
records = []
empty_records = []
ct = 0
cname2cid = {}
if self.label_list:
label_path = os.path.join(self.dataset_dir, self.label_list)
if not os.path.exists(label_path):
raise ValueError("label_list {} does not exists".format(
label_path))
with open(label_path, 'r') as fr:
label_id = 0
for line in fr.readlines():
cname2cid[line.strip()] = label_id
label_id += 1
else:
cname2cid = pascalvoc_label()
with open(anno_path, 'r') as fr:
while True:
line = fr.readline()
if not line:
break
img_file, xml_file = [os.path.join(image_dir, x) \
for x in line.strip().split()[:2]]
if not os.path.exists(img_file):
logger.warning(
'Illegal image file: {}, and it will be ignored'.format(
img_file))
continue
if not os.path.isfile(xml_file):
logger.warning(
'Illegal xml file: {}, and it will be ignored'.format(
xml_file))
continue
tree = ET.parse(xml_file)
if tree.find('id') is None:
im_id = np.array([ct])
else:
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
if im_w < 0 or im_h < 0:
logger.warning(
'Illegal width: {} or height: {} in annotation, '
'and {} will be ignored'.format(im_w, im_h, xml_file))
continue
num_bbox, i = len(objs), 0
gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
for obj in objs:
cname = obj.find('name').text
# user dataset may not contain difficult field
_difficult = obj.find('difficult')
_difficult = int(
_difficult.text) if _difficult is not None else 0
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(im_w - 1, x2)
y2 = min(im_h - 1, y2)
if x2 > x1 and y2 > y1:
gt_bbox[i, :] = [x1, y1, x2, y2]
gt_class[i, 0] = cname2cid[cname]
gt_score[i, 0] = 1.
difficult[i, 0] = _difficult
i += 1
else:
logger.warning(
'Found an invalid bbox in annotations: xml_file: {}'
', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
xml_file, x1, y1, x2, y2))
gt_bbox = gt_bbox[:i, :]
gt_class = gt_class[:i, :]
gt_score = gt_score[:i, :]
difficult = difficult[:i, :]
voc_rec = {
'im_file': img_file,
'im_id': im_id,
'h': im_h,
'w': im_w
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_class': gt_class,
'gt_score': gt_score,
'gt_bbox': gt_bbox,
'difficult': difficult
}
for k, v in gt_rec.items():
if k in self.data_fields:
voc_rec[k] = v
if len(objs) == 0:
empty_records.append(voc_rec)
else:
records.append(voc_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
if self.allow_empty and len(empty_records) > 0:
empty_records = self._sample_empty(empty_records, len(records))
records += empty_records
self.roidbs, self.cname2cid = records, cname2cid
def get_label_list(self):
return os.path.join(self.dataset_dir, self.label_list)
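# A minimal usage sketch (not part of the original file); the paths and file
# names below are assumptions; the anno file lists "image.jpg anno.xml" pairs.
def _voc_dataset_usage_sketch():
    dataset = VOCDataSet(
        dataset_dir='dataset/voc',
        image_dir='VOCdevkit/VOC2007',
        anno_path='trainval.txt',
        label_list='label_list.txt',
        data_fields=['image', 'gt_bbox', 'gt_class', 'difficult'])
    dataset.parse_dataset()
    # mapping from category name to class id built during parsing
    return dataset.cname2cid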
def pascalvoc_label():
labels_map = {
'aeroplane': 0,
'bicycle': 1,
'bird': 2,
'boat': 3,
'bottle': 4,
'bus': 5,
'car': 6,
'cat': 7,
'chair': 8,
'cow': 9,
'diningtable': 10,
'dog': 11,
'horse': 12,
'motorbike': 13,
'person': 14,
'pottedplant': 15,
'sheep': 16,
'sofa': 17,
'train': 18,
'tvmonitor': 19
}
return labels_map

View File

@@ -0,0 +1,180 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
from ppdet.core.workspace import register, serializable
from .dataset import DetDataset
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class WIDERFaceDataSet(DetDataset):
"""
Load WiderFace records with 'anno_path'
Args:
dataset_dir (str): root directory for dataset.
image_dir (str): directory for images.
anno_path (str): WiderFace annotation data.
data_fields (list): key name of data dictionary, at least have 'image'.
sample_num (int): number of samples to load, -1 means all.
with_lmk (bool): whether to load face landmark keypoint labels.
"""
def __init__(self,
dataset_dir=None,
image_dir=None,
anno_path=None,
data_fields=['image'],
sample_num=-1,
with_lmk=False):
super(WIDERFaceDataSet, self).__init__(
dataset_dir=dataset_dir,
image_dir=image_dir,
anno_path=anno_path,
data_fields=data_fields,
sample_num=sample_num,
with_lmk=with_lmk)
self.anno_path = anno_path
self.sample_num = sample_num
self.roidbs = None
self.cname2cid = None
self.with_lmk = with_lmk
def parse_dataset(self):
anno_path = os.path.join(self.dataset_dir, self.anno_path)
image_dir = os.path.join(self.dataset_dir, self.image_dir)
txt_file = anno_path
records = []
ct = 0
file_lists = self._load_file_list(txt_file)
cname2cid = widerface_label()
for item in file_lists:
im_fname = item[0]
im_id = np.array([ct])
gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
for index_box in range(len(item)):
if index_box < 1:
continue
gt_bbox[index_box - 1] = item[index_box][0]
if self.with_lmk:
gt_lmk_labels[index_box - 1] = item[index_box][1]
lmk_ignore_flag[index_box - 1] = item[index_box][2]
im_fname = os.path.join(image_dir,
im_fname) if image_dir else im_fname
widerface_rec = {
'im_file': im_fname,
'im_id': im_id,
} if 'image' in self.data_fields else {}
gt_rec = {
'gt_bbox': gt_bbox,
'gt_class': gt_class,
}
for k, v in gt_rec.items():
if k in self.data_fields:
widerface_rec[k] = v
if self.with_lmk:
widerface_rec['gt_keypoint'] = gt_lmk_labels
widerface_rec['keypoint_ignore'] = lmk_ignore_flag
if len(item) != 0:
records.append(widerface_rec)
ct += 1
if self.sample_num > 0 and ct >= self.sample_num:
break
assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
logger.debug('{} samples in file {}'.format(ct, anno_path))
self.roidbs, self.cname2cid = records, cname2cid
def _load_file_list(self, input_txt):
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
file_dict = {}
num_class = 0
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for i in range(len(lines_input_txt)):
line_txt = lines_input_txt[i].strip('\n\t\r')
split_str = line_txt.split(' ')
if len(split_str) == 1:
img_file_name = os.path.split(split_str[0])[1]
split_txt = img_file_name.split('.')
if len(split_txt) < 2:
continue
elif split_txt[-1] in exts:
if i != 0:
num_class += 1
file_dict[num_class] = [line_txt]
else:
if len(line_txt) <= 6:
continue
result_boxs = []
xmin = float(split_str[0])
ymin = float(split_str[1])
w = float(split_str[2])
h = float(split_str[3])
# Filter out wrong labels
if w < 0 or h < 0:
logger.warning('Illegal box with w: {}, h: {} in '
'img: {}, and it will be ignored'.format(
w, h, file_dict[num_class][0]))
continue
xmin = max(0, xmin)
ymin = max(0, ymin)
xmax = xmin + w
ymax = ymin + h
gt_bbox = [xmin, ymin, xmax, ymax]
result_boxs.append(gt_bbox)
if self.with_lmk:
assert len(split_str) > 18, 'When `with_lmk=True`, the number ' \
'of fields per line in the annotation file should ' \
'exceed 18.'
lmk0_x = float(split_str[5])
lmk0_y = float(split_str[6])
lmk1_x = float(split_str[8])
lmk1_y = float(split_str[9])
lmk2_x = float(split_str[11])
lmk2_y = float(split_str[12])
lmk3_x = float(split_str[14])
lmk3_y = float(split_str[15])
lmk4_x = float(split_str[17])
lmk4_y = float(split_str[18])
lmk_ignore_flag = 0 if lmk0_x == -1 else 1
gt_lmk_label = [
lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
lmk3_y, lmk4_x, lmk4_y
]
result_boxs.append(gt_lmk_label)
result_boxs.append(lmk_ignore_flag)
file_dict[num_class].append(result_boxs)
return list(file_dict.values())
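# Annotation sketch (hedged, inferred from the parsing above): a line holding a
# single token is treated as an image path, and each following line holds at
# least 'xmin ymin w h'; with `with_lmk=True` five landmark (x, y) pairs are
# additionally read from zero-based columns 5-6, 8-9, 11-12, 14-15 and 17-18.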
def widerface_label():
labels_map = {'face': 0}
return labels_map

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
from . import rotated_operators
from . import keypoints_3d_operators
from . import culane_operators
from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
from .rotated_operators import *
from .keypoints_3d_operators import *
from .culane_operators import *
__all__ = []
__all__ += registered_ops
__all__ += keypoint_operators.__all__
__all__ += mot_operators.__all__
__all__ += culane_operators.__all__

View File

@@ -0,0 +1,421 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
"""Calculate overlap between two set of bboxes.
If ``is_aligned `` is ``False``, then calculate the overlaps between each
bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
pair of bboxes1 and bboxes2.
Args:
bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
B indicates the batch dim, in shape (B1, B2, ..., Bn).
If ``is_aligned `` is ``True``, then m and n must be equal.
mode (str): "iou" (intersection over union) or "iof" (intersection over
foreground).
is_aligned (bool, optional): If True, then m and n must be equal.
Default False.
eps (float, optional): A value added to the denominator for numerical
stability. Default 1e-6.
Returns:
Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
"""
assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
mode)
# Either the boxes are empty or the length of the boxes' last dimension is 4
assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
# Batch dim must be the same
# Batch dim: (B1, B2, ... Bn)
assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
batch_shape = bboxes1.shape[:-2]
rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
if is_aligned:
assert rows == cols
if rows * cols == 0:
if is_aligned:
return np.random.random(batch_shape + (rows, ))
else:
return np.random.random(batch_shape + (rows, cols))
area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
bboxes1[..., 3] - bboxes1[..., 1])
area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
bboxes2[..., 3] - bboxes2[..., 1])
if is_aligned:
lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2]) # [B, rows, 2]
rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:]) # [B, rows, 2]
wh = (rb - lt).clip(min=0) # [B, rows, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1 + area2 - overlap
else:
union = area1
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
if mode == 'diou':
enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
b1_x1, b1_y1 = bboxes1[..., 0], bboxes1[..., 1]
b1_x2, b1_y2 = bboxes1[..., 2], bboxes1[..., 3]
b2_x1, b2_y1 = bboxes2[..., 0], bboxes2[..., 1]
b2_x2, b2_y2 = bboxes2[..., 2], bboxes2[..., 3]
else:
lt = np.maximum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2]) # [B, rows, cols, 2]
rb = np.minimum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:]) # [B, rows, cols, 2]
wh = (rb - lt).clip(min=0) # [B, rows, cols, 2]
overlap = wh[..., 0] * wh[..., 1]
if mode in ['iou', 'giou']:
union = area1[..., None] + area2[..., None, :] - overlap
else:
union = area1[..., None]
if mode == 'giou':
enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2])
enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:])
if mode == 'diou':
enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
bboxes2[..., None, :, :2])
enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
bboxes2[..., None, :, 2:])
b1_x1, b1_y1 = bboxes1[..., :, None, 0], bboxes1[..., :, None, 1]
b1_x2, b1_y2 = bboxes1[..., :, None, 2], bboxes1[..., :, None, 3]
b2_x1, b2_y1 = bboxes2[..., None, :, 0], bboxes2[..., None, :, 1]
b2_x2, b2_y2 = bboxes2[..., None, :, 2], bboxes2[..., None, :, 3]
eps = np.array([eps])
union = np.maximum(union, eps)
ious = overlap / union
if mode in ['iou', 'iof']:
return ious
# calculate gious
if mode in ['giou']:
enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
enclose_area = np.maximum(enclose_area, eps)
gious = ious - (enclose_area - union) / enclose_area
return gious
if mode in ['diou']:
left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
rho2 = left + right
enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
enclose_c = np.maximum(enclose_c, eps)
dious = ious - rho2 / enclose_c
return dious
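# Worked example (illustrative): for a = [0, 0, 10, 10] and b = [5, 5, 15, 15]
# the intersection is 5 * 5 = 25 and the union is 100 + 100 - 25 = 175, so
# bbox_overlaps(np.array([a], 'float32'), np.array([b], 'float32')) returns an
# IoU of roughly 0.143.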
def topk_(input, k, axis=1, largest=True):
x = -input if largest else input
if axis == 0:
row_index = np.arange(input.shape[1 - axis])
if k == x.shape[0]: # argpartition requires index < len(input)
topk_index = np.argpartition(x, k - 1, axis=axis)[0:k, :]
else:
topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
topk_data = x[topk_index, row_index]
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[topk_index_sort, row_index]
topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
else:
column_index = np.arange(x.shape[1 - axis])[:, None]
topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
topk_data = x[column_index, topk_index]
topk_data = -topk_data if largest else topk_data
topk_index_sort = np.argsort(topk_data, axis=axis)
topk_data_sort = topk_data[column_index, topk_index_sort]
topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
return topk_data_sort, topk_index_sort
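# For example (illustrative): topk_(np.array([[3., 1., 2.]]), k=2, axis=1,
# largest=False) returns values [[1., 2.]] and indices [[1, 2]].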
class ATSSAssigner(object):
"""Assign a corresponding gt bbox or background to each bbox.
Each proposals will be assigned with `0` or a positive integer
indicating the ground truth index.
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
Args:
topk (int): number of bboxes selected on each level
"""
def __init__(self, topk=9):
self.topk = topk
def __call__(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""Assign gt to bboxes.
The assignment is done in following steps
1. compute iou between all bbox (bbox of all pyramid levels) and gt
2. compute center distance between all bbox and gt
3. on each pyramid level, for each gt, select the k bboxes whose centers
are closest to the gt center, so we select k*l bboxes in total as
candidates for each gt
4. get the corresponding iou for these candidates, and compute the
mean and std; set mean + std as the iou threshold
5. select the candidates whose iou is greater than or equal to
the threshold as positive
6. limit the positive sample's center in gt
Args:
bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
"""
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
# assign 0 by default
assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = np.zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if not np.any(gt_labels):
assigned_labels = None
else:
assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
return assigned_gt_inds, max_overlaps
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = np.stack((gt_cx, gt_cy), axis=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
distances = np.sqrt(
np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
.sum(-1))
# Selecting candidates based on the center distance
candidate_idxs = []
start_idx = 0
for bboxes_per_level in num_level_bboxes:
# on each pyramid level, for each gt,
# select k bbox whose center are closest to the gt center
end_idx = start_idx + bboxes_per_level
distances_per_level = distances[start_idx:end_idx, :]
selectable_k = min(self.topk, bboxes_per_level)
_, topk_idxs_per_level = topk_(
distances_per_level, selectable_k, axis=0, largest=False)
candidate_idxs.append(topk_idxs_per_level + start_idx)
start_idx = end_idx
candidate_idxs = np.concatenate(candidate_idxs, axis=0)
# get the corresponding iou for these candidates, and compute the
# mean and std, set mean + std as the iou threshold
candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps.mean(0)
overlaps_std_per_gt = candidate_overlaps.std(0)
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
# limit the positive sample's center in gt
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
ep_bboxes_cx = np.broadcast_to(
bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
ep_bboxes_cy = np.broadcast_to(
bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
candidate_idxs = candidate_idxs.reshape(-1)
# calculate the left, top, right, bottom distance between positive
# bbox center and gt side
l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
is_pos = is_pos & is_in_gts
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
overlaps_inf[index] = overlaps.T.reshape(-1)[index]
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
max_overlaps = overlaps_inf.max(axis=1)
argmax_overlaps = overlaps_inf.argmax(axis=1)
assigned_gt_inds[max_overlaps !=
-np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
return assigned_gt_inds, max_overlaps
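# Sketch of the thresholding step (illustrative numbers): if the candidate IoUs
# for one gt are [0.5, 0.3, 0.2, 0.1], their mean is 0.275 and std is about
# 0.148, so only candidates with IoU >= ~0.423 (here just the 0.5 one) whose
# centers also fall inside the gt become positive samples.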
def get_vlr_region(self,
bboxes,
num_level_bboxes,
gt_bboxes,
gt_bboxes_ignore=None,
gt_labels=None):
"""get vlr region for ld distillation.
Args:
bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
num_level_bboxes (List): num of bboxes in each level
gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
labelled as `ignored`, e.g., crowd boxes in COCO.
gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
"""
bboxes = bboxes[:, :4]
num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
# compute iou between all bbox and gt
overlaps = bbox_overlaps(bboxes, gt_bboxes)
# compute diou between all bbox and gt
diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')
# assign 0 by default
assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
vlr_region_iou = (assigned_gt_inds + 0).astype(np.float32)
if num_gt == 0 or num_bboxes == 0:
# No ground truth or boxes, return empty assignment
max_overlaps = np.zeros((num_bboxes, ))
if num_gt == 0:
# No truth, assign everything to background
assigned_gt_inds[:] = 0
if not np.any(gt_labels):
assigned_labels = None
else:
assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
return assigned_gt_inds, max_overlaps
# compute center distance between all bbox and gt
gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
gt_points = np.stack((gt_cx, gt_cy), axis=1)
bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
distances = np.sqrt(
np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
.sum(-1))
# Selecting candidates based on the center distance
candidate_idxs = []
candidate_idxs_t = []
start_idx = 0
for bboxes_per_level in num_level_bboxes:
            # on each pyramid level, for each gt,
            # select the k bboxes whose centers are closest to the gt center
end_idx = start_idx + bboxes_per_level
distances_per_level = distances[start_idx:end_idx, :]
selectable_t = min(self.topk, bboxes_per_level)
            selectable_k = bboxes_per_level  # take all bboxes on this level
_, topt_idxs_per_level = topk_(
distances_per_level, selectable_t, axis=0, largest=False)
_, topk_idxs_per_level = topk_(
distances_per_level, selectable_k, axis=0, largest=False)
candidate_idxs_t.append(topt_idxs_per_level + start_idx)
candidate_idxs.append(topk_idxs_per_level + start_idx)
start_idx = end_idx
candidate_idxs_t = np.concatenate(candidate_idxs_t, axis=0)
candidate_idxs = np.concatenate(candidate_idxs, axis=0)
        # get the corresponding iou for these candidates, compute their
        # mean and std, and set mean + std as the iou threshold
candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]
# compute tdiou
t_diou = diou[candidate_idxs, np.arange(num_gt)]
overlaps_mean_per_gt = candidate_overlaps_t.mean(0)
overlaps_std_per_gt = candidate_overlaps_t.std(
0, ddof=1) # NOTE: use Bessel correction
overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
# compute region
is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
t_diou >= 0.25 * overlaps_thr_per_gt[None, :])
        # keep only positive samples whose centers lie inside the gt box
for gt_idx in range(num_gt):
candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
candidate_idxs = candidate_idxs.reshape(-1)
# if an anchor box is assigned to multiple gts,
# the one with the highest IoU will be selected.
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
overlaps_inf[index] = overlaps.T.reshape(-1)[index]
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
max_overlaps = overlaps_inf.max(axis=1)
argmax_overlaps = overlaps_inf.argmax(axis=1)
overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
assigned_gt_inds[max_overlaps !=
-np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1
vlr_region_iou[max_overlaps !=
-np.inf] = max_overlaps[max_overlaps != -np.inf] + 0
return vlr_region_iou
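# --- Illustrative sketch (standalone, not part of the assigner) ----------------
# assign() above follows an ATSS-style recipe: per-gt candidates are picked by
# center distance on every FPN level, and the IoU threshold is adaptive, i.e.
# the mean plus the std of the candidate IoUs. A minimal NumPy sketch of that
# thresholding step on made-up toy values:
import numpy as np
candidate_overlaps = np.array([[0.1, 0.4],
                               [0.3, 0.5],
                               [0.6, 0.2]])  # shape: (num_candidates, num_gt)
adaptive_thr = candidate_overlaps.mean(0) + candidate_overlaps.std(0)
is_pos = candidate_overlaps >= adaptive_thr[None, :]  # per (candidate, gt) mask
print(adaptive_thr)  # one adaptive IoU threshold per gt
print(is_pos)        # only the highest-IoU candidates survive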

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,366 @@
import numpy as np
import imgaug.augmenters as iaa
from .operators import BaseOperator, register_op
from ppdet.utils.logger import setup_logger
from ppdet.data.culane_utils import linestrings_to_lanes, transform_annotation
logger = setup_logger(__name__)
__all__ = [
"CULaneTrainProcess", "CULaneDataProcess", "HorizontalFlip",
"ChannelShuffle", "CULaneAffine", "CULaneResize", "OneOfBlur",
"MultiplyAndAddToBrightness", "AddToHueAndSaturation"
]
def trainTransforms(img_h, img_w):
transforms = [{
'name': 'Resize',
'parameters': dict(size=dict(
height=img_h, width=img_w)),
'p': 1.0
}, {
'name': 'HorizontalFlip',
'parameters': dict(p=1.0),
'p': 0.5
}, {
'name': 'ChannelShuffle',
'parameters': dict(p=1.0),
'p': 0.1
}, {
'name': 'MultiplyAndAddToBrightness',
'parameters': dict(
mul=(0.85, 1.15), add=(-10, 10)),
'p': 0.6
}, {
'name': 'AddToHueAndSaturation',
'parameters': dict(value=(-10, 10)),
'p': 0.7
}, {
'name': 'OneOf',
'transforms': [
dict(
name='MotionBlur', parameters=dict(k=(3, 5))), dict(
name='MedianBlur', parameters=dict(k=(3, 5)))
],
'p': 0.2
}, {
'name': 'Affine',
'parameters': dict(
translate_percent=dict(
x=(-0.1, 0.1), y=(-0.1, 0.1)),
rotate=(-10, 10),
scale=(0.8, 1.2)),
'p': 0.7
}, {
'name': 'Resize',
'parameters': dict(size=dict(
height=img_h, width=img_w)),
'p': 1.0
}]
return transforms
@register_op
class CULaneTrainProcess(BaseOperator):
def __init__(self, img_w, img_h):
super(CULaneTrainProcess, self).__init__()
self.img_w = img_w
self.img_h = img_h
self.transforms = trainTransforms(self.img_h, self.img_w)
if self.transforms is not None:
img_transforms = []
for aug in self.transforms:
p = aug['p']
if aug['name'] != 'OneOf':
img_transforms.append(
iaa.Sometimes(
p=p,
then_list=getattr(iaa, aug['name'])(**aug[
'parameters'])))
else:
img_transforms.append(
iaa.Sometimes(
p=p,
then_list=iaa.OneOf([
getattr(iaa, aug_['name'])(**aug_['parameters'])
for aug_ in aug['transforms']
])))
else:
img_transforms = []
self.iaa_transform = iaa.Sequential(img_transforms)
def apply(self, sample, context=None):
img, line_strings, seg = self.iaa_transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
return sample
@register_op
class CULaneDataProcess(BaseOperator):
def __init__(self, img_w, img_h, num_points, max_lanes):
super(CULaneDataProcess, self).__init__()
self.img_w = img_w
self.img_h = img_h
self.num_points = num_points
self.n_offsets = num_points
self.n_strips = num_points - 1
self.strip_size = self.img_h / self.n_strips
self.max_lanes = max_lanes
self.offsets_ys = np.arange(self.img_h, -1, -self.strip_size)
def apply(self, sample, context=None):
data = {}
line_strings = sample['lanes']
line_strings.clip_out_of_image_()
new_anno = {'lanes': linestrings_to_lanes(line_strings)}
for i in range(30):
try:
annos = transform_annotation(
self.img_w, self.img_h, self.max_lanes, self.n_offsets,
self.offsets_ys, self.n_strips, self.strip_size, new_anno)
label = annos['label']
lane_endpoints = annos['lane_endpoints']
break
except:
if (i + 1) == 30:
logger.critical('Transform annotation failed 30 times :(')
exit()
sample['image'] = sample['image'].astype(np.float32) / 255.
data['image'] = sample['image'].transpose(2, 0, 1)
data['lane_line'] = label
data['seg'] = sample['seg']
data['full_img_path'] = sample['full_img_path']
data['img_name'] = sample['img_name']
data['im_id'] = sample['im_id']
if 'mask' in sample.keys():
data['seg'] = sample['mask'].get_arr()
data['im_shape'] = np.array([self.img_w, self.img_h], dtype=np.float32)
data['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return data
@register_op
class CULaneResize(BaseOperator):
def __init__(self, img_h, img_w, prob=0.5):
super(CULaneResize, self).__init__()
self.img_h = img_h
self.img_w = img_w
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob,
iaa.Resize({
"height": self.img_h,
"width": self.img_w
}))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'].copy().astype(np.uint8),
line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
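# --- Illustrative usage sketch (standalone, not wired into any config) ---------
# Shows CULaneResize on a synthetic sample. Assumes imgaug is installed; the
# image size and the lane coordinates below are made-up toy values.
import numpy as np
from imgaug.augmentables.lines import LineString, LineStringsOnImage
_img = np.zeros((590, 1640, 3), dtype=np.uint8)
_lanes = LineStringsOnImage(
    [LineString([(100, 580), (400, 300)])], shape=_img.shape)
_sample = {'image': _img, 'lanes': _lanes}
_resize = CULaneResize(img_h=320, img_w=800, prob=1.0)  # prob=1.0: always resize
_out = _resize.apply(_sample)
print(_out['image'].shape)                       # (320, 800, 3)
print(_out['lanes'].line_strings[0].coords[:1])  # lane points scaled to the new size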
@register_op
class HorizontalFlip(BaseOperator):
def __init__(self, prob=0.5):
super(HorizontalFlip, self).__init__()
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob, iaa.HorizontalFlip(1.0))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class ChannelShuffle(BaseOperator):
def __init__(self, prob=0.1):
super(ChannelShuffle, self).__init__()
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(self.prob, iaa.ChannelShuffle(1.0))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class MultiplyAndAddToBrightness(BaseOperator):
def __init__(self, mul=(0.85, 1.15), add=(-10, 10), prob=0.5):
super(MultiplyAndAddToBrightness, self).__init__()
self.mul = tuple(mul)
self.add = tuple(add)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.MultiplyAndAddToBrightness(
mul=self.mul, add=self.add))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class AddToHueAndSaturation(BaseOperator):
def __init__(self, value=(-10, 10), prob=0.5):
super(AddToHueAndSaturation, self).__init__()
self.value = tuple(value)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob, iaa.AddToHueAndSaturation(value=self.value))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class OneOfBlur(BaseOperator):
def __init__(self, MotionBlur_k=(3, 5), MedianBlur_k=(3, 5), prob=0.5):
super(OneOfBlur, self).__init__()
self.MotionBlur_k = tuple(MotionBlur_k)
self.MedianBlur_k = tuple(MedianBlur_k)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.OneOf([
iaa.MotionBlur(k=self.MotionBlur_k),
iaa.MedianBlur(k=self.MedianBlur_k)
]))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample
@register_op
class CULaneAffine(BaseOperator):
def __init__(self,
translate_percent_x=(-0.1, 0.1),
translate_percent_y=(-0.1, 0.1),
rotate=(3, 5),
scale=(0.8, 1.2),
prob=0.5):
super(CULaneAffine, self).__init__()
self.translate_percent = {
'x': tuple(translate_percent_x),
'y': tuple(translate_percent_y)
}
self.rotate = tuple(rotate)
self.scale = tuple(scale)
self.prob = prob
def apply(self, sample, context=None):
transform = iaa.Sometimes(
self.prob,
iaa.Affine(
translate_percent=self.translate_percent,
rotate=self.rotate,
scale=self.scale))
if 'mask' in sample.keys():
img, line_strings, seg = transform(
image=sample['image'],
line_strings=sample['lanes'],
segmentation_maps=sample['mask'])
sample['image'] = img
sample['lanes'] = line_strings
sample['mask'] = seg
else:
img, line_strings = transform(
image=sample['image'], line_strings=sample['lanes'])
sample['image'] = img
sample['lanes'] = line_strings
return sample

View File

@@ -0,0 +1,86 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The code is based on:
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import numpy as np
from PIL import Image
class Gridmask(object):
def __init__(self,
use_h=True,
use_w=True,
rotate=1,
offset=False,
ratio=0.5,
mode=1,
prob=0.7,
upper_iter=360000):
super(Gridmask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.prob = prob
self.st_prob = prob
self.upper_iter = upper_iter
def __call__(self, x, curr_iter):
self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
if np.random.rand() > self.prob:
return x
h, w, _ = x.shape
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(2, h)
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2
+ w].astype(np.float32)
if self.mode == 1:
mask = 1 - mask
mask = np.expand_dims(mask, axis=-1)
if self.offset:
offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
x = (x * mask + offset * (1 - mask)).astype(x.dtype)
else:
x = (x * mask).astype(x.dtype)
return x
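# --- Illustrative usage sketch (standalone, toy sizes) --------------------------
# Shows how the GridMask probability ramps up with curr_iter and how the op is
# applied to an HWC uint8 image; not a training configuration.
import numpy as np
_gridmask = Gridmask(prob=0.7, upper_iter=360000)
_img = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
_early = _gridmask(_img, curr_iter=1000)   # effective prob ~= 0.7 * 1000 / 360000
_late = _gridmask(_img, curr_iter=360000)  # effective prob ~= 0.7
print(_early.shape, _late.shape)           # masking never changes the image shape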

File diff suppressed because it is too large

View File

@@ -0,0 +1,296 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
import cv2
import numpy as np
import math
import copy
import random
import uuid
from numbers import Number, Integral
from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
from ppdet.core.workspace import serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
registered_ops = []
__all__ = [
'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
]
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from mpl_toolkits.mplot3d import Axes3D
def register_keypointop(cls):
return serializable(cls)
def register_op(cls):
registered_ops.append(cls.__name__)
if not hasattr(BaseOperator, cls.__name__):
setattr(BaseOperator, cls.__name__, cls)
else:
raise KeyError("The {} class has been registered.".format(cls.__name__))
return serializable(cls)
class BaseOperator(object):
def __init__(self, name=None):
if name is None:
name = self.__class__.__name__
self._id = name + '_' + str(uuid.uuid4())[-6:]
def apply(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
return sample
def __call__(self, sample, context=None):
""" Process a sample.
Args:
sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
context (dict): info about this sample processing
Returns:
result (dict): a processed sample
"""
if isinstance(sample, Sequence): # for batch_size
for i in range(len(sample)):
sample[i] = self.apply(sample[i], context)
else:
# image.shape changed
sample = self.apply(sample, context)
return sample
def __str__(self):
return str(self._id)
@register_keypointop
class CropAndFlipImages(object):
"""Crop all images"""
def __init__(self, crop_range, flip_pairs=None):
super(CropAndFlipImages, self).__init__()
self.crop_range = crop_range
self.flip_pairs = flip_pairs
def __call__(self, records): # tuple
images = records["image"]
images = images[:, :, ::-1, :]
images = images[:, :, self.crop_range[0]:self.crop_range[1]]
records["image"] = images
if "kps2d" in records.keys():
kps2d = records["kps2d"]
width, height = images.shape[2], images.shape[1]
kps2d = np.array(kps2d)
kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]
for pair in self.flip_pairs:
kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
records["kps2d"] = kps2d
return records
@register_op
class PermuteImages(BaseOperator):
def __init__(self):
"""
        Permute the images from (batch_size, H, W, C) to (batch_size, C, H, W), e.g. (6, 3, 1080, 1920)
"""
super(PermuteImages, self).__init__()
def apply(self, sample, context=None):
images = sample["image"]
images = images.transpose((0, 3, 1, 2))
sample["image"] = images
return sample
@register_keypointop
class RandomFlipHalfBody3DTransformImages(object):
"""apply data augment to images and coords
to achieve the flip, scale, rotate and half body transform effect for training image
Args:
trainsize (list):[w, h], Image target size
upper_body_ids (list): The upper body joint ids
flip_pairs (list): The left-right joints exchange order list
pixel_std (int): The pixel std of the scale
scale (float): The scale factor to transform the image
rot (int): The rotate factor to transform the image
num_joints_half_body (int): The joints threshold of the half body transform
prob_half_body (float): The threshold of the half body transform
flip (bool): Whether to flip the image
Returns:
        records (dict): contains the image and coords after being transformed
"""
def __init__(self,
trainsize,
upper_body_ids,
flip_pairs,
pixel_std,
scale=0.35,
rot=40,
num_joints_half_body=8,
prob_half_body=0.3,
flip=True,
rot_prob=0.6,
do_occlusion=False):
super(RandomFlipHalfBody3DTransformImages, self).__init__()
self.trainsize = trainsize
self.upper_body_ids = upper_body_ids
self.flip_pairs = flip_pairs
self.pixel_std = pixel_std
self.scale = scale
self.rot = rot
self.num_joints_half_body = num_joints_half_body
self.prob_half_body = prob_half_body
self.flip = flip
self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
self.rot_prob = rot_prob
self.do_occlusion = do_occlusion
def halfbody_transform(self, joints, joints_vis):
upper_joints = []
lower_joints = []
for joint_id in range(joints.shape[0]):
if joints_vis[joint_id][0] > 0:
if joint_id in self.upper_body_ids:
upper_joints.append(joints[joint_id])
else:
lower_joints.append(joints[joint_id])
if np.random.randn() < 0.5 and len(upper_joints) > 2:
selected_joints = upper_joints
else:
selected_joints = lower_joints if len(
lower_joints) > 2 else upper_joints
if len(selected_joints) < 2:
return None, None
selected_joints = np.array(selected_joints, dtype=np.float32)
center = selected_joints.mean(axis=0)[:2]
left_top = np.amin(selected_joints, axis=0)
right_bottom = np.amax(selected_joints, axis=0)
w = right_bottom[0] - left_top[0]
h = right_bottom[1] - left_top[1]
if w > self.aspect_ratio * h:
h = w * 1.0 / self.aspect_ratio
elif w < self.aspect_ratio * h:
w = h * self.aspect_ratio
scale = np.array(
[w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
dtype=np.float32)
scale = scale * 1.5
return center, scale
def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
# joints: (6, 24, 3),(num_frames, num_joints, 3)
joints[:, :, 0] = width - joints[:, :, 0] - 1 # x
if kps2d is not None:
kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1
for pair in matched_parts:
joints[:, pair[0], :], joints[:,pair[1], :] = \
joints[:,pair[1], :], joints[:,pair[0], :].copy()
joints_vis[:,pair[0], :], joints_vis[:,pair[1], :] = \
joints_vis[:,pair[1], :], joints_vis[:,pair[0], :].copy()
if kps2d is not None:
kps2d[:, pair[0], :], kps2d[:,pair[1], :] = \
kps2d[:,pair[1], :], kps2d[:,pair[0], :].copy()
# move to zero
joints -= joints[:, [0], :] # (batch_size, 24, 3),numpy.ndarray
return joints, joints_vis, kps2d
def __call__(self, records):
        images = records[
            'image']  # images.shape: (num_frames, height, width, 3)
joints = records['kps3d']
joints_vis = records['kps3d_vis']
kps2d = None
if 'kps2d' in records.keys():
kps2d = records['kps2d']
if self.flip and np.random.random() <= 0.5:
            images = images[:, :, ::-1, :]  # flip the images horizontally, e.g. (6, 1080, 810, 3)
joints, joints_vis, kps2d = self.flip_joints(
joints, joints_vis, images.shape[2], self.flip_pairs,
                kps2d)  # mirror the keypoints left-right
occlusion = False
        if self.do_occlusion and random.random() <= 0.5:  # random occlusion
height = images[0].shape[0]
width = images[0].shape[1]
occlusion = True
while True:
area_min = 0.0
area_max = 0.2
synth_area = (random.random() *
(area_max - area_min) + area_min) * width * height
ratio_min = 0.3
ratio_max = 1 / 0.3
synth_ratio = (random.random() *
(ratio_max - ratio_min) + ratio_min)
synth_h = math.sqrt(synth_area * synth_ratio)
synth_w = math.sqrt(synth_area / synth_ratio)
synth_xmin = random.random() * (width - synth_w - 1)
synth_ymin = random.random() * (height - synth_h - 1)
if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
xmin = int(synth_xmin)
ymin = int(synth_ymin)
w = int(synth_w)
h = int(synth_h)
mask = np.random.rand(h, w, 3) * 255
images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
None, :, :, :]
break
records['image'] = images
records['kps3d'] = joints
records['kps3d_vis'] = joints_vis
if kps2d is not None:
records['kps2d'] = kps2d
return records
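# --- Worked sketch of the occlusion-patch geometry above (toy numbers) ----------
# A target area is drawn as a fraction of the image, an aspect ratio is drawn,
# and the patch height/width are recovered so that h * w == area and
# h / w == ratio.
import math
_width, _height = 810, 1080
_area_frac, _ratio = 0.1, 0.5            # sampled from [0.0, 0.2] and [0.3, 1/0.3]
_synth_area = _area_frac * _width * _height
_synth_h = math.sqrt(_synth_area * _ratio)
_synth_w = math.sqrt(_synth_area / _ratio)
assert abs(_synth_h * _synth_w - _synth_area) < 1e-6
assert abs(_synth_h / _synth_w - _ratio) < 1e-6
print(round(_synth_h), round(_synth_w))  # patch size in pixels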

View File

@@ -0,0 +1,627 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Integral
import cv2
import copy
import numpy as np
import random
import math
from .operators import BaseOperator, register_op
from .batch_operators import Gt2TTFTarget
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
from ppdet.utils.logger import setup_logger
from .op_helper import gaussian_radius
logger = setup_logger(__name__)
__all__ = [
'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
'Gt2JDETargetMax', 'Gt2FairMOTTarget'
]
@register_op
class RGBReverse(BaseOperator):
"""RGB to BGR, or BGR to RGB, sensitive to MOTRandomAffine
"""
def __init__(self):
super(RGBReverse, self).__init__()
def apply(self, sample, context=None):
im = sample['image']
sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
return sample
@register_op
class LetterBoxResize(BaseOperator):
def __init__(self, target_size):
"""
Resize image to target size, convert normalized xywh to pixel xyxy
format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
bboxes = bbox0.copy()
bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
return bboxes
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
h, w = sample['im_shape']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
from PIL import UnidentifiedImageError
raise UnidentifiedImageError(
'{}: image is not 3-dimensional.'.format(self))
# apply image
height, width = self.target_size
img, ratio, padw, padh = self.apply_image(
im, height=height, width=width)
sample['image'] = img
new_shape = (round(h * ratio), round(w * ratio))
sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
padw, padh)
return sample
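# --- Worked sketch of the letterbox math in apply_image above (toy numbers) -----
# The image is scaled by the smaller of the two ratios, and the remaining space
# is split evenly into left/right and top/bottom padding.
_height, _width = 608, 1088                 # target (height, width)
_shape = (1080, 1920)                       # original (height, width)
_ratio = min(_height / _shape[0], _width / _shape[1])                  # ~0.563
_new_w, _new_h = round(_shape[1] * _ratio), round(_shape[0] * _ratio)  # ~1081 x 608
_padw, _padh = (_width - _new_w) / 2, (_height - _new_h) / 2           # ~3.5, 0.0
print(_ratio, (_new_w, _new_h), (_padw, _padh))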
@register_op
class MOTRandomAffine(BaseOperator):
"""
Affine transform to image and coords to achieve the rotate, scale and
shift effect for training image.
Args:
degrees (list[2]): the rotate range to apply, transform range is [min, max]
translate (list[2]): the translate range to apply, transform range is [min, max]
scale (list[2]): the scale range to apply, transform range is [min, max]
shear (list[2]): the shear range to apply, transform range is [min, max]
        borderValue (list[3]): value used in case of a constant border when applying
            the perspective transformation
reject_outside (bool): reject warped bounding bboxes outside of image
Returns:
        records (dict): contains the image and coords after being transformed
"""
def __init__(self,
degrees=(-5, 5),
translate=(0.10, 0.10),
scale=(0.50, 1.20),
shear=(-2, 2),
borderValue=(127.5, 127.5, 127.5),
reject_outside=True):
super(MOTRandomAffine, self).__init__()
self.degrees = degrees
self.translate = translate
self.scale = scale
self.shear = shear
self.borderValue = borderValue
self.reject_outside = reject_outside
def apply(self, sample, context=None):
# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
border = 0 # width of added border (optional)
img = sample['image']
height, width = img.shape[0], img.shape[1]
# Rotation and Scale
R = np.eye(3)
a = random.random() * (self.degrees[1] - self.degrees[0]
) + self.degrees[0]
s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
R[:2] = cv2.getRotationMatrix2D(
angle=a, center=(width / 2, height / 2), scale=s)
# Translation
T = np.eye(3)
T[0, 2] = (
random.random() * 2 - 1
) * self.translate[0] * height + border # x translation (pixels)
T[1, 2] = (
random.random() * 2 - 1
) * self.translate[1] * width + border # y translation (pixels)
# Shear
S = np.eye(3)
S[0, 1] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # x shear (deg)
S[1, 0] = math.tan((random.random() *
(self.shear[1] - self.shear[0]) + self.shear[0]) *
math.pi / 180) # y shear (deg)
        M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
imw = cv2.warpPerspective(
img,
M,
dsize=(width, height),
flags=cv2.INTER_LINEAR,
borderValue=self.borderValue) # BGR order borderValue
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
targets = sample['gt_bbox']
n = targets.shape[0]
points = targets.copy()
area0 = (points[:, 2] - points[:, 0]) * (
points[:, 3] - points[:, 1])
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = (xy @M.T)[:, :2].reshape(n, 8)
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# apply angle-based reduction
radians = a * math.pi / 180
reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
x = (xy[:, 2] + xy[:, 0]) / 2
y = (xy[:, 3] + xy[:, 1]) / 2
w = (xy[:, 2] - xy[:, 0]) * reduction
h = (xy[:, 3] - xy[:, 1]) * reduction
xy = np.concatenate(
(x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
# reject warped points outside of image
if self.reject_outside:
np.clip(xy[:, 0], 0, width, out=xy[:, 0])
np.clip(xy[:, 2], 0, width, out=xy[:, 2])
np.clip(xy[:, 1], 0, height, out=xy[:, 1])
np.clip(xy[:, 3], 0, height, out=xy[:, 3])
w = xy[:, 2] - xy[:, 0]
h = xy[:, 3] - xy[:, 1]
area = w * h
ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
if sum(i) > 0:
sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
sample['gt_class'] = sample['gt_class'][i]
if 'difficult' in sample:
sample['difficult'] = sample['difficult'][i]
if 'gt_ide' in sample:
sample['gt_ide'] = sample['gt_ide'][i]
if 'is_crowd' in sample:
sample['is_crowd'] = sample['is_crowd'][i]
sample['image'] = imw
return sample
else:
return sample
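# --- Minimal sketch of the corner warping used by MOTRandomAffine (toy values) --
# Each box corner is lifted to homogeneous coordinates, multiplied by the
# combined 3x3 matrix M = Shear @ Translation @ Rotation, and a new axis-aligned
# box is taken from the min/max of the warped corners.
import numpy as np
_M = np.eye(3)
_M[0, 2], _M[1, 2] = 10.0, -5.0                  # pure translation for the demo
_box = np.array([100.0, 50.0, 200.0, 150.0])     # x1, y1, x2, y2
_corners = np.ones((4, 3))
_corners[:, :2] = _box[[0, 1, 2, 3, 0, 3, 2, 1]].reshape(4, 2)  # x1y1, x2y2, x1y2, x2y1
_warped = (_corners @ _M.T)[:, :2]
_new_box = np.concatenate([_warped.min(0), _warped.max(0)])     # [110., 45., 210., 145.]
print(_new_box)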
@register_op
class Gt2JDETargetThres(BaseOperator):
__shared__ = ['num_classes']
"""
    Generate JDE targets from ground truth data when training
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): identity threshold; anchors with IoU above it take the ground truth identity
        fg_thresh (float): foreground threshold; anchors with IoU above it are foreground
        bg_thresh (float): background threshold; anchors with IoU below it are background
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
ide_thresh=0.5,
fg_thresh=0.5,
bg_thresh=0.4,
num_classes=1):
super(Gt2JDETargetThres, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.ide_thresh = ide_thresh
self.fg_thresh = fg_thresh
self.bg_thresh = bg_thresh
self.num_classes = num_classes
def generate_anchor(self, nGh, nGw, anchor_hw):
nA = len(anchor_hw)
yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))
mesh = np.stack([xx.T, yy.T], axis=0) # [2, nGh, nGw]
mesh = np.repeat(mesh[None, :], nA, axis=0) # [nA, 2, nGh, nGw]
anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)
anchor_mesh = np.concatenate(
[mesh, anchor_offset_mesh], axis=1) # [nA, 4, nGh, nGw]
return anchor_mesh
def encode_delta(self, gt_box_list, fg_anchor_list):
px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
fg_anchor_list[:, 2], fg_anchor_list[:,3]
gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
gt_box_list[:, 2], gt_box_list[:, 3]
dx = (gx - px) / pw
dy = (gy - py) / ph
dw = np.log(gw / pw)
dh = np.log(gh / ph)
return np.stack([dx, dy, dw, dh], axis=1)
def pad_box(self, sample, num_max):
assert 'gt_bbox' in sample
bbox = sample['gt_bbox']
gt_num = len(bbox)
pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
if gt_num > 0:
pad_bbox[:gt_num, :] = bbox[:gt_num, :]
sample['gt_bbox'] = pad_bbox
if 'gt_score' in sample:
pad_score = np.zeros((num_max, ), dtype=np.float32)
if gt_num > 0:
pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
sample['gt_score'] = pad_score
if 'difficult' in sample:
pad_diff = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
sample['difficult'] = pad_diff
if 'is_crowd' in sample:
pad_crowd = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
sample['is_crowd'] = pad_crowd
if 'gt_ide' in sample:
pad_ide = np.zeros((num_max, ), dtype=np.int32)
if gt_num > 0:
pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
sample['gt_ide'] = pad_ide
return sample
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
h, w = samples[0]['image'].shape[1:3]
num_max = 0
for sample in samples:
num_max = max(num_max, len(sample['gt_bbox']))
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
tboxes = np.concatenate([gxy, gwh], axis=1)
anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
anchor_list = np.transpose(anchor_mesh,
(0, 2, 3, 1)).reshape(-1, 4)
iou_pdist = bbox_iou_np_expand(
anchor_list, tboxes, x1y1x2y2=False)
iou_max = np.max(iou_pdist, axis=1)
max_gt_index = np.argmax(iou_pdist, axis=1)
iou_map = iou_max.reshape(nA, nGh, nGw)
gt_index_map = max_gt_index.reshape(nA, nGh, nGw)
id_index = iou_map > self.ide_thresh
fg_index = iou_map > self.fg_thresh
bg_index = iou_map < self.bg_thresh
ign_index = (iou_map < self.fg_thresh) * (
iou_map > self.bg_thresh)
tconf[fg_index] = 1
tconf[bg_index] = 0
tconf[ign_index] = -1
gt_index = gt_index_map[fg_index]
gt_box_list = tboxes[gt_index]
gt_id_list = gt_ide[gt_index_map[id_index]]
if np.sum(fg_index) > 0:
tid[id_index] = gt_id_list
fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
4)[fg_index]
delta_target = self.encode_delta(gt_box_list,
fg_anchor_list)
tbox[fg_index] = delta_target
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
sample.pop('gt_class')
sample = self.pad_box(sample, num_max)
return samples
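# --- Sketch of the box-delta encoding in encode_delta above (toy values) --------
# The regression target is the gt center offset scaled by the anchor size plus
# the log of the size ratio; decoding inverts it.
import numpy as np
_anchor = np.array([[10.0, 10.0, 4.0, 4.0]])   # cx, cy, w, h on the feature map
_gt = np.array([[12.0, 9.0, 8.0, 2.0]])
_dx = (_gt[:, 0] - _anchor[:, 0]) / _anchor[:, 2]
_dy = (_gt[:, 1] - _anchor[:, 1]) / _anchor[:, 3]
_dw = np.log(_gt[:, 2] / _anchor[:, 2])
_dh = np.log(_gt[:, 3] / _anchor[:, 3])
_delta = np.stack([_dx, _dy, _dw, _dh], axis=1)  # [[0.5, -0.25, log 2, -log 2]]
# decoding recovers the gt box from the anchor and the delta
_cx = _anchor[:, 0] + _delta[:, 0] * _anchor[:, 2]   # 12.0
_cy = _anchor[:, 1] + _delta[:, 1] * _anchor[:, 3]   # 9.0
_wh = _anchor[:, 2:4] * np.exp(_delta[:, 2:4])       # [8., 2.]
print(_delta, _cx, _cy, _wh)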
@register_op
class Gt2JDETargetMax(BaseOperator):
__shared__ = ['num_classes']
"""
    Generate JDE targets from ground truth data when evaluating
Args:
anchors (list): anchors of JDE model
anchor_masks (list): anchor_masks of JDE model
downsample_ratios (list): downsample ratios of JDE model
max_iou_thresh (float): iou thresh for high quality anchor
num_classes (int): number of classes
"""
def __init__(self,
anchors,
anchor_masks,
downsample_ratios,
max_iou_thresh=0.60,
num_classes=1):
super(Gt2JDETargetMax, self).__init__()
self.anchors = anchors
self.anchor_masks = anchor_masks
self.downsample_ratios = downsample_ratios
self.max_iou_thresh = max_iou_thresh
self.num_classes = num_classes
def __call__(self, samples, context=None):
assert len(self.anchor_masks) == len(self.downsample_ratios), \
"anchor_masks', and 'downsample_ratios' should have same length."
h, w = samples[0]['image'].shape[1:3]
for sample in samples:
gt_bbox = sample['gt_bbox']
gt_ide = sample['gt_ide']
for i, (anchor_hw, downsample_ratio
) in enumerate(zip(self.anchors, self.downsample_ratios)):
anchor_hw = np.array(
anchor_hw, dtype=np.float32) / downsample_ratio
nA = len(anchor_hw)
nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)
gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)
# iou of targets-anchors (using wh only)
box1 = gwh
box2 = anchor_hw[:, None, :]
inter_area = np.minimum(box1, box2).prod(2)
iou = inter_area / (
box1.prod(1) + box2.prod(2) - inter_area + 1e-16)
# Select best iou_pred and anchor
iou_best = iou.max(0) # best anchor [0-2] for each target
a = np.argmax(iou, axis=0)
# Select best unique target-anchor combinations
iou_order = np.argsort(-iou_best) # best to worst
# Unique anchor selection
u = np.stack((gi, gj, a), 0)[:, iou_order]
_, first_unique = np.unique(u, axis=1, return_index=True)
mask = iou_order[first_unique]
# best anchor must share significant commonality (iou) with target
# TODO: examine arbitrary threshold
idx = mask[iou_best[mask] > self.max_iou_thresh]
if len(idx) > 0:
a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
t_box = gt_bbox[idx]
t_id = gt_ide[idx]
if len(t_box.shape) == 1:
t_box = t_box.reshape(1, 4)
gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
gxy[:, 0] = gxy[:, 0] * nGw
gxy[:, 1] = gxy[:, 1] * nGh
gwh[:, 0] = gwh[:, 0] * nGw
gwh[:, 1] = gwh[:, 1] * nGh
# XY coordinates
tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
# Width and height in yolo method
tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
anchor_hw[a_i])
tconf[a_i, gj_i, gi_i] = 1
tid[a_i, gj_i, gi_i] = t_id
sample['tbox{}'.format(i)] = tbox
sample['tconf{}'.format(i)] = tconf
sample['tide{}'.format(i)] = tid
class Gt2FairMOTTarget(Gt2TTFTarget):
__shared__ = ['num_classes']
"""
    Generate FairMOT targets from ground truth data.
    The differences between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernel radius used to generate the heatmap.
        2. the targets needed during training.
Args:
num_classes(int): the number of classes.
down_ratio(int): the down ratio from images to heatmap, 4 by default.
        max_objs(int): the maximum number of ground truth objects in an image, 500 by default.
"""
def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
super(Gt2TTFTarget, self).__init__()
self.down_ratio = down_ratio
self.num_classes = num_classes
self.max_objs = max_objs
def __call__(self, samples, context=None):
for b_id, sample in enumerate(samples):
output_h = sample['image'].shape[1] // self.down_ratio
output_w = sample['image'].shape[2] // self.down_ratio
heatmap = np.zeros(
(self.num_classes, output_h, output_w), dtype='float32')
bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
index = np.zeros((self.max_objs, ), dtype=np.int64)
index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
reid = np.zeros((self.max_objs, ), dtype=np.int64)
bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
if self.num_classes > 1:
# each category corresponds to a set of track ids
cls_tr_ids = np.zeros(
(self.num_classes, output_h, output_w), dtype=np.int64)
cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
gt_ide = sample['gt_ide']
for k in range(len(gt_bbox)):
cls_id = gt_class[k][0]
bbox = gt_bbox[k]
ide = gt_ide[k][0]
bbox[[0, 2]] = bbox[[0, 2]] * output_w
bbox[[1, 3]] = bbox[[1, 3]] * output_h
bbox_amodal = copy.deepcopy(bbox)
bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
bbox[0] = np.clip(bbox[0], 0, output_w - 1)
bbox[1] = np.clip(bbox[1], 0, output_h - 1)
h = bbox[3]
w = bbox[2]
bbox_xy = copy.deepcopy(bbox)
bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
bbox_xy[3] = bbox_xy[1] + bbox_xy[3]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
radius = max(0, int(radius))
ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
ct_int = ct.astype(np.int32)
self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
radius)
bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]
index[k] = ct_int[1] * output_w + ct_int[0]
center_offset[k] = ct - ct_int
index_mask[k] = 1
reid[k] = ide
bbox_xys[k] = bbox_xy
if self.num_classes > 1:
cls_id_map[ct_int[1], ct_int[0]] = cls_id
cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1
# track id start from 0
sample['heatmap'] = heatmap
sample['index'] = index
sample['offset'] = center_offset
sample['size'] = bbox_size
sample['index_mask'] = index_mask
sample['reid'] = reid
if self.num_classes > 1:
sample['cls_id_map'] = cls_id_map
sample['cls_tr_ids'] = cls_tr_ids
sample['bbox_xys'] = bbox_xys
sample.pop('is_crowd', None)
sample.pop('difficult', None)
sample.pop('gt_class', None)
sample.pop('gt_bbox', None)
sample.pop('gt_score', None)
sample.pop('gt_ide', None)
return samples
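# --- Sketch of the flattened center index computed above (toy numbers) ----------
# The heatmap has shape (output_h, output_w) and the per-object index is
# row-major, so an integer center (x, y) maps to y * output_w + x.
import numpy as np
_output_h, _output_w = 152, 272
_ct_int = np.array([37, 90])                  # (x, y) on the heatmap
_index = _ct_int[1] * _output_w + _ct_int[0]  # 90 * 272 + 37 = 24517
_flat = np.arange(_output_h * _output_w).reshape(_output_h, _output_w)
assert _flat[_ct_int[1], _ct_int[0]] == _index
print(_index)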

View File

@@ -0,0 +1,494 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains helper methods for BBOX processing
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import random
import math
import cv2
def meet_emit_constraint(src_bbox, sample_bbox):
center_x = (src_bbox[2] + src_bbox[0]) / 2
center_y = (src_bbox[3] + src_bbox[1]) / 2
if center_x >= sample_bbox[0] and \
center_x <= sample_bbox[2] and \
center_y >= sample_bbox[1] and \
center_y <= sample_bbox[3]:
return True
return False
def clip_bbox(src_bbox):
src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
return src_bbox
def bbox_area(src_bbox):
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def is_overlap(object_bbox, sample_bbox):
if object_bbox[0] >= sample_bbox[2] or \
object_bbox[2] <= sample_bbox[0] or \
object_bbox[1] >= sample_bbox[3] or \
object_bbox[3] <= sample_bbox[1]:
return False
else:
return True
def filter_and_process(sample_bbox, bboxes, labels, scores=None,
keypoints=None):
new_bboxes = []
new_labels = []
new_scores = []
new_keypoints = []
new_kp_ignore = []
for i in range(len(bboxes)):
new_bbox = [0, 0, 0, 0]
obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
if not meet_emit_constraint(obj_bbox, sample_bbox):
continue
if not is_overlap(obj_bbox, sample_bbox):
continue
sample_width = sample_bbox[2] - sample_bbox[0]
sample_height = sample_bbox[3] - sample_bbox[1]
new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
new_bbox = clip_bbox(new_bbox)
if bbox_area(new_bbox) > 0:
new_bboxes.append(new_bbox)
new_labels.append([labels[i][0]])
if scores is not None:
new_scores.append([scores[i][0]])
if keypoints is not None:
sample_keypoint = keypoints[0][i]
for j in range(len(sample_keypoint)):
kp_len = sample_height if j % 2 else sample_width
sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
sample_keypoint[j] = (
sample_keypoint[j] - sample_coord) / kp_len
sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
new_keypoints.append(sample_keypoint)
new_kp_ignore.append(keypoints[1][i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
if keypoints is not None:
keypoints = np.array(new_keypoints)
new_kp_ignore = np.array(new_kp_ignore)
return bboxes, labels, scores, (keypoints, new_kp_ignore)
return bboxes, labels, scores
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
new_bboxes = []
new_labels = []
new_scores = []
for i, bbox in enumerate(bboxes):
w = float((bbox[2] - bbox[0]) * target_size)
h = float((bbox[3] - bbox[1]) * target_size)
if w * h < float(min_size * min_size):
continue
else:
new_bboxes.append(bbox)
new_labels.append(labels[i])
if scores is not None and scores.size != 0:
new_scores.append(scores[i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
return bboxes, labels, scores
def generate_sample_bbox(sampler):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def generate_sample_bbox_square(sampler, image_width, image_height):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
if image_height < image_width:
bbox_width = bbox_height * image_height / image_width
else:
bbox_height = bbox_width * image_width / image_height
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
resize_width):
num_gt = len(bbox_labels)
# np.random.randint range: [low, high)
rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
if num_gt != 0:
norm_xmin = bbox_labels[rand_idx][0]
norm_ymin = bbox_labels[rand_idx][1]
norm_xmax = bbox_labels[rand_idx][2]
norm_ymax = bbox_labels[rand_idx][3]
xmin = norm_xmin * image_width
ymin = norm_ymin * image_height
wid = image_width * (norm_xmax - norm_xmin)
hei = image_height * (norm_ymax - norm_ymin)
range_size = 0
area = wid * hei
for scale_ind in range(0, len(scale_array) - 1):
if area > scale_array[scale_ind] ** 2 and area < \
scale_array[scale_ind + 1] ** 2:
range_size = scale_ind + 1
break
if area > scale_array[len(scale_array) - 2]**2:
range_size = len(scale_array) - 2
scale_choose = 0.0
if range_size == 0:
rand_idx_size = 0
else:
# np.random.randint range: [low, high)
rng_rand_size = np.random.randint(0, range_size + 1)
rand_idx_size = rng_rand_size % (range_size + 1)
if rand_idx_size == range_size:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = min(2.0 * scale_array[rand_idx_size],
2 * math.sqrt(wid * hei))
scale_choose = random.uniform(min_resize_val, max_resize_val)
else:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = 2.0 * scale_array[rand_idx_size]
scale_choose = random.uniform(min_resize_val, max_resize_val)
sample_bbox_size = wid * resize_width / scale_choose
w_off_orig = 0.0
h_off_orig = 0.0
if sample_bbox_size < max(image_height, image_width):
if wid <= sample_bbox_size:
w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
xmin)
else:
w_off_orig = np.random.uniform(xmin,
xmin + wid - sample_bbox_size)
if hei <= sample_bbox_size:
h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
ymin)
else:
h_off_orig = np.random.uniform(ymin,
ymin + hei - sample_bbox_size)
else:
w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)
w_off_orig = math.floor(w_off_orig)
h_off_orig = math.floor(h_off_orig)
# Figure out top left coordinates.
w_off = float(w_off_orig / image_width)
h_off = float(h_off_orig / image_height)
sampled_bbox = [
w_off, h_off, w_off + float(sample_bbox_size / image_width),
h_off + float(sample_bbox_size / image_height)
]
return sampled_bbox
else:
return 0
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
sample_bbox[1] >= object_bbox[3] or \
sample_bbox[3] <= object_bbox[1]:
return 0
intersect_xmin = max(sample_bbox[0], object_bbox[0])
intersect_ymin = max(sample_bbox[1], object_bbox[1])
intersect_xmax = min(sample_bbox[2], object_bbox[2])
intersect_ymax = min(sample_bbox[3], object_bbox[3])
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
def intersect_bbox(bbox1, bbox2):
if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
intersection_box = [0.0, 0.0, 0.0, 0.0]
else:
intersection_box = [
max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
]
return intersection_box
def bbox_coverage(bbox1, bbox2):
inter_box = intersect_bbox(bbox1, bbox2)
intersect_size = bbox_area(inter_box)
if intersect_size > 0:
bbox1_size = bbox_area(bbox1)
return intersect_size / bbox1_size
else:
return 0.
def satisfy_sample_constraint(sampler,
sample_bbox,
gt_bboxes,
satisfy_all=False):
if sampler[6] == 0 and sampler[7] == 0:
return True
satisfied = []
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
satisfied.append(False)
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
satisfied.append(False)
continue
satisfied.append(True)
if not satisfy_all:
return True
if satisfy_all:
return np.all(satisfied)
else:
return False
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
if sampler[6] == 0 and sampler[7] == 0:
has_jaccard_overlap = False
else:
has_jaccard_overlap = True
if sampler[8] == 0 and sampler[9] == 0:
has_object_coverage = False
else:
has_object_coverage = True
if not has_jaccard_overlap and not has_object_coverage:
return True
found = False
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
if has_jaccard_overlap:
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
continue
found = True
if has_object_coverage:
object_coverage = bbox_coverage(object_bbox, sample_bbox)
if sampler[8] != 0 and \
object_coverage < sampler[8]:
continue
if sampler[9] != 0 and \
object_coverage > sampler[9]:
continue
found = True
if found:
return True
return found
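# --- Illustrative sketch of the sampler layout consumed above (toy values) ------
# Indices 2-3 are the scale range, 4-5 the aspect-ratio range, 6-7 the min/max
# jaccard-overlap constraint and 8-9 the min/max coverage constraint; the first
# two entries are not read by these helpers.
import numpy as np
_sampler = [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0, 0.0, 0.0]
_crop = generate_sample_bbox(_sampler)                 # normalized [xmin, ymin, xmax, ymax]
_gt = np.array([[0.4, 0.4, 0.6, 0.6]])
_ok = satisfy_sample_constraint(_sampler, _crop, _gt)  # requires IoU >= 0.1 with a gt
print(_crop, _ok)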
def crop_image_sampling(img, sample_bbox, image_width, image_height,
target_size):
# no clipping here
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
w_off = xmin
h_off = ymin
width = xmax - xmin
height = ymax - ymin
cross_xmin = max(0.0, float(w_off))
cross_ymin = max(0.0, float(h_off))
cross_xmax = min(float(w_off + width - 1.0), float(image_width))
cross_ymax = min(float(h_off + height - 1.0), float(image_height))
cross_width = cross_xmax - cross_xmin
cross_height = cross_ymax - cross_ymin
roi_xmin = 0 if w_off >= 0 else abs(w_off)
roi_ymin = 0 if h_off >= 0 else abs(h_off)
roi_width = cross_width
roi_height = cross_height
roi_y1 = int(roi_ymin)
roi_y2 = int(roi_ymin + roi_height)
roi_x1 = int(roi_xmin)
roi_x2 = int(roi_xmin + roi_width)
cross_y1 = int(cross_ymin)
cross_y2 = int(cross_ymin + cross_height)
cross_x1 = int(cross_xmin)
cross_x2 = int(cross_xmin + cross_width)
sample_img = np.zeros((height, width, 3))
sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
img[cross_y1: cross_y2, cross_x1: cross_x2]
sample_img = cv2.resize(
sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
return sample_img
def is_poly(segm):
assert isinstance(segm, (list, dict)), \
"Invalid segm type: {}".format(type(segm))
return isinstance(segm, list)
def gaussian_radius(bbox_size, min_overlap):
height, width = bbox_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
radius1 = (b1 + sq1) / (2 * a1)
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
radius2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
radius3 = (b3 + sq3) / 2
return min(radius1, radius2, radius3)
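# --- Worked sketch of gaussian_radius above (toy box size) ----------------------
# Three quadratics are solved (gt corner shifted, predicted corner shifted, both
# shifted) and the smallest radius is kept; the target ops clamp and round it
# before drawing the gaussian peak.
_radius = gaussian_radius((24, 32), min_overlap=0.7)
_radius = max(0, int(_radius))   # same clamping as used by the target ops
print(_radius)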
def draw_gaussian(heatmap, center, radius, k=1, delte=6):
diameter = 2 * radius + 1
sigma = diameter / delte
gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)
x, y = center
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
def gaussian2D(shape, sigma_x=1, sigma_y=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
sigma_y)))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_umich_gaussian(heatmap, center, radius, k=1):
"""
draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
"""
diameter = 2 * radius + 1
gaussian = gaussian2D(
(diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def get_border(border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
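# Illustrative usage sketch (not part of the original file): get_border shrinks a
# requested crop border until the remaining extent (size - border) is strictly larger
# than the border itself, as used for CenterNet-style random cropping of small images.
def _demo_get_border():
    return get_border(128, 512), get_border(128, 200)  # -> (128, 64)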

File diff suppressed because it is too large

View File

@@ -0,0 +1,480 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
try:
from collections.abc import Sequence
except Exception:
from collections import Sequence
from numbers import Number, Integral
import cv2
import numpy as np
import math
import copy
import os
from PIL import Image, ImageDraw
# os / PIL are needed by VisibleRBox below; ImageError, raised in RResize, is assumed
# to be defined alongside BaseOperator in .operators
from .operators import register_op, BaseOperator, ImageError
from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
from ppdet.utils.logger import setup_logger
from ppdet.utils.compact import imagedraw_textsize_c
logger = setup_logger(__name__)
@register_op
class RRotate(BaseOperator):
""" Rotate Image, Polygon, Box
Args:
scale (float): rotate scale
angle (float): rotate angle
fill_value (int, tuple): fill color
auto_bound (bool): whether auto bound or not
"""
def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
super(RRotate, self).__init__()
self.scale = scale
self.angle = angle
self.fill_value = fill_value
self.auto_bound = auto_bound
def get_rotated_matrix(self, angle, scale, h, w):
center = ((w - 1) * 0.5, (h - 1) * 0.5)
matrix = cv2.getRotationMatrix2D(center, -angle, scale)
# calculate the new size
cos = np.abs(matrix[0, 0])
sin = np.abs(matrix[0, 1])
new_w = h * sin + w * cos
new_h = h * cos + w * sin
# calculate offset
n_w = int(np.round(new_w))
n_h = int(np.round(new_h))
if self.auto_bound:
ratio = min(w / n_w, h / n_h)
matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
else:
matrix[0, 2] += (new_w - w) * 0.5
matrix[1, 2] += (new_h - h) * 0.5
w = n_w
h = n_h
return matrix, h, w
def get_rect_from_pts(self, pts, h, w):
""" get minimum rectangle of points
"""
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
axis=1)
max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
axis=1)
min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
return boxes
def apply_image(self, image, matrix, h, w):
return cv2.warpAffine(
image, matrix, (w, h), borderValue=self.fill_value)
def apply_pts(self, pts, matrix, h, w):
assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
# n is number of samples and m is two times the number of points due to (x, y)
_, m = pts.shape
# transpose points
pts_ = pts.reshape(-1, 2).T
# pad 1 to convert the points to homogeneous coordinates
padding = np.ones((1, pts_.shape[1]), pts.dtype)
rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
return rotated_pts[:2, :].T.reshape(-1, m)
def apply(self, sample, context=None):
image = sample['image']
h, w = image.shape[:2]
matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
sample['image'] = self.apply_image(image, matrix, h, w)
polys = sample['gt_poly']
# TODO: segment or keypoint to be processed
if len(polys) > 0:
pts = self.apply_pts(polys, matrix, h, w)
sample['gt_poly'] = pts
sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
return sample
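# Illustrative usage sketch (not part of the original file): rotate a synthetic sample by
# 30 degrees; 'gt_poly' is an (N, 8) array of quadrilaterals as expected by apply_pts.
def _demo_rrotate():
    sample = {
        'image': np.zeros((240, 320, 3), dtype=np.uint8),
        'gt_poly': np.array([[40., 40., 120., 40., 120., 90., 40., 90.]],
                            dtype=np.float32),
    }
    rotator = RRotate(scale=1.0, angle=30., fill_value=0., auto_bound=True)
    out = rotator.apply(sample)
    return out['gt_bbox']  # axis-aligned boxes of the rotated polygons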
@register_op
class RandomRRotate(BaseOperator):
""" Random Rotate Image
Args:
scale (float, tuple, list): rotate scale
scale_mode (str): mode of scale, [range, value, None]
angle (float, tuple, list): rotate angle
angle_mode (str): mode of angle, [range, value, None]
fill_value (float, tuple, list): fill value
rotate_prob (float): probability of rotation
auto_bound (bool): whether auto bound or not
"""
def __init__(self,
scale=1.0,
scale_mode=None,
angle=0.,
angle_mode=None,
fill_value=0.,
rotate_prob=1.0,
auto_bound=True):
super(RandomRRotate, self).__init__()
self.scale = scale
self.scale_mode = scale_mode
self.angle = angle
self.angle_mode = angle_mode
self.fill_value = fill_value
self.rotate_prob = rotate_prob
self.auto_bound = auto_bound
def get_angle(self, angle, angle_mode):
assert not angle_mode or angle_mode in [
'range', 'value'
], 'angle mode should be in [range, value, None]'
if not angle_mode:
return angle
elif angle_mode == 'range':
low, high = angle
return np.random.rand() * (high - low) + low
elif angle_mode == 'value':
return np.random.choice(angle)
def get_scale(self, scale, scale_mode):
assert not scale_mode or scale_mode in [
'range', 'value'
], 'scale mode should be in [range, value, None]'
if not scale_mode:
return scale
elif scale_mode == 'range':
low, high = scale
return np.random.rand() * (high - low) + low
elif scale_mode == 'value':
return np.random.choice(scale)
def apply(self, sample, context=None):
if np.random.rand() > self.rotate_prob:
return sample
angle = self.get_angle(self.angle, self.angle_mode)
scale = self.get_scale(self.scale, self.scale_mode)
rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
return rotator(sample)
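# Illustrative usage sketch (not part of the original file): rotate a synthetic sample by
# an angle drawn uniformly from [-45, 45] degrees with probability 0.5; the scale stays
# fixed at 1.0 because scale_mode is left as None.
def _demo_random_rrotate():
    sample = {
        'image': np.zeros((240, 320, 3), dtype=np.uint8),
        'gt_poly': np.array([[40., 40., 120., 40., 120., 90., 40., 90.]],
                            dtype=np.float32),
    }
    op = RandomRRotate(scale=1.0, angle=(-45., 45.), angle_mode='range',
                       rotate_prob=0.5)
    return op.apply(sample)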
@register_op
class Poly2RBox(BaseOperator):
""" Polygon to Rotated Box, using new OpenCV definition since 4.5.1
Args:
filter_threshold (int, float): threshold to filter annotations
filter_mode (str): filter mode, ['area', 'edge']
rbox_type (str): rbox type, ['le135', 'oc']
"""
def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
super(Poly2RBox, self).__init__()
self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np
def filter(self, size, threshold, mode):
if mode == 'area':
if size[0] * size[1] < threshold:
return True
elif mode == 'edge':
if min(size) < threshold:
return True
return False
def get_rbox(self, polys):
valid_ids, rboxes, bboxes = [], [], []
for i, poly in enumerate(polys):
cx, cy, w, h, angle = self.rbox_fn(poly)
if self.filter_fn((w, h)):
continue
rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
valid_ids.append(i)
xmin, ymin = min(poly[0::2]), min(poly[1::2])
xmax, ymax = max(poly[0::2]), max(poly[1::2])
bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
if len(valid_ids) == 0:
rboxes = np.zeros((0, 5), dtype=np.float32)
bboxes = np.zeros((0, 4), dtype=np.float32)
else:
rboxes = np.stack(rboxes)
bboxes = np.stack(bboxes)
return rboxes, bboxes, valid_ids
def apply(self, sample, context=None):
rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
sample['gt_rbox'] = rboxes
sample['gt_bbox'] = bboxes
for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
if k in sample:
sample[k] = sample[k][valid_ids]
return sample
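# Illustrative usage sketch (not part of the original file): convert (N, 8) polygons to
# 5-parameter rotated boxes in the 'le135' definition, discarding boxes whose shorter
# edge is below 4 pixels; 'gt_class' is filtered consistently with the kept boxes.
def _demo_poly2rbox():
    sample = {
        'gt_poly': np.array([[40., 40., 120., 40., 120., 90., 40., 90.]],
                            dtype=np.float32),
        'gt_class': np.array([[0]], dtype=np.int32),
    }
    op = Poly2RBox(filter_threshold=4, filter_mode='edge', rbox_type='le135')
    out = op.apply(sample)
    return out['gt_rbox'], out['gt_bbox']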
@register_op
class Poly2Array(BaseOperator):
""" convert gt_poly to np.array for rotated bboxes
"""
def __init__(self):
super(Poly2Array, self).__init__()
def apply(self, sample, context=None):
if 'gt_poly' in sample:
sample['gt_poly'] = np.array(
sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
return sample
@register_op
class RResize(BaseOperator):
def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
"""
Resize image to target size. if keep_ratio is True,
resize the image's long side to the maximum of target_size
if keep_ratio is False, resize the image to target size(h, w)
Args:
target_size (int|list): image target size
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): the interpolation method
"""
super(RResize, self).__init__()
self.keep_ratio = keep_ratio
self.interp = interp
if not isinstance(target_size, (Integral, Sequence)):
raise TypeError(
"Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
format(type(target_size)))
if isinstance(target_size, Integral):
target_size = [target_size, target_size]
self.target_size = target_size
def apply_image(self, image, scale):
im_scale_x, im_scale_y = scale
return cv2.resize(
image,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
def apply_pts(self, pts, scale, size):
im_scale_x, im_scale_y = scale
resize_w, resize_h = size
pts[:, 0::2] *= im_scale_x
pts[:, 1::2] *= im_scale_y
pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
return pts
def apply(self, sample, context=None):
""" Resize the image numpy.
"""
im = sample['image']
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
# apply image
im_shape = im.shape
if self.keep_ratio:
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = min(target_size_min / im_size_min,
target_size_max / im_size_max)
resize_h = im_scale * float(im_shape[0])
resize_w = im_scale * float(im_shape[1])
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / im_shape[0]
im_scale_x = resize_w / im_shape[1]
im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
sample['image'] = im.astype(np.float32)
sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
if 'scale_factor' in sample:
scale_factor = sample['scale_factor']
sample['scale_factor'] = np.asarray(
[scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
dtype=np.float32)
else:
sample['scale_factor'] = np.asarray(
[im_scale_y, im_scale_x], dtype=np.float32)
# apply bbox
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
# apply polygon
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
[im_scale_x, im_scale_y],
[resize_w, resize_h])
return sample
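# Illustrative usage sketch (not part of the original file): keep-ratio resize so the
# short side fits 800 and the long side fits 1333, a common detection setting; image,
# boxes and polygons are rescaled consistently and 'scale_factor' is recorded.
def _demo_rresize():
    sample = {
        'image': np.zeros((240, 320, 3), dtype=np.uint8),
        'gt_poly': np.array([[40., 40., 120., 40., 120., 90., 40., 90.]],
                            dtype=np.float32),
    }
    op = RResize(target_size=[800, 1333], keep_ratio=True)
    out = op.apply(sample)
    return out['im_shape'], out['scale_factor']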
@register_op
class RandomRFlip(BaseOperator):
def __init__(self, prob=0.5):
"""
Args:
prob (float): the probability of flipping image
"""
super(RandomRFlip, self).__init__()
self.prob = prob
if not (isinstance(self.prob, float)):
raise TypeError("{}: input type is invalid.".format(self))
def apply_image(self, image):
return image[:, ::-1, :]
def apply_pts(self, pts, width):
oldx = pts[:, 0::2].copy()
pts[:, 0::2] = width - oldx - 1
return pts
def apply(self, sample, context=None):
"""Filp the image and bounding box.
Operators:
1. Flip the image numpy.
2. Transform the bboxes' x coordinates.
(Must judge whether the coordinates are normalized!)
3. Transform the segmentations' x coordinates.
(Must judge whether the coordinates are normalized!)
Output:
sample: the image, bounding box and segmentation part
in sample are flipped.
"""
if np.random.uniform(0, 1) < self.prob:
im = sample['image']
height, width = im.shape[:2]
im = self.apply_image(im)
if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
sample['flipped'] = True
sample['image'] = im
return sample
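# Illustrative usage sketch (not part of the original file): horizontally flip the image
# and its polygons with probability 0.5; x coordinates here are in pixels, so the flip
# maps x to (width - x - 1).
def _demo_random_rflip():
    sample = {
        'image': np.zeros((240, 320, 3), dtype=np.uint8),
        'gt_poly': np.array([[40., 40., 120., 40., 120., 90., 40., 90.]],
                            dtype=np.float32),
    }
    return RandomRFlip(prob=0.5).apply(sample)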
@register_op
class VisibleRBox(BaseOperator):
"""
In debug mode, visualize images according to `gt_box`.
(Currently only supported when not cropping and flipping image.)
"""
def __init__(self, output_dir='debug'):
super(VisibleRBox, self).__init__()
self.output_dir = output_dir
if not os.path.isdir(output_dir):
os.makedirs(output_dir)
def apply(self, sample, context=None):
image = Image.fromarray(sample['image'].astype(np.uint8))
out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
width = sample['w']
height = sample['h']
# gt_poly = sample['gt_rbox']
gt_poly = sample['gt_poly']
gt_class = sample['gt_class']
draw = ImageDraw.Draw(image)
for i in range(gt_poly.shape[0]):
x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill='green')
# draw label
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
text = str(gt_class[i][0])
tw, th = imagedraw_textsize_c(draw, text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
if 'gt_keypoint' in sample.keys():
gt_keypoint = sample['gt_keypoint']
            # `is_normalized` is never set in __init__, so default to False here
            if getattr(self, 'is_normalized', False):
for i in range(gt_keypoint.shape[1]):
if i % 2:
gt_keypoint[:, i] = gt_keypoint[:, i] * height
else:
gt_keypoint[:, i] = gt_keypoint[:, i] * width
for i in range(gt_keypoint.shape[0]):
keypoint = gt_keypoint[i]
for j in range(int(keypoint.shape[0] / 2)):
x1 = round(keypoint[2 * j]).astype(np.int32)
y1 = round(keypoint[2 * j + 1]).astype(np.int32)
draw.ellipse(
(x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
save_path = os.path.join(self.output_dir, out_file_name)
image.save(save_path, quality=95)
return sample
@register_op
class Rbox2Poly(BaseOperator):
"""
Convert rbbox format to poly format.
"""
def __init__(self):
super(Rbox2Poly, self).__init__()
def apply(self, sample, context=None):
assert 'gt_rbox' in sample
assert sample['gt_rbox'].shape[1] == 5
rboxes = sample['gt_rbox']
polys = rbox2poly_np(rboxes)
sample['gt_poly'] = polys
xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
return sample
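# Illustrative usage sketch (not part of the original file): recover (N, 8) polygons and
# axis-aligned (N, 4) boxes from 5-parameter rotated boxes [cx, cy, w, h, angle].
def _demo_rbox2poly():
    sample = {
        'gt_rbox': np.array([[100., 80., 60., 30., 0.3]], dtype=np.float32),
    }
    out = Rbox2Poly().apply(sample)
    return out['gt_poly'], out['gt_bbox']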

View File

@@ -0,0 +1,72 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import numbers
import numpy as np
try:
from collections.abc import Sequence, Mapping
except Exception:
from collections import Sequence, Mapping
def default_collate_fn(batch):
    """
    Default batch collating function for :code:`paddle.io.DataLoader`.
    The input is a list of sample data; each sample may be composed of lists,
    dictionaries, strings, numbers and numpy arrays. This function parses the
    input recursively and stacks numbers, numpy arrays and paddle.Tensor
    fields into batched data. e.g. for the following input:
    [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
     {'image': np.array(shape=[3, 224, 224]), 'label': 3},
     {'image': np.array(shape=[3, 224, 224]), 'label': 4},
     {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
    this default collate function zips the number and numpy array fields
    together and stacks each field into a batch field as follows:
    {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
    Args:
        batch (list): a list of sample data.
    Returns:
        Batched data: each number, numpy array and paddle.Tensor in the input
        is batched.
    """
sample = batch[0]
if isinstance(sample, np.ndarray):
batch = np.stack(batch, axis=0)
return batch
elif isinstance(sample, numbers.Number):
batch = np.array(batch)
return batch
elif isinstance(sample, (str, bytes)):
return batch
elif isinstance(sample, Mapping):
return {
key: default_collate_fn([d[key] for d in batch])
for key in sample
}
elif isinstance(sample, Sequence):
sample_fields_num = len(sample)
        if not all(len(s) == sample_fields_num for s in iter(batch)):
            raise RuntimeError(
                "fields number not the same among samples in a batch")
        return [default_collate_fn(fields) for fields in zip(*batch)]
    raise TypeError("batch data can only contain: tensor, numpy.ndarray, "
                    "dict, list, number, but got {}".format(type(sample)))