Replace the document detection model
paddle_detection/ppdet/data/__init__.py (new file, 21 lines added)
@@ -0,0 +1,21 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import source
from . import transform
from . import reader

from .source import *
from .transform import *
from .reader import *
paddle_detection/ppdet/data/crop_utils/__init__.py (new file, 13 lines added)
@@ -0,0 +1,13 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddle_detection/ppdet/data/crop_utils/annotation_cropper.py (new file, 580 lines added)
@@ -0,0 +1,580 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import math
import random
import numpy as np
from copy import deepcopy
from typing import List, Tuple
from collections import defaultdict

from .chip_box_utils import nms, transform_chip_boxes2image_boxes
from .chip_box_utils import find_chips_to_cover_overlaped_boxes
from .chip_box_utils import transform_chip_box
from .chip_box_utils import intersection_over_box


class AnnoCropper(object):
    def __init__(self,
                 image_target_sizes: List[int],
                 valid_box_ratio_ranges: List[List[float]],
                 chip_target_size: int,
                 chip_target_stride: int,
                 use_neg_chip: bool=False,
                 max_neg_num_per_im: int=8,
                 max_per_img: int=-1,
                 nms_thresh: float=0.5):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters work like kernel_size and stride in a CNN.

        Each image has its raw size; after resizing it gets its target size,
        so the resizing scale = target_size / raw_size. The same holds for
        chips of the image.
        box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
        The 'size' mentioned above is the size of the long side of the image,
        box or chip.

        :param image_target_sizes: e.g. [2000, 1000]
        :param valid_box_ratio_ranges: e.g. [[-1, 0.1], [0.08, -1]]
        :param chip_target_size: e.g. 500
        :param chip_target_stride: e.g. 200
        """
        self.target_sizes = image_target_sizes
        self.valid_box_ratio_ranges = valid_box_ratio_ranges
        assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
        self.scale_num = len(self.target_sizes)
        self.chip_target_size = chip_target_size  # target size
        self.chip_target_stride = chip_target_stride  # target stride
        self.use_neg_chip = use_neg_chip
        self.max_neg_num_per_im = max_neg_num_per_im
        self.max_per_img = max_per_img
        self.nms_thresh = nms_thresh

    def crop_anno_records(self, records: List[dict]):
        """
        The main logic:
        # foreach record (image):
        #     foreach scale:
        #         1. generate chips by chip size and stride for each scale
        #         2. get pos chips
        #            - validate boxes: current scale; h, w >= 1
        #            - find pos chips greedily by valid gt boxes in each scale
        #            - for every valid gt box, find its corresponding pos chips in each scale
        #         3. get neg chips
        #            - if given proposals, find neg boxes in them which are not in pos chips
        #            - if we got neg boxes in the last step, find neg chips and assign
        #              neg boxes to neg chips as in step 2
        #     4. sample neg chips if there are too many per image
        #     transform the per-image, per-scale annotations to chip (pos & neg chip) annotations

        :param records: standard coco records, but with an extra key `proposals` (Px4),
            predicted by the stage-1 model and possibly containing neg boxes.
        :return: new_records, list of dicts like
            {
                'im_file': 'fake_image1.jpg',
                'im_id': np.array([1]),  # new _global_chip_id as im_id
                'h': h,  # chip height
                'w': w,  # chip width
                'is_crowd': is_crowd,  # Nx1 -> Mx1
                'gt_class': gt_class,  # Nx1 -> Mx1
                'gt_bbox': gt_bbox,  # Nx4 -> Mx4, 4 represents [x1, y1, x2, y2]
                'gt_poly': gt_poly,  # [None]xN -> [None]xM
                'chip': [x1, y1, x2, y2]  # added
            }

        Attention:
            ------------------------------>x
            |
            |    (x1,y1)------
            |      |         |
            |      |         |
            |      |         |
            |      |         |
            |      |         |
            |       ----------
            |                 (x2,y2)
            |
            ↓
            y

        If we use [x1, y1, x2, y2] to represent boxes or chips,
        (x1, y1) is the left-top point, which lies inside the box,
        but (x2, y2) is the right-bottom point, which does not.
        So x1 is in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1, h];
        x2 - x1 gives the width, and image[y1:y2, x1:x2] gives the box area.
        """

        self.chip_records = []
        self._global_chip_id = 1
        for r in records:
            self._cur_im_pos_chips = [
            ]  # element: (chip, boxes_idx); chip is [x1, y1, x2, y2], boxes_idx is List[int]
            self._cur_im_neg_chips = []  # element: (chip, neg_box_num)
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)

                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)

                # dict: chip_id -> [box_id, ...]
                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
                    r['gt_bbox'], chips)

                # dict: chip_id -> neg_box_num
                neg_chip2box_num = self._get_neg_boxes_and_chips(
                    chips,
                    list(pos_chip2boxes_idx.keys()), r.get('proposals', None))

                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
                                          neg_chip2box_num)

            cur_image_records = self._trans_all_chips2annotations(r)
            self.chip_records.extend(cur_image_records)
        return self.chip_records

    def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx, neg_chip2box_num):
        for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
            chip = np.array(chips[pos_chipid])  # copy chips slice
            self._cur_im_pos_chips.append((chip, boxes_idx))

        if neg_chip2box_num is None:
            return

        for neg_chipid, neg_box_num in neg_chip2box_num.items():
            chip = np.array(chips[neg_chipid])
            self._cur_im_neg_chips.append((chip, neg_box_num))

    def _trans_all_chips2annotations(self, r):
        gt_bbox = r['gt_bbox']
        im_file = r['im_file']
        is_crowd = r['is_crowd']
        gt_class = r['gt_class']
        # gt_poly = r['gt_poly']  # [None]xN
        # remaining keys: im_id, h, w
        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
                                                         is_crowd, gt_class)

        if not self.use_neg_chip:
            return chip_records

        sampled_neg_chips = self._sample_neg_chips()
        neg_chip_records = self._trans_neg_chips2annotations(im_file,
                                                             sampled_neg_chips)
        chip_records.extend(neg_chip_records)
        return chip_records

    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
                                     gt_class):
        chip_records = []
        for chip, boxes_idx in self._cur_im_pos_chips:
            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
                                                            chip)
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': chip_bbox,
                'is_crowd': is_crowd[final_boxes_idx].copy(),
                'gt_class': gt_class[final_boxes_idx].copy(),
                # 'gt_poly': [None] * len(final_boxes_idx),
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _sample_neg_chips(self):
        pos_num = len(self._cur_im_pos_chips)
        neg_num = len(self._cur_im_neg_chips)
        sample_num = min(pos_num + 2, self.max_neg_num_per_im)
        assert sample_num >= 1
        if neg_num <= sample_num:
            return self._cur_im_neg_chips

        candidate_num = int(sample_num * 1.5)
        candidate_neg_chips = sorted(
            self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
        random.shuffle(candidate_neg_chips)
        sampled_neg_chips = candidate_neg_chips[:sample_num]
        return sampled_neg_chips

    def _trans_neg_chips2annotations(self,
                                     im_file: str,
                                     sampled_neg_chips: List[Tuple]):
        chip_records = []
        for chip, neg_box_num in sampled_neg_chips:
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': np.zeros(
                    (0, 4), dtype=np.float32),
                'is_crowd': np.zeros(
                    (0, 1), dtype=np.int32),
                'gt_class': np.zeros(
                    (0, 1), dtype=np.int32),
                # 'gt_poly': [],
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _get_current_scale_parameters(self, scale_i, r):
        im_size = max(r['h'], r['w'])
        im_target_size = self.target_sizes[scale_i]
        self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
        self._cur_scale = self._get_current_scale(im_target_size, im_size)
        self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]

    def _get_current_scale(self, im_target_size, im_size):
        return im_target_size / im_size

    def _create_chips(self, h: int, w: int, scale: float):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters work like kernel_size and stride in a CNN.
        :return: chips, Cx4, xy in raw-size dimension
        """
        chip_size = self.chip_target_size  # omit 'target' for simplicity
        stride = self.chip_target_stride
        width = int(scale * w)
        height = int(scale * h)
        min_chip_location_diff = 20  # in target size

        assert chip_size >= stride
        chip_overlap = chip_size - stride
        if (width - chip_overlap
            ) % stride > min_chip_location_diff:  # the remainder not divisible by stride is large, so keep it
            w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
        else:  # the remainder not divisible by stride is small, so drop it
            w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
        if (height - chip_overlap) % stride > min_chip_location_diff:
            h_steps = max(1, int(math.ceil((height - chip_overlap) / stride)))
        else:
            h_steps = max(1, int(math.floor((height - chip_overlap) / stride)))

        chips = list()
        for j in range(h_steps):
            for i in range(w_steps):
                x1 = i * stride
                y1 = j * stride
                x2 = min(x1 + chip_size, width)
                y2 = min(y1 + chip_size, height)
                chips.append([x1, y1, x2, y2])

        # check chip size
        for item in chips:
            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
                    1] > chip_size * 1.1:
                raise ValueError(item)
        chips = np.array(chips, dtype=np.float32)

        raw_size_chips = chips / scale
        return raw_size_chips
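
    # Worked example of the step arithmetic above (illustrative, not part of
    # the original file): with chip_size=500, stride=200 and a scaled width
    # of 1100, chip_overlap = 300 and (1100 - 300) % 200 = 0 <= 20, so
    # w_steps = floor(800 / 200) = 4; chips start at x1 = 0, 200, 400, 600
    # and each ends at x2 = min(x1 + 500, 1100).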

    def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale
        # Nx4, N
        valid_boxes, valid_boxes_idx = self._validate_boxes(
            valid_ratio_range, im_size, gt_bbox, scale)
        # dict: chip_id -> [box_id, ...]
        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
                                                  valid_boxes_idx)
        return pos_chip2boxes_idx

    def _validate_boxes(self,
                        valid_ratio_range: List[float],
                        im_size: int,
                        gt_boxes: 'np.array of Nx4',
                        scale: float):
        """
        :return: valid_boxes: Nx4, valid_boxes_idx: N
        """
        ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
        hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
        maxs = np.maximum(ws, hs)
        box_ratio = maxs / im_size
        mins = np.minimum(ws, hs)
        target_mins = mins * scale

        low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
            np.float32).max

        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) & (
            target_mins >= 2))[0]
        valid_boxes = gt_boxes[valid_boxes_idx]
        return valid_boxes, valid_boxes_idx

    def _find_pos_chips(self,
                        chips: 'Cx4',
                        valid_boxes: 'Bx4',
                        valid_boxes_idx: 'B'):
        """
        :return: pos_chip2boxes_idx, dict: chip_id -> [box_id, ...]
        """
        iob = intersection_over_box(chips, valid_boxes)  # overlap, CxB

        iob_threshold_to_find_chips = 1.
        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
            iob, iob_threshold_to_find_chips)
        pos_chip_ids = set(pos_chip_ids)

        iob_threshold_to_assign_box = 0.5
        pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
            iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
        return pos_chip2boxes_idx

    def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
        return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)

    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
                                   valid_boxes_idx):
        chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
        pos_chip2boxes_idx = defaultdict(list)
        for chip_id, box_id in zip(chip_ids, box_ids):
            if chip_id not in pos_chip_ids:
                continue
            raw_gt_box_idx = valid_boxes_idx[box_id]
            pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
        return pos_chip2boxes_idx

    def _get_neg_boxes_and_chips(self,
                                 chips: 'Cx4',
                                 pos_chip_ids: 'D',
                                 proposals: 'Px4'):
        """
        :param chips:
        :param pos_chip_ids:
        :param proposals:
        :return: neg_chip2box_num, None or dict: chip_id -> neg_box_num
        """
        if not self.use_neg_chip:
            return None

        # train proposals may be None
        if proposals is None or len(proposals) < 1:
            return None

        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale

        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
                                              proposals, scale)
        neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
        neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
        return neg_chip2box_num

    def _find_neg_boxes(self,
                        chips: 'Cx4',
                        pos_chip_ids: 'D',
                        valid_props: 'Px4'):
        """
        :return: neg_boxes: Nx4
        """
        if len(pos_chip_ids) == 0:
            return valid_props

        pos_chips = chips[pos_chip_ids]
        iob = intersection_over_box(pos_chips, valid_props)
        overlap_per_prop = np.max(iob, axis=0)
        non_overlap_props_idx = overlap_per_prop < 0.5
        neg_boxes = valid_props[non_overlap_props_idx]
        return neg_boxes

    def _find_neg_chips(self, chips: 'Cx4', pos_chip_ids: 'D',
                        neg_boxes: 'Nx4'):
        """
        :return: neg_chip2box_num, dict: chip_id -> neg_box_num
        """
        neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
        neg_chips = chips[neg_chip_ids]

        iob = intersection_over_box(neg_chips, neg_boxes)
        iob_threshold_to_find_chips = 0.7
        chosen_neg_chip_ids, chip_id2overlap_box_num = \
            self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)

        neg_chipid2box_num = {}
        for cid in chosen_neg_chip_ids:
            box_num = chip_id2overlap_box_num[cid]
            raw_chip_id = neg_chip_ids[cid]
            neg_chipid2box_num[raw_chip_id] = box_num
        return neg_chipid2box_num

    def crop_infer_anno_records(self, records: List[dict]):
        """
        Transform image records to chip records.
        :param records:
        :return: new_records, list of dicts like
            {
                'im_file': 'fake_image1.jpg',
                'im_id': np.array([1]),  # new _global_chip_id as im_id
                'h': h,  # chip height
                'w': w,  # chip width
                'chip': [x1, y1, x2, y2],  # added
                'ori_im_h': ori_im_h,  # added, original image height
                'ori_im_w': ori_im_w,  # added, original image width
                'scale_i': 0  # added
            }
        """
        self.chip_records = []
        self._global_chip_id = 1  # im_id starts from 1
        self._global_chip_id2img_id = {}

        for r in records:
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)
                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)
                cur_img_chip_record = self._get_chips_records(r, chips, scale_i)
                self.chip_records.extend(cur_img_chip_record)

        return self.chip_records

    def _get_chips_records(self, rec, chips, scale_i):
        cur_img_chip_records = []
        ori_im_h = rec["h"]
        ori_im_w = rec["w"]
        im_file = rec["im_file"]
        ori_im_id = rec["im_id"]
        for id, chip in enumerate(chips):
            chip_rec = {}
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            chip_rec["im_file"] = im_file
            chip_rec["im_id"] = self._global_chip_id
            chip_rec["h"] = chip_h
            chip_rec["w"] = chip_w
            chip_rec["chip"] = chip
            chip_rec["ori_im_h"] = ori_im_h
            chip_rec["ori_im_w"] = ori_im_w
            chip_rec["scale_i"] = scale_i

            self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
            self._global_chip_id += 1
            cur_img_chip_records.append(chip_rec)

        return cur_img_chip_records

    def aggregate_chips_detections(self, results, records=None):
        """
        1. transform chip dets to image dets
        2. NMS boxes per image
        3. format output results
        :param results:
        :param records:
        :return:
        """
        results = deepcopy(results)
        records = records if records else self.chip_records
        img_id2bbox = self._transform_chip2image_bboxes(results, records)
        nms_img_id2bbox = self._nms_dets(img_id2bbox)
        aggregate_results = self._reformat_results(nms_img_id2bbox)
        return aggregate_results

    def _transform_chip2image_bboxes(self, results, records):
        # 1. transform chip dets to image dets
        # 2. filter valid range
        # 3. reformat and aggregate chip dets to get scale_cls_dets
        img_id2bbox = defaultdict(list)
        for result in results:
            bbox_locs = result['bbox']
            bbox_nums = result['bbox_num']
            if len(bbox_locs) == 1 and bbox_locs[0][
                    0] == -1:  # current batch has no detections
                # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
                # MultiClassNMS output: if there are no detected boxes for any
                # image, lod is set to {1} and Out contains only the value -1.
                continue
            im_ids = result['im_id']  # replace with range(len(bbox_nums))

            last_bbox_num = 0
            for idx, im_id in enumerate(im_ids):

                cur_bbox_len = bbox_nums[idx]
                bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
                last_bbox_num += cur_bbox_len
                # box: [num_id, score, xmin, ymin, xmax, ymax]
                if len(bboxes) == 0:  # current image has no detections
                    continue

                chip_rec = records[int(im_id) -
                                   1]  # im_id starts from 1, type is np.int64
                image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])

                bboxes = transform_chip_boxes2image_boxes(
                    bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
                    chip_rec["ori_im_w"])

                scale_i = chip_rec["scale_i"]
                cur_scale = self._get_current_scale(self.target_sizes[scale_i],
                                                    image_size)
                _, valid_boxes_idx = self._validate_boxes(
                    self.valid_box_ratio_ranges[scale_i], image_size,
                    bboxes[:, 2:], cur_scale)
                ori_img_id = self._global_chip_id2img_id[int(im_id)]

                img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])

        return img_id2bbox

    def _nms_dets(self, img_id2bbox):
        # 1. NMS on each image-class
        # 2. limit the number of detections to max_per_img if requested
        max_per_img = self.max_per_img
        nms_thresh = self.nms_thresh

        for img_id in img_id2bbox:
            box = img_id2bbox[
                img_id]  # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
            box = np.concatenate(box, axis=0)
            nms_dets = nms(box, nms_thresh)
            if max_per_img > 0:
                if len(nms_dets) > max_per_img:
                    keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
                    nms_dets = nms_dets[keep]

            img_id2bbox[img_id] = nms_dets

        return img_id2bbox

    def _reformat_results(self, img_id2bbox):
        """Reformat results."""
        im_ids = img_id2bbox.keys()
        results = []
        for img_id in im_ids:  # output in original im_id order
            if len(img_id2bbox[img_id]) == 0:
                bbox = np.array(
                    [[-1., 0., 0., 0., 0., 0.]])  # edge case: no detections
                bbox_num = np.array([0])
            else:
                # np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
                bbox = img_id2bbox[img_id]
                bbox_num = np.array([len(bbox)])
            res = dict(im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
            results.append(res)
        return results
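
For orientation, a minimal usage sketch of AnnoCropper (illustrative only; the record values below are made up, following the field layout documented in crop_anno_records):

    import numpy as np
    cropper = AnnoCropper(
        image_target_sizes=[2000, 1000],
        valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
        chip_target_size=500,
        chip_target_stride=200)
    records = [{
        'im_file': 'fake_image1.jpg',
        'im_id': np.array([1]),
        'h': 1080, 'w': 1920,
        'gt_bbox': np.array([[10., 20., 110., 220.]], dtype=np.float32),
        'is_crowd': np.zeros((1, 1), dtype=np.int32),
        'gt_class': np.zeros((1, 1), dtype=np.int32),
    }]
    # each returned record describes one chip with its shifted gt boxes
    chip_records = cropper.crop_anno_records(records)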
paddle_detection/ppdet/data/crop_utils/chip_box_utils.py (new file, 170 lines added)
@@ -0,0 +1,170 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


def bbox_area(boxes):
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])


def intersection_over_box(chips, boxes):
    """
    Intersection area over box area.
    :param chips: Cx4
    :param boxes: Bx4
    :return: iob, CxB
    """
    M = chips.shape[0]
    N = boxes.shape[0]
    if M * N == 0:
        return np.zeros([M, N], dtype='float32')

    box_area = bbox_area(boxes)  # B

    inter_x2y2 = np.minimum(np.expand_dims(chips, 1)[:, :, 2:],
                            boxes[:, 2:])  # CxBx2
    inter_x1y1 = np.maximum(np.expand_dims(chips, 1)[:, :, :2],
                            boxes[:, :2])  # CxBx2
    inter_wh = inter_x2y2 - inter_x1y1
    inter_wh = np.clip(inter_wh, a_min=0, a_max=None)
    inter_area = inter_wh[:, :, 0] * inter_wh[:, :, 1]  # CxB

    iob = inter_area / np.expand_dims(box_area, 0)
    return iob


def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries.
    :param boxes: [N, 4]
    :param im_shape: tuple of 2, [h, w]
    :return: [N, 4]
    """
    # x1 >= 0
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)
    # y1 >= 0
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)
    # x2 < im_shape[1]
    boxes[:, 2] = np.clip(boxes[:, 2], 1, im_shape[1])
    # y2 < im_shape[0]
    boxes[:, 3] = np.clip(boxes[:, 3], 1, im_shape[0])
    return boxes


def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
    boxes_idx = np.array(boxes_idx)
    cur_gt_bbox = gt_bbox[boxes_idx].copy()  # Bx4
    x1, y1, x2, y2 = chip
    cur_gt_bbox[:, 0] -= x1
    cur_gt_bbox[:, 1] -= y1
    cur_gt_bbox[:, 2] -= x1
    cur_gt_bbox[:, 3] -= y1
    h = y2 - y1
    w = x2 - x1
    cur_gt_bbox = clip_boxes(cur_gt_bbox, (h, w))
    ws = (cur_gt_bbox[:, 2] - cur_gt_bbox[:, 0]).astype(np.int32)
    hs = (cur_gt_bbox[:, 3] - cur_gt_bbox[:, 1]).astype(np.int32)
    valid_idx = (ws >= 2) & (hs >= 2)
    return cur_gt_bbox[valid_idx], boxes_idx[valid_idx]


def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
    chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
    chip_id2overlap_box_num = np.bincount(chip_ids)  # 1d array
    chip_id2overlap_box_num = np.pad(
        chip_id2overlap_box_num, (0, len(iob) - len(chip_id2overlap_box_num)),
        constant_values=0)

    chosen_chip_ids = []
    while len(box_ids) > 0:
        value_counts = np.bincount(chip_ids)  # 1d array
        max_count_chip_id = np.argmax(value_counts)
        assert max_count_chip_id not in chosen_chip_ids
        chosen_chip_ids.append(max_count_chip_id)

        box_ids_in_cur_chip = box_ids[chip_ids == max_count_chip_id]
        ids_not_in_cur_boxes_mask = np.logical_not(
            np.isin(box_ids, box_ids_in_cur_chip))
        chip_ids = chip_ids[ids_not_in_cur_boxes_mask]
        box_ids = box_ids[ids_not_in_cur_boxes_mask]
    return chosen_chip_ids, chip_id2overlap_box_num


def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
    chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
    xmin, ymin, _, _ = chip
    # transform to original image locations
    chip_boxes[:, 2] += xmin
    chip_boxes[:, 4] += xmin
    chip_boxes[:, 3] += ymin
    chip_boxes[:, 5] += ymin
    chip_boxes = clip_boxes(chip_boxes, (img_h, img_w))
    return chip_boxes


def nms(dets, thresh):
    """Apply classic DPM-style greedy NMS."""
    if dets.shape[0] == 0:
        return dets[[], :]
    scores = dets[:, 1]
    x1 = dets[:, 2]
    y1 = dets[:, 3]
    x2 = dets[:, 4]
    y2 = dets[:, 5]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    ndets = dets.shape[0]
    suppressed = np.zeros((ndets), dtype=np.int32)

    # nominal indices: _i, _j
    # sorted indices: i, j
    # temp variables for box i (the box currently under consideration):
    #     ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower-scoring box):
    #     xx1, yy1, xx2, yy2, w, h, inter, ovr

    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1
    keep = np.where(suppressed == 0)[0]
    dets = dets[keep, :]
    return dets
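
A quick sanity check of the two core helpers, with toy values (not from the repository; det rows follow the [label, score, x1, y1, x2, y2] layout used above):

    import numpy as np
    chips = np.array([[0., 0., 100., 100.]], dtype=np.float32)
    boxes = np.array([[50., 50., 150., 150.]], dtype=np.float32)
    # intersection 50x50 = 2500, box area 100x100 = 10000 -> iob = [[0.25]]
    iob = intersection_over_box(chips, boxes)
    dets = np.array([[0., 0.9, 10., 10., 60., 60.],
                     [0., 0.8, 12., 12., 62., 62.]], dtype=np.float32)
    # the two boxes overlap with IoU ~0.86 >= 0.5, so only the 0.9-score row survives
    kept = nms(dets, 0.5)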
paddle_detection/ppdet/data/culane_utils.py (new file, 130 lines added)
@@ -0,0 +1,130 @@
import math
import numpy as np
from imgaug.augmentables.lines import LineString
from scipy.interpolate import InterpolatedUnivariateSpline


def lane_to_linestrings(lanes):
    lines = []
    for lane in lanes:
        lines.append(LineString(lane))

    return lines


def linestrings_to_lanes(lines):
    lanes = []
    for line in lines:
        lanes.append(line.coords)

    return lanes


def sample_lane(points, sample_ys, img_w):
    # this function expects the points to be sorted
    points = np.array(points)
    if not np.all(points[1:, 1] < points[:-1, 1]):
        raise Exception('Annotation points have to be sorted')
    x, y = points[:, 0], points[:, 1]

    # interpolate points inside the domain
    assert len(points) > 1
    interp = InterpolatedUnivariateSpline(
        y[::-1], x[::-1], k=min(3, len(points) - 1))
    domain_min_y = y.min()
    domain_max_y = y.max()
    sample_ys_inside_domain = sample_ys[(sample_ys >= domain_min_y) & (
        sample_ys <= domain_max_y)]
    assert len(sample_ys_inside_domain) > 0
    interp_xs = interp(sample_ys_inside_domain)

    # extrapolate the lane to the bottom of the image with a straight line,
    # using the two points closest to the bottom
    two_closest_points = points[:2]
    extrap = np.polyfit(
        two_closest_points[:, 1], two_closest_points[:, 0], deg=1)
    extrap_ys = sample_ys[sample_ys > domain_max_y]
    extrap_xs = np.polyval(extrap, extrap_ys)
    all_xs = np.hstack((extrap_xs, interp_xs))

    # separate inside and outside points
    inside_mask = (all_xs >= 0) & (all_xs < img_w)
    xs_inside_image = all_xs[inside_mask]
    xs_outside_image = all_xs[~inside_mask]

    return xs_outside_image, xs_inside_image


def filter_lane(lane):
    assert lane[-1][1] <= lane[0][1]
    filtered_lane = []
    used = set()
    for p in lane:
        if p[1] not in used:
            filtered_lane.append(p)
            used.add(p[1])

    return filtered_lane


def transform_annotation(img_w, img_h, max_lanes, n_offsets, offsets_ys,
                         n_strips, strip_size, anno):
    old_lanes = anno['lanes']

    # remove lanes with fewer than 2 points
    old_lanes = filter(lambda x: len(x) > 1, old_lanes)
    # sort lane points by Y (bottom to top of the image)
    old_lanes = [sorted(lane, key=lambda x: -x[1]) for lane in old_lanes]
    # remove points with the same Y (keep the first occurrence)
    old_lanes = [filter_lane(lane) for lane in old_lanes]
    # normalize the annotation coordinates
    old_lanes = [[[x * img_w / float(img_w), y * img_h / float(img_h)]
                  for x, y in lane] for lane in old_lanes]
    # create transformed annotations
    lanes = np.ones(
        (max_lanes, 2 + 1 + 1 + 2 + n_offsets), dtype=np.float32
    ) * -1e5  # 2 scores, 1 start_y, 1 start_x, 1 theta, 1 length, S+1 coordinates
    lanes_endpoints = np.ones((max_lanes, 2))
    # lanes are invalid by default
    lanes[:, 0] = 1
    lanes[:, 1] = 0
    for lane_idx, lane in enumerate(old_lanes):
        if lane_idx >= max_lanes:
            break

        try:
            xs_outside_image, xs_inside_image = sample_lane(lane, offsets_ys,
                                                            img_w)
        except AssertionError:
            continue
        if len(xs_inside_image) <= 1:
            continue
        all_xs = np.hstack((xs_outside_image, xs_inside_image))
        lanes[lane_idx, 0] = 0
        lanes[lane_idx, 1] = 1
        lanes[lane_idx, 2] = len(xs_outside_image) / n_strips
        lanes[lane_idx, 3] = xs_inside_image[0]

        thetas = []
        for i in range(1, len(xs_inside_image)):
            theta = math.atan(
                i * strip_size /
                (xs_inside_image[i] - xs_inside_image[0] + 1e-5)) / math.pi
            theta = theta if theta > 0 else 1 - abs(theta)
            thetas.append(theta)

        theta_far = sum(thetas) / len(thetas)

        # lanes[lane_idx,
        #       4] = (theta_closest + theta_far) / 2  # averaged angle
        lanes[lane_idx, 4] = theta_far
        lanes[lane_idx, 5] = len(xs_inside_image)
        lanes[lane_idx, 6:6 + len(all_xs)] = all_xs
        lanes_endpoints[lane_idx, 0] = (len(all_xs) - 1) / n_strips
        lanes_endpoints[lane_idx, 1] = xs_inside_image[-1]

    new_anno = {
        'label': lanes,
        'old_anno': anno,
        'lane_endpoints': lanes_endpoints
    }
    return new_anno
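
For intuition, a toy call of sample_lane (made-up points, sorted by decreasing y as the function requires):

    import numpy as np
    points = [(100., 590.), (120., 400.), (150., 200.)]
    sample_ys = np.arange(589, 0, -20)
    xs_outside, xs_inside = sample_lane(points, sample_ys, img_w=800)
    # xs_inside holds the spline-interpolated x at every sampled y in [200, 590];
    # xs_outside holds any x values falling outside [0, img_w), here none.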
paddle_detection/ppdet/data/reader.py (new file, 615 lines added)
@@ -0,0 +1,615 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import os
import traceback
import six
import sys
if sys.version_info >= (3, 0):
    pass
else:
    pass
import numpy as np
import paddle
import paddle.nn.functional as F

from copy import deepcopy

from paddle.io import DataLoader, DistributedBatchSampler
from .utils import default_collate_fn

from ppdet.core.workspace import register
from . import transform
from .shm_utils import _get_shared_memory_size_in_M

from ppdet.utils.logger import setup_logger
logger = setup_logger('reader')

MAIN_PID = os.getpid()


class Compose(object):
    def __init__(self, transforms, num_classes=80):
        self.transforms = transforms
        self.transforms_cls = []
        for t in self.transforms:
            for k, v in t.items():
                op_cls = getattr(transform, k)
                f = op_cls(**v)
                if hasattr(f, 'num_classes'):
                    f.num_classes = num_classes

                self.transforms_cls.append(f)

    def __call__(self, data):
        for f in self.transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map sample transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        return data


class BatchCompose(Compose):
    def __init__(self, transforms, num_classes=80, collate_batch=True):
        super(BatchCompose, self).__init__(transforms, num_classes)
        self.collate_batch = collate_batch

    def __call__(self, data):
        for f in self.transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map batch transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        # remove keys which are not needed by the model
        extra_key = ['h', 'w', 'flipped']
        for k in extra_key:
            for sample in data:
                if k in sample:
                    sample.pop(k)

        # batch data; if a user-defined batch function is needed, use it here
        if self.collate_batch:
            batch_data = default_collate_fn(data)
        else:
            batch_data = {}
            for k in data[0].keys():
                tmp_data = []
                for i in range(len(data)):
                    tmp_data.append(data[i][k])
                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
                    tmp_data = np.stack(tmp_data, axis=0)
                batch_data[k] = tmp_data
        return batch_data


class BaseDataLoader(object):
    """
    Base DataLoader implementation for detection models.

    Args:
        sample_transforms (list): a list of transforms to perform
                                  on each sample
        batch_transforms (list): a list of transforms to perform
                                 on each batch
        batch_size (int): batch size for batch collating, default 1.
        shuffle (bool): whether to shuffle samples
        drop_last (bool): whether to drop the last incomplete batch,
                          default False
        num_classes (int): class number of dataset, default 80
        collate_batch (bool): whether to collate batches in the dataloader.
            If set to True, the samples will be collated into batches
            according to the batch size. Otherwise, the ground truth will
            not be collated, which is used when the number of ground
            truths differs across samples.
        use_shared_memory (bool): whether to use shared memory to
                accelerate data loading. Enable this only if you
                are sure that the shared memory size of your OS
                is larger than the memory cost of the model's input data.
                Note that shared memory will be automatically
                disabled if the shared memory of the OS is less than
                1G, which is not enough for detection models.
                Default False.
    """

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 collate_batch=True,
                 use_shared_memory=False,
                 **kwargs):
        # sample transforms
        self._sample_transforms = Compose(
            sample_transforms, num_classes=num_classes)

        # batch transforms
        self._batch_transforms = BatchCompose(batch_transforms, num_classes,
                                              collate_batch)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.use_shared_memory = use_shared_memory
        self.kwargs = kwargs

    def __call__(self,
                 dataset,
                 worker_num,
                 batch_sampler=None,
                 return_list=False):
        self.dataset = dataset
        self.dataset.check_or_download_dataset()
        self.dataset.parse_dataset()
        # get data
        self.dataset.set_transform(self._sample_transforms)
        # set kwargs
        self.dataset.set_kwargs(**self.kwargs)
        # batch sampler
        if batch_sampler is None:
            self._batch_sampler = DistributedBatchSampler(
                self.dataset,
                batch_size=self.batch_size,
                shuffle=self.shuffle,
                drop_last=self.drop_last)
        else:
            self._batch_sampler = batch_sampler

        # DataLoader does not start sub-processes on Windows and macOS,
        # so there is no need to use shared memory
        use_shared_memory = self.use_shared_memory and \
            sys.platform not in ['win32', 'darwin']
        # check whether the shared memory size is bigger than 1G (1024M)
        if use_shared_memory:
            shm_size = _get_shared_memory_size_in_M()
            if shm_size is not None and shm_size < 1024.:
                logger.warning("Shared memory size is less than 1G, "
                               "disable shared_memory in DataLoader")
                use_shared_memory = False

        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self._batch_sampler,
            collate_fn=self._batch_transforms,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=use_shared_memory)
        self.loader = iter(self.dataloader)

        return self

    def __len__(self):
        return len(self._batch_sampler)

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self.loader)
        except StopIteration:
            self.loader = iter(self.dataloader)
            six.reraise(*sys.exc_info())

    def next(self):
        # python2 compatibility
        return self.__next__()


@register
class TrainReader(BaseDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 **kwargs):
        super(TrainReader, self).__init__(sample_transforms, batch_transforms,
                                          batch_size, shuffle, drop_last,
                                          num_classes, collate_batch, **kwargs)


@register
class EvalReader(BaseDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 **kwargs):
        super(EvalReader, self).__init__(sample_transforms, batch_transforms,
                                         batch_size, shuffle, drop_last,
                                         num_classes, **kwargs)


@register
class TestReader(BaseDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=80,
                 **kwargs):
        super(TestReader, self).__init__(sample_transforms, batch_transforms,
                                         batch_size, shuffle, drop_last,
                                         num_classes, **kwargs)


@register
class EvalMOTReader(BaseDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(EvalMOTReader, self).__init__(sample_transforms, batch_transforms,
                                            batch_size, shuffle, drop_last,
                                            num_classes, **kwargs)


@register
class TestMOTReader(BaseDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 batch_transforms=[],
                 batch_size=1,
                 shuffle=False,
                 drop_last=False,
                 num_classes=1,
                 **kwargs):
        super(TestMOTReader, self).__init__(sample_transforms, batch_transforms,
                                            batch_size, shuffle, drop_last,
                                            num_classes, **kwargs)


# For Semi-Supervised Object Detection (SSOD)
class Compose_SSOD(object):
    def __init__(self, base_transforms, weak_aug, strong_aug, num_classes=80):
        self.base_transforms = base_transforms
        self.base_transforms_cls = []
        for t in self.base_transforms:
            for k, v in t.items():
                op_cls = getattr(transform, k)
                f = op_cls(**v)
                if hasattr(f, 'num_classes'):
                    f.num_classes = num_classes
                self.base_transforms_cls.append(f)

        self.weak_augs = weak_aug
        self.weak_augs_cls = []
        for t in self.weak_augs:
            for k, v in t.items():
                op_cls = getattr(transform, k)
                f = op_cls(**v)
                if hasattr(f, 'num_classes'):
                    f.num_classes = num_classes
                self.weak_augs_cls.append(f)

        self.strong_augs = strong_aug
        self.strong_augs_cls = []
        for t in self.strong_augs:
            for k, v in t.items():
                op_cls = getattr(transform, k)
                f = op_cls(**v)
                if hasattr(f, 'num_classes'):
                    f.num_classes = num_classes
                self.strong_augs_cls.append(f)

    def __call__(self, data):
        for f in self.base_transforms_cls:
            try:
                data = f(data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map sample transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        weak_data = deepcopy(data)
        strong_data = deepcopy(data)
        for f in self.weak_augs_cls:
            try:
                weak_data = f(weak_data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map weak aug [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        for f in self.strong_augs_cls:
            try:
                strong_data = f(strong_data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map strong aug [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        weak_data['strong_aug'] = strong_data
        return weak_data


class BatchCompose_SSOD(Compose):
    def __init__(self, transforms, num_classes=80, collate_batch=True):
        super(BatchCompose_SSOD, self).__init__(transforms, num_classes)
        self.collate_batch = collate_batch

    def __call__(self, data):
        # split strong_data from data (weak_data)
        strong_data = []
        for sample in data:
            strong_data.append(sample['strong_aug'])
            sample.pop('strong_aug')

        for f in self.transforms_cls:
            try:
                data = f(data)
                if 'BatchRandomResizeForSSOD' in f._id:
                    strong_data = f(strong_data, data[1])[0]
                    data = data[0]
                else:
                    strong_data = f(strong_data)
            except Exception as e:
                stack_info = traceback.format_exc()
                logger.warning("fail to map batch transform [{}] "
                               "with error: {} and stack:\n{}".format(
                                   f, e, str(stack_info)))
                raise e

        # remove keys which are not needed by the model
        extra_key = ['h', 'w', 'flipped']
        for k in extra_key:
            for sample in data:
                if k in sample:
                    sample.pop(k)
            for sample in strong_data:
                if k in sample:
                    sample.pop(k)

        # batch data; if a user-defined batch function is needed, use it here
        if self.collate_batch:
            batch_data = default_collate_fn(data)
            strong_batch_data = default_collate_fn(strong_data)
            return batch_data, strong_batch_data
        else:
            batch_data = {}
            for k in data[0].keys():
                tmp_data = []
                for i in range(len(data)):
                    tmp_data.append(data[i][k])
                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
                    tmp_data = np.stack(tmp_data, axis=0)
                batch_data[k] = tmp_data

            strong_batch_data = {}
            for k in strong_data[0].keys():
                tmp_data = []
                for i in range(len(strong_data)):
                    tmp_data.append(strong_data[i][k])
                if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k:
                    tmp_data = np.stack(tmp_data, axis=0)
                strong_batch_data[k] = tmp_data

            return batch_data, strong_batch_data


class CombineSSODLoader(object):
    def __init__(self, label_loader, unlabel_loader):
        self.label_loader = label_loader
        self.unlabel_loader = unlabel_loader

    def __iter__(self):
        while True:
            try:
                label_samples = next(self.label_loader_iter)
            except:
                self.label_loader_iter = iter(self.label_loader)
                label_samples = next(self.label_loader_iter)

            try:
                unlabel_samples = next(self.unlabel_loader_iter)
            except:
                self.unlabel_loader_iter = iter(self.unlabel_loader)
                unlabel_samples = next(self.unlabel_loader_iter)

            yield (
                label_samples[0],  # sup weak
                label_samples[1],  # sup strong
                unlabel_samples[0],  # unsup weak
                unlabel_samples[1]  # unsup strong
            )

    def __call__(self):
        return self.__iter__()


class BaseSemiDataLoader(object):
    def __init__(self,
                 sample_transforms=[],
                 weak_aug=[],
                 strong_aug=[],
                 sup_batch_transforms=[],
                 unsup_batch_transforms=[],
                 sup_batch_size=1,
                 unsup_batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 use_shared_memory=False,
                 **kwargs):
        # sup transforms
        self._sample_transforms_label = Compose_SSOD(
            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
        self._batch_transforms_label = BatchCompose_SSOD(
            sup_batch_transforms, num_classes, collate_batch)
        self.batch_size_label = sup_batch_size

        # unsup transforms
        self._sample_transforms_unlabel = Compose_SSOD(
            sample_transforms, weak_aug, strong_aug, num_classes=num_classes)
        self._batch_transforms_unlabel = BatchCompose_SSOD(
            unsup_batch_transforms, num_classes, collate_batch)
        self.batch_size_unlabel = unsup_batch_size

        # common
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.use_shared_memory = use_shared_memory
        self.kwargs = kwargs

    def __call__(self,
                 dataset_label,
                 dataset_unlabel,
                 worker_num,
                 batch_sampler_label=None,
                 batch_sampler_unlabel=None,
                 return_list=False):
        # sup dataset
        self.dataset_label = dataset_label
        self.dataset_label.check_or_download_dataset()
        self.dataset_label.parse_dataset()
        self.dataset_label.set_transform(self._sample_transforms_label)
        self.dataset_label.set_kwargs(**self.kwargs)
        if batch_sampler_label is None:
            self._batch_sampler_label = DistributedBatchSampler(
                self.dataset_label,
                batch_size=self.batch_size_label,
                shuffle=self.shuffle,
                drop_last=self.drop_last)
        else:
            self._batch_sampler_label = batch_sampler_label

        # unsup dataset
        self.dataset_unlabel = dataset_unlabel
        self.dataset_unlabel.length = self.dataset_label.__len__()
        self.dataset_unlabel.check_or_download_dataset()
        self.dataset_unlabel.parse_dataset()
        self.dataset_unlabel.set_transform(self._sample_transforms_unlabel)
        self.dataset_unlabel.set_kwargs(**self.kwargs)
        if batch_sampler_unlabel is None:
            self._batch_sampler_unlabel = DistributedBatchSampler(
                self.dataset_unlabel,
                batch_size=self.batch_size_unlabel,
                shuffle=self.shuffle,
                drop_last=self.drop_last)
        else:
            self._batch_sampler_unlabel = batch_sampler_unlabel

        # DataLoader does not start sub-processes on Windows and macOS,
        # so there is no need to use shared memory
        use_shared_memory = self.use_shared_memory and \
            sys.platform not in ['win32', 'darwin']
        # check whether the shared memory size is bigger than 1G (1024M)
        if use_shared_memory:
            shm_size = _get_shared_memory_size_in_M()
            if shm_size is not None and shm_size < 1024.:
                logger.warning("Shared memory size is less than 1G, "
                               "disable shared_memory in DataLoader")
                use_shared_memory = False

        self.dataloader_label = DataLoader(
            dataset=self.dataset_label,
            batch_sampler=self._batch_sampler_label,
            collate_fn=self._batch_transforms_label,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=use_shared_memory)

        self.dataloader_unlabel = DataLoader(
            dataset=self.dataset_unlabel,
            batch_sampler=self._batch_sampler_unlabel,
            collate_fn=self._batch_transforms_unlabel,
            num_workers=worker_num,
            return_list=return_list,
            use_shared_memory=use_shared_memory)

        self.dataloader = CombineSSODLoader(self.dataloader_label,
                                            self.dataloader_unlabel)
        self.loader = iter(self.dataloader)
        return self

    def __len__(self):
        return len(self._batch_sampler_label)

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.loader)

    def next(self):
        # python2 compatibility
        return self.__next__()


@register
class SemiTrainReader(BaseSemiDataLoader):
    __shared__ = ['num_classes']

    def __init__(self,
                 sample_transforms=[],
                 weak_aug=[],
                 strong_aug=[],
                 sup_batch_transforms=[],
                 unsup_batch_transforms=[],
                 sup_batch_size=1,
                 unsup_batch_size=1,
                 shuffle=True,
                 drop_last=True,
                 num_classes=80,
                 collate_batch=True,
                 **kwargs):
        super(SemiTrainReader, self).__init__(
            sample_transforms, weak_aug, strong_aug, sup_batch_transforms,
            unsup_batch_transforms, sup_batch_size, unsup_batch_size, shuffle,
            drop_last, num_classes, collate_batch, **kwargs)
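
A rough sketch of how these readers are wired up (the dataset class, transform names and paths below are assumptions in the usual ppdet config style, not part of this diff; real configs add resize/normalize/permute ops so that samples of equal shape can be collated):

    from ppdet.data.source.coco import COCODataSet
    dataset = COCODataSet(
        dataset_dir='dataset/coco',          # assumed layout
        image_dir='train2017',
        anno_path='annotations/instances_train2017.json')
    loader = TrainReader(
        sample_transforms=[{'Decode': {}}],  # minimal pipeline for illustration
        batch_transforms=[],
        batch_size=1,
        shuffle=True)(dataset, worker_num=0)
    for batch in loader:
        ...  # batch is a dict of collated arrays/tensors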
paddle_detection/ppdet/data/shm_utils.py (new file, 70 lines added)
@@ -0,0 +1,70 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

SIZE_UNIT = ['K', 'M', 'G', 'T']
SHM_QUERY_CMD = 'df -h'
SHM_KEY = 'shm'
SHM_DEFAULT_MOUNT = '/dev/shm'

# [ shared memory size check ]
# In detection models, image/target data occupies a lot of memory and can
# use lots of shared memory in a multi-process DataLoader, so the following
# code gets the shared memory size and performs a size check to disable
# shared memory use if the available size is not enough.
# The shared memory size is obtained as follows:
# 1. use `df -h` to get all mount info
# 2. pick the spaces whose mount info contains 'shm'
# 3. if there is only one 'shm' space, return its size
# 4. if there are multiple 'shm' spaces, prefer the default mount
#    directory '/dev/shm' on Linux-like systems, otherwise return the
#    biggest space size.


def _parse_size_in_M(size_str):
    if size_str[-1] == 'B':
        num, unit = size_str[:-2], size_str[-2]
    else:
        num, unit = size_str[:-1], size_str[-1]
    assert unit in SIZE_UNIT, \
        "unknown shm size unit {}".format(unit)
    return float(num) * \
        (1024 ** (SIZE_UNIT.index(unit) - 1))


def _get_shared_memory_size_in_M():
    try:
        df_infos = os.popen(SHM_QUERY_CMD).readlines()
    except:
        return None
    else:
        shm_infos = []
        for df_info in df_infos:
            info = df_info.strip()
            if info.find(SHM_KEY) >= 0:
                shm_infos.append(info.split())

        if len(shm_infos) == 0:
            return None
        elif len(shm_infos) == 1:
            return _parse_size_in_M(shm_infos[0][3])
        else:
            default_mount_infos = [
                si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
            ]
            if default_mount_infos:
                return _parse_size_in_M(default_mount_infos[0][3])
            else:
                return max([_parse_size_in_M(si[3]) for si in shm_infos])
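
# Hedged usage sketch: the reader code elsewhere uses this check to decide
# whether the DataLoader may use shared memory. The 1024 MB threshold below
# is an illustrative assumption, not the value hard-coded in reader.py.
def _use_shared_memory_sketch():
    shm_size = _get_shared_memory_size_in_M()
    return shm_size is not None and shm_size >= 1024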
33
paddle_detection/ppdet/data/source/__init__.py
Normal file
@@ -0,0 +1,33 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import coco
from . import voc
from . import widerface
from . import category
from . import keypoint_coco
from . import mot
from . import sniper_coco
from . import culane

from .coco import *
from .voc import *
from .widerface import *
from .category import *
from .keypoint_coco import *
from .mot import *
from .sniper_coco import SniperCOCODataSet
from .dataset import ImageFolder
from .pose3d_cmb import *
from .culane import *
942
paddle_detection/ppdet/data/source/category.py
Normal file
@@ -0,0 +1,942 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from ppdet.data.source.voc import pascalvoc_label
from ppdet.data.source.widerface import widerface_label
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

__all__ = ['get_categories']


def get_categories(metric_type, anno_file=None, arch=None):
    """
    Get the class id to category id map and the category id
    to category name map from an annotation file.

    Args:
        metric_type (str): metric type, currently supports 'coco', 'voc',
            'oid' and 'widerface'.
        anno_file (str): annotation file path
    """
    if arch == 'keypoint_arch':
        return (None, {'id': 'keypoint'})

    if anno_file is None or (not os.path.isfile(anno_file)):
        logger.warning(
            "anno_file '{}' is None, not set, or does not exist; "
            "please recheck TrainDataset/EvalDataset/TestDataset.anno_path, "
            "otherwise the default categories will be used by metric_type.".
            format(anno_file))

    if metric_type.lower() == 'coco' or metric_type.lower(
    ) == 'rbox' or metric_type.lower() == 'snipercoco':
        if anno_file and os.path.isfile(anno_file):
            if anno_file.endswith('json'):
                # lazy import pycocotools here
                from pycocotools.coco import COCO
                coco = COCO(anno_file)
                cats = coco.loadCats(coco.getCatIds())

                clsid2catid = {i: cat['id'] for i, cat in enumerate(cats)}
                catid2name = {cat['id']: cat['name'] for cat in cats}

            elif anno_file.endswith('txt'):
                cats = []
                with open(anno_file) as f:
                    for line in f.readlines():
                        cats.append(line.strip())
                if cats[0] == 'background': cats = cats[1:]

                clsid2catid = {i: i for i in range(len(cats))}
                catid2name = {i: name for i, name in enumerate(cats)}

            else:
                raise ValueError("anno_file {} should be json or txt.".format(
                    anno_file))
            return clsid2catid, catid2name

        # anno file does not exist, load the default COCO17 categories
        else:
            if metric_type.lower() == 'rbox':
                logger.warning(
                    "metric_type: {}, load default categories of DOTA.".format(
                        metric_type))
                return _dota_category()
            logger.warning("metric_type: {}, load default categories of COCO.".
                           format(metric_type))
            return _coco17_category()

    elif metric_type.lower() == 'voc':
        if anno_file and os.path.isfile(anno_file):
            cats = []
            with open(anno_file) as f:
                for line in f.readlines():
                    cats.append(line.strip())

            if cats[0] == 'background':
                cats = cats[1:]

            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}

            return clsid2catid, catid2name

        # anno file does not exist, load the default categories of
        # VOC (all 20 categories)
        else:
            logger.warning("metric_type: {}, load default categories of VOC.".
                           format(metric_type))
            return _vocall_category()

    elif metric_type.lower() == 'oid':
        if anno_file and os.path.isfile(anno_file):
            logger.warning("only default categories are supported for OID19")
        return _oid19_category()

    elif metric_type.lower() == 'widerface':
        return _widerface_category()

    elif metric_type.lower() in [
            'keypointtopdowncocoeval', 'keypointtopdownmpiieval',
            'keypointtopdowncocowholebadyhandeval'
    ]:
        return (None, {'id': 'keypoint'})

    elif metric_type.lower() == 'pose3deval':
        return (None, {'id': 'pose3d'})

    elif metric_type.lower() in ['mot', 'motdet', 'reid']:
        if anno_file and os.path.isfile(anno_file):
            cats = []
            with open(anno_file) as f:
                for line in f.readlines():
                    cats.append(line.strip())
            if cats[0] == 'background':
                cats = cats[1:]
            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}
            return clsid2catid, catid2name
        # anno file does not exist, load the default category 'pedestrian'
        else:
            logger.warning(
                "metric_type: {}, load default categories of pedestrian MOT.".
                format(metric_type))
            return _mot_category(category='pedestrian')

    elif metric_type.lower() in ['kitti', 'bdd100kmot']:
        return _mot_category(category='vehicle')

    elif metric_type.lower() in ['mcmot']:
        if anno_file and os.path.isfile(anno_file):
            cats = []
            with open(anno_file) as f:
                for line in f.readlines():
                    cats.append(line.strip())
            if cats[0] == 'background':
                cats = cats[1:]
            clsid2catid = {i: i for i in range(len(cats))}
            catid2name = {i: name for i, name in enumerate(cats)}
            return clsid2catid, catid2name
        # anno file does not exist, load the default VisDrone categories
        # (all 10)
        else:
            logger.warning(
                "metric_type: {}, load default categories of VisDrone.".format(
                    metric_type))
            return _visdrone_category()

    else:
        raise ValueError("unknown metric type {}".format(metric_type))


def _mot_category(category='pedestrian'):
    """
    Get class id to category id map and category id
    to category name map of mot dataset
    """
    label_map = {category: 0}
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]

    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}

    return clsid2catid, catid2name


def _coco17_category():
    """
    Get class id to category id map and category id
    to category name map of COCO2017 dataset
    """
    clsid2catid = {
        1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
        11: 11, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19,
        19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 27, 26: 28,
        27: 31, 28: 32, 29: 33, 30: 34, 31: 35, 32: 36, 33: 37, 34: 38,
        35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, 42: 47,
        43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54, 50: 55,
        51: 56, 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63,
        59: 64, 60: 65, 61: 67, 62: 70, 63: 72, 64: 73, 65: 74, 66: 75,
        67: 76, 68: 77, 69: 78, 70: 79, 71: 80, 72: 81, 73: 82, 74: 84,
        75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90
    }

    catid2name = {
        0: 'background', 1: 'person', 2: 'bicycle', 3: 'car',
        4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck',
        9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign',
        14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog',
        19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
        24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella',
        31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis',
        36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat',
        40: 'baseball glove', 41: 'skateboard', 42: 'surfboard',
        43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup',
        48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana',
        53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli',
        57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake',
        62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
        67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop',
        74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone',
        78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink',
        82: 'refrigerator', 84: 'book', 85: 'clock', 86: 'vase',
        87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'
    }

    clsid2catid = {k - 1: v for k, v in clsid2catid.items()}
    catid2name.pop(0)

    return clsid2catid, catid2name
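
# Illustrative check of the 0-based remap performed above: class ids run
# 0..79 while the original COCO category ids (with their gaps) are preserved.
def _coco17_remap_example():
    clsid2catid, catid2name = _coco17_category()
    assert clsid2catid[0] == 1 and catid2name[1] == 'person'
    assert clsid2catid[79] == 90 and catid2name[90] == 'toothbrush'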


def _dota_category():
    """
    Get class id to category id map and category id
    to category name map of dota dataset
    """
    catid2name = {
        0: 'background', 1: 'plane', 2: 'baseball-diamond', 3: 'bridge',
        4: 'ground-track-field', 5: 'small-vehicle', 6: 'large-vehicle',
        7: 'ship', 8: 'tennis-court', 9: 'basketball-court',
        10: 'storage-tank', 11: 'soccer-ball-field', 12: 'roundabout',
        13: 'harbor', 14: 'swimming-pool', 15: 'helicopter'
    }
    catid2name.pop(0)
    clsid2catid = {i: i + 1 for i in range(len(catid2name))}
    return clsid2catid, catid2name


def _vocall_category():
    """
    Get class id to category id map and category id
    to category name map of mixup voc dataset
    """
    label_map = pascalvoc_label()
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]

    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}

    return clsid2catid, catid2name


def _widerface_category():
    label_map = widerface_label()
    label_map = sorted(label_map.items(), key=lambda x: x[1])
    cats = [l[0] for l in label_map]
    clsid2catid = {i: i for i in range(len(cats))}
    catid2name = {i: name for i, name in enumerate(cats)}

    return clsid2catid, catid2name


def _oid19_category():
    clsid2catid = {k: k + 1 for k in range(500)}

    catid2name = {
        0: "background", 1: "Infant bed", 2: "Rose", 3: "Flag",
        4: "Flashlight", 5: "Sea turtle", 6: "Camera", 7: "Animal",
        8: "Glove", 9: "Crocodile", 10: "Cattle", 11: "House",
        12: "Guacamole", 13: "Penguin", 14: "Vehicle registration plate",
        15: "Bench", 16: "Ladybug", 17: "Human nose", 18: "Watermelon",
        19: "Flute", 20: "Butterfly", 21: "Washing machine", 22: "Raccoon",
        23: "Segway", 24: "Taco", 25: "Jellyfish", 26: "Cake", 27: "Pen",
        28: "Cannon", 29: "Bread", 30: "Tree", 31: "Shellfish", 32: "Bed",
        33: "Hamster", 34: "Hat", 35: "Toaster", 36: "Sombrero",
        37: "Tiara", 38: "Bowl", 39: "Dragonfly", 40: "Moths and butterflies",
        41: "Antelope", 42: "Vegetable", 43: "Torch", 44: "Building",
        45: "Power plugs and sockets", 46: "Blender", 47: "Billiard table",
        48: "Cutting board", 49: "Bronze sculpture", 50: "Turtle",
        51: "Broccoli", 52: "Tiger", 53: "Mirror", 54: "Bear",
        55: "Zucchini", 56: "Dress", 57: "Volleyball", 58: "Guitar",
        59: "Reptile", 60: "Golf cart", 61: "Tart", 62: "Fedora",
        63: "Carnivore", 64: "Car", 65: "Lighthouse", 66: "Coffeemaker",
        67: "Food processor", 68: "Truck", 69: "Bookcase", 70: "Surfboard",
        71: "Footwear", 72: "Bench", 73: "Necklace", 74: "Flower",
        75: "Radish", 76: "Marine mammal", 77: "Frying pan", 78: "Tap",
        79: "Peach", 80: "Knife", 81: "Handbag", 82: "Laptop", 83: "Tent",
        84: "Ambulance", 85: "Christmas tree", 86: "Eagle", 87: "Limousine",
        88: "Kitchen & dining room table", 89: "Polar bear", 90: "Tower",
        91: "Football", 92: "Willow", 93: "Human head", 94: "Stop sign",
        95: "Banana", 96: "Mixer", 97: "Binoculars", 98: "Dessert",
        99: "Bee", 100: "Chair", 101: "Wood-burning stove", 102: "Flowerpot",
        103: "Beaker", 104: "Oyster", 105: "Woodpecker", 106: "Harp",
        107: "Bathtub", 108: "Wall clock", 109: "Sports uniform",
        110: "Rhinoceros", 111: "Beehive", 112: "Cupboard", 113: "Chicken",
        114: "Man", 115: "Blue jay", 116: "Cucumber", 117: "Balloon",
        118: "Kite", 119: "Fireplace", 120: "Lantern", 121: "Missile",
        122: "Book", 123: "Spoon", 124: "Grapefruit", 125: "Squirrel",
        126: "Orange", 127: "Coat", 128: "Punching bag", 129: "Zebra",
        130: "Billboard", 131: "Bicycle", 132: "Door handle",
        133: "Mechanical fan", 134: "Ring binder", 135: "Table",
        136: "Parrot", 137: "Sock", 138: "Vase", 139: "Weapon",
        140: "Shotgun", 141: "Glasses", 142: "Seahorse", 143: "Belt",
        144: "Watercraft", 145: "Window", 146: "Giraffe", 147: "Lion",
        148: "Tire", 149: "Vehicle", 150: "Canoe", 151: "Tie",
        152: "Shelf", 153: "Picture frame", 154: "Printer", 155: "Human leg",
        156: "Boat", 157: "Slow cooker", 158: "Croissant", 159: "Candle",
        160: "Pancake", 161: "Pillow", 162: "Coin", 163: "Stretcher",
        164: "Sandal", 165: "Woman", 166: "Stairs", 167: "Harpsichord",
        168: "Stool", 169: "Bus", 170: "Suitcase", 171: "Human mouth",
        172: "Juice", 173: "Skull", 174: "Door", 175: "Violin",
        176: "Chopsticks", 177: "Digital clock", 178: "Sunflower",
        179: "Leopard", 180: "Bell pepper", 181: "Harbor seal",
        182: "Snake", 183: "Sewing machine", 184: "Goose", 185: "Helicopter",
        186: "Seat belt", 187: "Coffee cup", 188: "Microwave oven",
        189: "Hot dog", 190: "Countertop", 191: "Serving tray",
        192: "Dog bed", 193: "Beer", 194: "Sunglasses", 195: "Golf ball",
        196: "Waffle", 197: "Palm tree", 198: "Trumpet", 199: "Ruler",
        200: "Helmet", 201: "Ladder", 202: "Office building",
        203: "Tablet computer", 204: "Toilet paper", 205: "Pomegranate",
        206: "Skirt", 207: "Gas stove", 208: "Cookie", 209: "Cart",
        210: "Raven", 211: "Egg", 212: "Burrito", 213: "Goat",
        214: "Kitchen knife", 215: "Skateboard",
        216: "Salt and pepper shakers", 217: "Lynx", 218: "Boot",
        219: "Platter", 220: "Ski", 221: "Swimwear", 222: "Swimming pool",
        223: "Drinking straw", 224: "Wrench", 225: "Drum", 226: "Ant",
        227: "Human ear", 228: "Headphones", 229: "Fountain", 230: "Bird",
        231: "Jeans", 232: "Television", 233: "Crab", 234: "Microphone",
        235: "Home appliance", 236: "Snowplow", 237: "Beetle",
        238: "Artichoke", 239: "Jet ski", 240: "Stationary bicycle",
        241: "Human hair", 242: "Brown bear", 243: "Starfish", 244: "Fork",
        245: "Lobster", 246: "Corded phone", 247: "Drink", 248: "Saucer",
        249: "Carrot", 250: "Insect", 251: "Clock", 252: "Castle",
        253: "Tennis racket", 254: "Ceiling fan", 255: "Asparagus",
        256: "Jaguar", 257: "Musical instrument", 258: "Train", 259: "Cat",
        260: "Rifle", 261: "Dumbbell", 262: "Mobile phone", 263: "Taxi",
        264: "Shower", 265: "Pitcher", 266: "Lemon", 267: "Invertebrate",
        268: "Turkey", 269: "High heels", 270: "Bust", 271: "Elephant",
        272: "Scarf", 273: "Barrel", 274: "Trombone", 275: "Pumpkin",
        276: "Box", 277: "Tomato", 278: "Frog", 279: "Bidet",
        280: "Human face", 281: "Houseplant", 282: "Van", 283: "Shark",
        284: "Ice cream", 285: "Swim cap", 286: "Falcon", 287: "Ostrich",
        288: "Handgun", 289: "Whiteboard", 290: "Lizard", 291: "Pasta",
        292: "Snowmobile", 293: "Light bulb", 294: "Window blind",
        295: "Muffin", 296: "Pretzel", 297: "Computer monitor",
        298: "Horn", 299: "Furniture", 300: "Sandwich", 301: "Fox",
        302: "Convenience store", 303: "Fish", 304: "Fruit",
        305: "Earrings", 306: "Curtain", 307: "Grape", 308: "Sofa bed",
        309: "Horse", 310: "Luggage and bags", 311: "Desk", 312: "Crutch",
        313: "Bicycle helmet", 314: "Tick", 315: "Airplane", 316: "Canary",
        317: "Spatula", 318: "Watch", 319: "Lily", 320: "Kitchen appliance",
        321: "Filing cabinet", 322: "Aircraft", 323: "Cake stand",
        324: "Candy", 325: "Sink", 326: "Mouse", 327: "Wine",
        328: "Wheelchair", 329: "Goldfish", 330: "Refrigerator",
        331: "French fries", 332: "Drawer", 333: "Treadmill",
        334: "Picnic basket", 335: "Dice", 336: "Cabbage",
        337: "Football helmet", 338: "Pig", 339: "Person", 340: "Shorts",
        341: "Gondola", 342: "Honeycomb", 343: "Doughnut",
        344: "Chest of drawers", 345: "Land vehicle", 346: "Bat",
        347: "Monkey", 348: "Dagger", 349: "Tableware", 350: "Human foot",
        351: "Mug", 352: "Alarm clock", 353: "Pressure cooker",
        354: "Human hand", 355: "Tortoise", 356: "Baseball glove",
        357: "Sword", 358: "Pear", 359: "Miniskirt", 360: "Traffic sign",
        361: "Girl", 362: "Roller skates", 363: "Dinosaur", 364: "Porch",
        365: "Human beard", 366: "Submarine sandwich", 367: "Screwdriver",
        368: "Strawberry", 369: "Wine glass", 370: "Seafood",
        371: "Racket", 372: "Wheel", 373: "Sea lion", 374: "Toy",
        375: "Tea", 376: "Tennis ball", 377: "Waste container",
        378: "Mule", 379: "Cricket ball", 380: "Pineapple", 381: "Coconut",
        382: "Doll", 383: "Coffee table", 384: "Snowman", 385: "Lavender",
        386: "Shrimp", 387: "Maple", 388: "Cowboy hat", 389: "Goggles",
        390: "Rugby ball", 391: "Caterpillar", 392: "Poster",
        393: "Rocket", 394: "Organ", 395: "Saxophone", 396: "Traffic light",
        397: "Cocktail", 398: "Plastic bag", 399: "Squash", 400: "Mushroom",
        401: "Hamburger", 402: "Light switch", 403: "Parachute",
        404: "Teddy bear", 405: "Winter melon", 406: "Deer",
        407: "Musical keyboard", 408: "Plumbing fixture", 409: "Scoreboard",
        410: "Baseball bat", 411: "Envelope", 412: "Adhesive tape",
        413: "Briefcase", 414: "Paddle", 415: "Bow and arrow",
        416: "Telephone", 417: "Sheep", 418: "Jacket", 419: "Boy",
        420: "Pizza", 421: "Otter", 422: "Office supplies", 423: "Couch",
        424: "Cello", 425: "Bull", 426: "Camel", 427: "Ball", 428: "Duck",
        429: "Whale", 430: "Shirt", 431: "Tank", 432: "Motorcycle",
        433: "Accordion", 434: "Owl", 435: "Porcupine", 436: "Sun hat",
        437: "Nail", 438: "Scissors", 439: "Swan", 440: "Lamp",
        441: "Crown", 442: "Piano", 443: "Sculpture", 444: "Cheetah",
        445: "Oboe", 446: "Tin can", 447: "Mango", 448: "Tripod",
        449: "Oven", 450: "Mouse", 451: "Barge", 452: "Coffee",
        453: "Snowboard", 454: "Common fig", 455: "Salad",
        456: "Marine invertebrates", 457: "Umbrella", 458: "Kangaroo",
        459: "Human arm", 460: "Measuring cup", 461: "Snail",
        462: "Loveseat", 463: "Suit", 464: "Teapot", 465: "Bottle",
        466: "Alpaca", 467: "Kettle", 468: "Trousers", 469: "Popcorn",
        470: "Centipede", 471: "Spider", 472: "Sparrow", 473: "Plate",
        474: "Bagel", 475: "Personal care", 476: "Apple", 477: "Brassiere",
        478: "Bathroom cabinet", 479: "studio couch",
        480: "Computer keyboard", 481: "Table tennis racket", 482: "Sushi",
        483: "Cabinetry", 484: "Street light", 485: "Towel",
        486: "Nightstand", 487: "Rabbit", 488: "Dolphin", 489: "Dog",
        490: "Jug", 491: "Wok", 492: "Fire hydrant", 493: "Human eye",
        494: "Skyscraper", 495: "Backpack", 496: "Potato",
        497: "Paper towel", 498: "Lifejacket", 499: "Bicycle wheel",
        500: "Toilet",
    }

    return clsid2catid, catid2name


def _visdrone_category():
    clsid2catid = {i: i for i in range(10)}

    catid2name = {
        0: 'pedestrian', 1: 'people', 2: 'bicycle', 3: 'car', 4: 'van',
        5: 'truck', 6: 'tricycle', 7: 'awning-tricycle', 8: 'bus', 9: 'motor'
    }
    return clsid2catid, catid2name
596
paddle_detection/ppdet/data/source/coco.py
Normal file
@@ -0,0 +1,596 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import copy
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
import numpy as np
from ppdet.core.workspace import register, serializable
from .dataset import DetDataset

from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

__all__ = [
    'COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet', 'COCODetDataset'
]


@register
@serializable
class COCODataSet(DetDataset):
    """
    Load dataset with COCO format.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): coco annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        load_crowd (bool): whether to load crowded ground-truth.
            False as default.
        allow_empty (bool): whether to load empty entries. False as default.
        empty_ratio (float): the ratio of empty record number to total
            records; if empty_ratio is out of [0., 1.), do not sample the
            records and use all the empty entries. 1. as default.
        repeat (int): repeat times for dataset, used in benchmark.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=False,
                 empty_ratio=1.,
                 repeat=1):
        super(COCODataSet, self).__init__(
            dataset_dir,
            image_dir,
            anno_path,
            data_fields,
            sample_num,
            repeat=repeat)
        self.load_image_only = False
        self.load_semantic = False
        self.load_crowd = load_crowd
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio

    def _sample_empty(self, records, num):
        # if empty_ratio is out of [0., 1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        sample_num = min(
            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
        records = random.sample(records, sample_num)
        return records

    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0

        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        self.cname2cid = dict({
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        })

        if 'annotations' not in coco.dataset:
            self.load_image_only = True
            logger.warning('Annotation file: {} does not contain ground '
                           'truth, loading image information only.'.format(
                               anno_path))

        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])

            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue

            if im_w < 0 or im_h < 0:
                logger.warning('Illegal width: {} or height: {} in annotation, '
                               'and im_id: {} will be ignored'.format(
                                   im_w, im_h, img_id))
                continue

            coco_rec = {
                'im_file': im_path,
                'im_id': np.array([img_id]),
                'h': im_h,
                'w': im_w,
            } if 'image' in self.data_fields else {}

            if not self.load_image_only:
                ins_anno_ids = coco.getAnnIds(
                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
                instances = coco.loadAnns(ins_anno_ids)

                bboxes = []
                is_rbox_anno = False
                for inst in instances:
                    # check gt bbox
                    if inst.get('ignore', False):
                        continue
                    if 'bbox' not in inst.keys():
                        continue
                    else:
                        if not any(np.array(inst['bbox'])):
                            continue

                    x1, y1, box_w, box_h = inst['bbox']
                    x2 = x1 + box_w
                    y2 = y1 + box_h
                    eps = 1e-5
                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                        inst['clean_bbox'] = [
                            round(float(x), 3) for x in [x1, y1, x2, y2]
                        ]
                        bboxes.append(inst)
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: im_id: {}, '
                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                img_id, float(inst['area']), x1, y1, x2, y2))

                num_bbox = len(bboxes)
                if num_bbox <= 0 and not self.allow_empty:
                    continue
                elif num_bbox <= 0:
                    is_empty = True

                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox
                gt_track_id = -np.ones((num_bbox, 1), dtype=np.int32)

                has_segmentation = False
                has_track_id = False
                for i, box in enumerate(bboxes):
                    catid = box['category_id']
                    gt_class[i][0] = self.catid2clsid[catid]
                    gt_bbox[i, :] = box['clean_bbox']
                    is_crowd[i][0] = box['iscrowd']
                    # check RLE format
                    if 'segmentation' in box and box['iscrowd'] == 1:
                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
                    elif 'segmentation' in box and box['segmentation']:
                        if not np.array(
                                box['segmentation'],
                                dtype=object).size > 0 and not self.allow_empty:
                            bboxes.pop(i)
                            gt_poly.pop(i)
                            # NOTE: np.delete returns a copy; these three
                            # calls do not modify the arrays in place.
                            np.delete(is_crowd, i)
                            np.delete(gt_class, i)
                            np.delete(gt_bbox, i)
                        else:
                            gt_poly[i] = box['segmentation']
                            has_segmentation = True

                    if 'track_id' in box:
                        gt_track_id[i][0] = box['track_id']
                        has_track_id = True

                if has_segmentation and not any(
                        gt_poly) and not self.allow_empty:
                    continue

                gt_rec = {
                    'is_crowd': is_crowd,
                    'gt_class': gt_class,
                    'gt_bbox': gt_bbox,
                    'gt_poly': gt_poly,
                }
                if has_track_id:
                    gt_rec.update({'gt_track_id': gt_track_id})

                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        coco_rec[k] = v

                # TODO: remove load_semantic
                if self.load_semantic and 'semantic' in self.data_fields:
                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                            'train2017', im_fname[:-3] + 'png')
                    coco_rec.update({'semantic': seg_path})

            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                im_path, img_id, im_h, im_w))
            if is_empty:
                empty_records.append(coco_rec)
            else:
                records.append(coco_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
                    format(ct, len(img_ids) - ct, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records
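
# Hedged instantiation sketch for COCODataSet; the paths below are
# placeholders for a COCO-layout dataset, not shipped defaults.
def _coco_dataset_example():
    dataset = COCODataSet(
        dataset_dir='dataset/coco',
        image_dir='train2017',
        anno_path='annotations/instances_train2017.json',
        data_fields=['image', 'gt_bbox', 'gt_class', 'is_crowd'])
    dataset.parse_dataset()
    return len(dataset.roidbs)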


@register
@serializable
class SlicedCOCODataSet(COCODataSet):
    """Sliced COCODataSet"""

    def __init__(
            self,
            dataset_dir=None,
            image_dir=None,
            anno_path=None,
            data_fields=['image'],
            sample_num=-1,
            load_crowd=False,
            allow_empty=False,
            empty_ratio=1.,
            repeat=1,
            sliced_size=[640, 640],
            overlap_ratio=[0.25, 0.25], ):
        super(SlicedCOCODataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            load_crowd=load_crowd,
            allow_empty=allow_empty,
            empty_ratio=empty_ratio,
            repeat=repeat, )
        self.sliced_size = sliced_size
        self.overlap_ratio = overlap_ratio

    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0
        ct_sub = 0

        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        self.cname2cid = dict({
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        })

        if 'annotations' not in coco.dataset:
            self.load_image_only = True
            logger.warning('Annotation file: {} does not contain ground '
                           'truth, loading image information only.'.format(
                               anno_path))
        try:
            import sahi
            from sahi.slicing import slice_image
        except Exception as e:
            logger.error(
                'sahi not found, please install sahi. '
                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
            )
            raise e

        # NOTE: sub_img_ids is never advanced below, so sub-image ids
        # restart at 0 for every source image.
        sub_img_ids = 0
        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])

            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue

            if im_w < 0 or im_h < 0:
                logger.warning('Illegal width: {} or height: {} in annotation, '
                               'and im_id: {} will be ignored'.format(
                                   im_w, im_h, img_id))
                continue

            slice_image_result = sahi.slicing.slice_image(
                image=im_path,
                slice_height=self.sliced_size[0],
                slice_width=self.sliced_size[1],
                overlap_height_ratio=self.overlap_ratio[0],
                overlap_width_ratio=self.overlap_ratio[1])

            sub_img_num = len(slice_image_result)
            for _ind in range(sub_img_num):
                im = slice_image_result.images[_ind]
                coco_rec = {
                    'image': im,
                    'im_id': np.array([sub_img_ids + _ind]),
                    'h': im.shape[0],
                    'w': im.shape[1],
                    'ori_im_id': np.array([img_id]),
                    'st_pix': np.array(
                        slice_image_result.starting_pixels[_ind],
                        dtype=np.float32),
                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
                } if 'image' in self.data_fields else {}
                records.append(coco_rec)
            ct_sub += sub_img_num
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.info('{} samples sliced into {} sub_samples in file {}'.format(
            ct, ct_sub, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records
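
# For intuition, a rough slice-count estimate under the default 640x640
# slices with 0.25 overlap; sahi's exact edge handling may differ slightly,
# so treat this as an approximation, not the library's arithmetic.
def _approx_num_slices(im_w, im_h, slice_w=640, slice_h=640, overlap=0.25):
    import math
    stride_w = int(slice_w * (1 - overlap))
    stride_h = int(slice_h * (1 - overlap))
    nx = max(1, math.ceil((im_w - slice_w) / stride_w) + 1)
    ny = max(1, math.ceil((im_h - slice_h) / stride_h) + 1)
    return nx * ny  # e.g. _approx_num_slices(1920, 1080) -> 8 sub-images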


@register
@serializable
class SemiCOCODataSet(COCODataSet):
    """Semi-COCODataSet used for both supervised and unsupervised datasets"""

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=False,
                 empty_ratio=1.,
                 repeat=1,
                 supervised=True):
        super(SemiCOCODataSet, self).__init__(
            dataset_dir, image_dir, anno_path, data_fields, sample_num,
            load_crowd, allow_empty, empty_ratio, repeat)
        self.supervised = supervised
        self.length = -1  # default -1 means all

    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        assert anno_path.endswith('.json'), \
            'invalid coco annotation file: ' + anno_path
        from pycocotools.coco import COCO
        coco = COCO(anno_path)
        img_ids = coco.getImgIds()
        img_ids.sort()
        cat_ids = coco.getCatIds()
        records = []
        empty_records = []
        ct = 0

        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        self.cname2cid = dict({
            coco.loadCats(catid)[0]['name']: clsid
            for catid, clsid in self.catid2clsid.items()
        })

        if 'annotations' not in coco.dataset or not self.supervised:
            self.load_image_only = True
            logger.warning('Annotation file: {} does not contain ground '
                           'truth, loading image information only.'.format(
                               anno_path))

        for img_id in img_ids:
            img_anno = coco.loadImgs([img_id])[0]
            im_fname = img_anno['file_name']
            im_w = float(img_anno['width'])
            im_h = float(img_anno['height'])

            im_path = os.path.join(image_dir,
                                   im_fname) if image_dir else im_fname
            is_empty = False
            if not os.path.exists(im_path):
                logger.warning('Illegal image file: {}, and it will be '
                               'ignored'.format(im_path))
                continue

            if im_w < 0 or im_h < 0:
                logger.warning('Illegal width: {} or height: {} in annotation, '
                               'and im_id: {} will be ignored'.format(
                                   im_w, im_h, img_id))
                continue

            coco_rec = {
                'im_file': im_path,
                'im_id': np.array([img_id]),
                'h': im_h,
                'w': im_w,
            } if 'image' in self.data_fields else {}

            if not self.load_image_only:
                ins_anno_ids = coco.getAnnIds(
                    imgIds=[img_id], iscrowd=None if self.load_crowd else False)
                instances = coco.loadAnns(ins_anno_ids)

                bboxes = []
                is_rbox_anno = False
                for inst in instances:
                    # check gt bbox
                    if inst.get('ignore', False):
                        continue
                    if 'bbox' not in inst.keys():
                        continue
                    else:
                        if not any(np.array(inst['bbox'])):
                            continue

                    x1, y1, box_w, box_h = inst['bbox']
                    x2 = x1 + box_w
                    y2 = y1 + box_h
                    eps = 1e-5
                    if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps:
                        inst['clean_bbox'] = [
                            round(float(x), 3) for x in [x1, y1, x2, y2]
                        ]
                        bboxes.append(inst)
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: im_id: {}, '
                            'area: {} x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                img_id, float(inst['area']), x1, y1, x2, y2))

                num_bbox = len(bboxes)
                if num_bbox <= 0 and not self.allow_empty:
                    continue
                elif num_bbox <= 0:
                    is_empty = True

                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_poly = [None] * num_bbox

                has_segmentation = False
                for i, box in enumerate(bboxes):
                    catid = box['category_id']
                    gt_class[i][0] = self.catid2clsid[catid]
                    gt_bbox[i, :] = box['clean_bbox']
                    is_crowd[i][0] = box['iscrowd']
                    # check RLE format
                    if 'segmentation' in box and box['iscrowd'] == 1:
                        gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
                    elif 'segmentation' in box and box['segmentation']:
                        if not np.array(box['segmentation']
                                        ).size > 0 and not self.allow_empty:
                            bboxes.pop(i)
                            gt_poly.pop(i)
                            np.delete(is_crowd, i)
                            np.delete(gt_class, i)
                            np.delete(gt_bbox, i)
                        else:
                            gt_poly[i] = box['segmentation']
                            has_segmentation = True

                if has_segmentation and not any(
                        gt_poly) and not self.allow_empty:
                    continue

                gt_rec = {
                    'is_crowd': is_crowd,
                    'gt_class': gt_class,
                    'gt_bbox': gt_bbox,
                    'gt_poly': gt_poly,
                }

                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        coco_rec[k] = v

                # TODO: remove load_semantic
                if self.load_semantic and 'semantic' in self.data_fields:
                    seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps',
                                            'train2017', im_fname[:-3] + 'png')
                    coco_rec.update({'semantic': seg_path})

            logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
                im_path, img_id, im_h, im_w))
            if is_empty:
                empty_records.append(coco_rec)
            else:
                records.append(coco_rec)
            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert ct > 0, 'not found any coco record in %s' % (anno_path)
        logger.info('Load [{} samples valid, {} samples invalid] in file {}.'.
                    format(ct, len(img_ids) - ct, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs = records

        if self.supervised:
            logger.info(f'Use {len(self.roidbs)} sup_samples data as LABELED')
        else:
            if self.length > 0:  # unsup length will be decided by sup length
                all_roidbs = self.roidbs.copy()
                selected_idxs = [
                    np.random.choice(len(all_roidbs))
                    for _ in range(self.length)
                ]
                self.roidbs = [all_roidbs[i] for i in selected_idxs]
            logger.info(
                f'Use {len(self.roidbs)} unsup_samples data as UNLABELED')

    def __getitem__(self, idx):
        n = len(self.roidbs)
        if self.repeat > 1:
            idx %= n
        # data batch
        roidb = copy.deepcopy(self.roidbs[idx])
        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
            roidb = [roidb, ] + [
                copy.deepcopy(self.roidbs[np.random.randint(n)])
                for _ in range(4)
            ]
        if isinstance(roidb, Sequence):
            for r in roidb:
                r['curr_iter'] = self._curr_iter
        else:
            roidb['curr_iter'] = self._curr_iter
        self._curr_iter += 1

        return self.transform(roidb)


# for PaddleX
@register
@serializable
class COCODetDataset(COCODataSet):
    pass
206
paddle_detection/ppdet/data/source/culane.py
Normal file
@@ -0,0 +1,206 @@
from ppdet.core.workspace import register, serializable
import cv2
import os
import tarfile
import numpy as np
import os.path as osp
from ppdet.data.source.dataset import DetDataset
from imgaug.augmentables.lines import LineStringsOnImage
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
from ppdet.data.culane_utils import lane_to_linestrings
import pickle as pkl
from ppdet.utils.logger import setup_logger
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from .dataset import DetDataset, _make_dataset, _is_valid_file
from ppdet.utils.download import download_dataset

logger = setup_logger(__name__)


@register
@serializable
class CULaneDataSet(DetDataset):
    def __init__(
            self,
            dataset_dir,
            cut_height,
            list_path,
            split='train',
            data_fields=['image'],
            video_file=None,
            frame_rate=-1, ):
        super(CULaneDataSet, self).__init__(
            dataset_dir=dataset_dir,
            cut_height=cut_height,
            split=split,
            data_fields=data_fields)
        self.dataset_dir = dataset_dir
        self.list_path = osp.join(dataset_dir, list_path)
        self.cut_height = cut_height
        self.data_fields = data_fields
        self.split = split
        self.training = 'train' in split
        self.data_infos = []
        self.video_file = video_file
        self.frame_rate = frame_rate
        self._imid2path = {}
        self.predict_dir = None

    def __len__(self):
        return len(self.data_infos)

    def check_or_download_dataset(self):
        if not osp.exists(self.dataset_dir):
            download_dataset("dataset", dataset="culane")
        # extract .tar files in self.dataset_dir
        for fname in os.listdir(self.dataset_dir):
            logger.info("Decompressing {}...".format(fname))
            # ignore .* files
            if fname.startswith('.'):
                continue
            if fname.find('.tar.gz') >= 0:
                with tarfile.open(osp.join(self.dataset_dir, fname)) as tf:
                    tf.extractall(path=self.dataset_dir)
        logger.info("Dataset files are ready.")

    def parse_dataset(self):
        logger.info('Loading CULane annotations...')
        if self.predict_dir is not None:
            logger.info('switch to predict mode')
            return
        # Waiting for the dataset to load is tedious, let's cache it
        os.makedirs('cache', exist_ok=True)
        cache_path = 'cache/culane_paddle_{}.pkl'.format(self.split)
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as cache_file:
                self.data_infos = pkl.load(cache_file)
                self.max_lanes = max(
                    len(anno['lanes']) for anno in self.data_infos)
                return

        with open(self.list_path) as list_file:
            for line in list_file:
                infos = self.load_annotation(line.split())
                self.data_infos.append(infos)

        # cache data infos to file
        with open(cache_path, 'wb') as cache_file:
            pkl.dump(self.data_infos, cache_file)

    def load_annotation(self, line):
        infos = {}
        img_line = line[0]
        img_line = img_line[1 if img_line[0] == '/' else 0::]
        img_path = os.path.join(self.dataset_dir, img_line)
        infos['img_name'] = img_line
        infos['img_path'] = img_path
        if len(line) > 1:
            mask_line = line[1]
            mask_line = mask_line[1 if mask_line[0] == '/' else 0::]
            mask_path = os.path.join(self.dataset_dir, mask_line)
            infos['mask_path'] = mask_path

        if len(line) > 2:
            exist_list = [int(l) for l in line[2:]]
            infos['lane_exist'] = np.array(exist_list)

        # replace the 'jpg' suffix with 'lines.txt'
        anno_path = img_path[:-3] + 'lines.txt'
        with open(anno_path, 'r') as anno_file:
            data = [
                list(map(float, line.split())) for line in anno_file.readlines()
            ]
        lanes = [[(lane[i], lane[i + 1]) for i in range(0, len(lane), 2)
                  if lane[i] >= 0 and lane[i + 1] >= 0] for lane in data]
        lanes = [list(set(lane)) for lane in lanes]  # remove duplicated points
        lanes = [lane for lane in lanes
                 if len(lane) > 2]  # drop lanes with fewer than 3 points

        lanes = [sorted(
            lane, key=lambda x: x[1]) for lane in lanes]  # sort by y
        infos['lanes'] = lanes

        return infos
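
    # Hedged worked example of the parsing above, using one illustrative
    # lines.txt row of x/y pairs; negative values mark padding and are
    # dropped before dedup and sorting by y.
    @staticmethod
    def _lane_parse_example():
        row = list(map(float, "10 540 60 500 110 460 -2 -2".split()))
        pts = [(row[i], row[i + 1]) for i in range(0, len(row), 2)
               if row[i] >= 0 and row[i + 1] >= 0]
        pts = sorted(set(pts), key=lambda p: p[1])  # dedupe, sort by y
        return pts  # [(110.0, 460.0), (60.0, 500.0), (10.0, 540.0)]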

    def set_images(self, images):
        self.predict_dir = images
        self.data_infos = self._load_images()

    def _find_images(self):
        predict_dir = self.predict_dir
        if not isinstance(predict_dir, Sequence):
            predict_dir = [predict_dir]
        images = []
        for im_dir in predict_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.predict_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        images = self._find_images()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {
                'im_id': np.array([ct]),
                "img_path": os.path.abspath(image),
                "img_name": os.path.basename(image),
                "lanes": []
            }
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        return self._imid2path

    def __getitem__(self, idx):
        data_info = self.data_infos[idx]
        img = cv2.imread(data_info['img_path'])
        img = img[self.cut_height:, :, :]
        sample = data_info.copy()
        sample.update({'image': img})
        img_org = sample['image']

        if self.training:
            label = cv2.imread(sample['mask_path'], cv2.IMREAD_UNCHANGED)
            if len(label.shape) > 2:
                label = label[:, :, 0]
            label = label.squeeze()
            label = label[self.cut_height:, :]
            sample.update({'mask': label})
            if self.cut_height != 0:
                new_lanes = []
                for i in sample['lanes']:
                    lanes = []
                    for p in i:
                        lanes.append((p[0], p[1] - self.cut_height))
                    new_lanes.append(lanes)
                sample.update({'lanes': new_lanes})

            sample['mask'] = SegmentationMapsOnImage(
                sample['mask'], shape=img_org.shape)

        sample['full_img_path'] = data_info['img_path']
        sample['img_name'] = data_info['img_name']
        sample['im_id'] = np.array([idx])

        sample['image'] = sample['image'].copy().astype(np.uint8)
        sample['lanes'] = lane_to_linestrings(sample['lanes'])
        sample['lanes'] = LineStringsOnImage(
            sample['lanes'], shape=img_org.shape)
        sample['seg'] = np.zeros(img_org.shape)

        return sample
307
paddle_detection/ppdet/data/source/dataset.py
Normal file
@@ -0,0 +1,307 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import copy
import numpy as np
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from paddle.io import Dataset
from ppdet.core.workspace import register, serializable
from ppdet.utils.download import get_dataset_path
from ppdet.data import source

from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


@serializable
class DetDataset(Dataset):
    """
    Load detection dataset.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        use_default_label (bool): whether to load default label list.
        repeat (int): repeat times for dataset, used in benchmark.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 use_default_label=None,
                 repeat=1,
                 **kwargs):
        super(DetDataset, self).__init__()
        self.dataset_dir = dataset_dir if dataset_dir is not None else ''
        self.anno_path = anno_path
        self.image_dir = image_dir if image_dir is not None else ''
        self.data_fields = data_fields
        self.sample_num = sample_num
        self.use_default_label = use_default_label
        self.repeat = repeat
        self._epoch = 0
        self._curr_iter = 0

    def __len__(self, ):
        return len(self.roidbs) * self.repeat

    def __call__(self, *args, **kwargs):
        return self

    def __getitem__(self, idx):
        n = len(self.roidbs)
        if self.repeat > 1:
            idx %= n
        # data batch
        roidb = copy.deepcopy(self.roidbs[idx])
        # Epoch-gated multi-sample fetch: while mixup/cutmix/mosaic is still
        # active, hand the transform a list of records instead of one.
        if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch:
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch:
            idx = np.random.randint(n)
            roidb = [roidb, copy.deepcopy(self.roidbs[idx])]
        elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch:
            roidb = [roidb, ] + [
                copy.deepcopy(self.roidbs[np.random.randint(n)])
                for _ in range(4)
            ]
        elif self.pre_img_epoch == 0 or self._epoch < self.pre_img_epoch:
            # Add previous image as input, only used in CenterTrack
            idx_pre_img = idx - 1
            if idx_pre_img < 0:
                idx_pre_img = idx + 1
            roidb = [roidb, ] + [copy.deepcopy(self.roidbs[idx_pre_img])]
        if isinstance(roidb, Sequence):
            for r in roidb:
                r['curr_iter'] = self._curr_iter
        else:
            roidb['curr_iter'] = self._curr_iter
        self._curr_iter += 1

        return self.transform(roidb)
|
||||
|
||||
def check_or_download_dataset(self):
|
||||
self.dataset_dir = get_dataset_path(self.dataset_dir, self.anno_path,
|
||||
self.image_dir)
|
||||
|
||||
def set_kwargs(self, **kwargs):
|
||||
self.mixup_epoch = kwargs.get('mixup_epoch', -1)
|
||||
self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
|
||||
self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
|
||||
self.pre_img_epoch = kwargs.get('pre_img_epoch', -1)
|
||||
|
||||
def set_transform(self, transform):
|
||||
self.transform = transform
|
||||
|
||||
def set_epoch(self, epoch_id):
|
||||
self._epoch = epoch_id
|
||||
|
||||
def parse_dataset(self, ):
|
||||
raise NotImplementedError(
|
||||
"Need to implement parse_dataset method of Dataset")
|
||||
|
||||
def get_anno(self):
|
||||
if self.anno_path is None:
|
||||
return
|
||||
return os.path.join(self.dataset_dir, self.anno_path)
|
||||
|
||||
|
||||
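The mixup/cutmix/mosaic branches in `__getitem__` above are gated purely by `_epoch` versus the `*_epoch` values injected through `set_kwargs`, so a trainer drives them externally. A minimal sketch of that driving loop, assuming a hypothetical `MyDataset` subclass with `roidbs` already filled and an identity transform:

# Minimal sketch of how a trainer would drive the epoch-gated
# augmentation; MyDataset is a hypothetical stand-in.
dataset = MyDataset(dataset_dir='dataset/coco')   # any DetDataset subclass
dataset.parse_dataset()                           # fills dataset.roidbs
dataset.set_transform(lambda roidb: roidb)        # identity transform
dataset.set_kwargs(mixup_epoch=10)                # mixup active while _epoch < 10
for epoch in range(20):
    dataset.set_epoch(epoch)
    sample = dataset[0]   # list of two roidbs while epoch < 10, a dict afterwards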
def _is_valid_file(f, extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    return f.lower().endswith(extensions)


def _make_dataset(dir):
    dir = os.path.expanduser(dir)
    if not os.path.isdir(dir):
        # raising a bare string is invalid in Python 3; raise a proper exception
        raise ValueError('{} should be a dir'.format(dir))
    images = []
    for root, _, fnames in sorted(os.walk(dir, followlinks=True)):
        for fname in sorted(fnames):
            path = os.path.join(root, fname)
            if _is_valid_file(path):
                images.append(path)
    return images

@register
@serializable
class ImageFolder(DetDataset):
    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 sample_num=-1,
                 use_default_label=None,
                 **kwargs):
        super(ImageFolder, self).__init__(
            dataset_dir,
            image_dir,
            anno_path,
            sample_num=sample_num,
            use_default_label=use_default_label)
        self._imid2path = {}
        self.roidbs = None
        self.sample_num = sample_num

    def check_or_download_dataset(self):
        return

    def get_anno(self):
        if self.anno_path is None:
            return
        if self.dataset_dir:
            return os.path.join(self.dataset_dir, self.anno_path)
        else:
            return self.anno_path

    def parse_dataset(self, ):
        if not self.roidbs:
            self.roidbs = self._load_images()

    def _parse(self):
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        return self._imid2path

    def set_images(self, images):
        self.image_dir = images
        self.roidbs = self._load_images()

    def set_slice_images(self,
                         images,
                         slice_size=[640, 640],
                         overlap_ratio=[0.25, 0.25]):
        self.image_dir = images
        ori_records = self._load_images()
        try:
            import sahi
            from sahi.slicing import slice_image
        except Exception as e:
            logger.error(
                'sahi not found, please install sahi. '
                'for example: `pip install sahi`, see https://github.com/obss/sahi.'
            )
            raise e

        sub_img_ids = 0
        ct = 0
        ct_sub = 0
        records = []
        for i, ori_rec in enumerate(ori_records):
            im_path = ori_rec['im_file']
            slice_image_result = sahi.slicing.slice_image(
                image=im_path,
                slice_height=slice_size[0],
                slice_width=slice_size[1],
                overlap_height_ratio=overlap_ratio[0],
                overlap_width_ratio=overlap_ratio[1])

            sub_img_num = len(slice_image_result)
            for _ind in range(sub_img_num):
                im = slice_image_result.images[_ind]
                rec = {
                    'image': im,
                    'im_id': np.array([sub_img_ids + _ind]),
                    'h': im.shape[0],
                    'w': im.shape[1],
                    'ori_im_id': np.array([ori_rec['im_id'][0]]),
                    'st_pix': np.array(
                        slice_image_result.starting_pixels[_ind],
                        dtype=np.float32),
                    'is_last': 1 if _ind == sub_img_num - 1 else 0,
                } if 'image' in self.data_fields else {}
                records.append(rec)
            ct_sub += sub_img_num
            ct += 1
        logger.info('{} samples and slice to {} sub_samples.'.format(ct,
                                                                     ct_sub))
        self.roidbs = records

    def get_label_list(self):
        # Only VOC dataset needs label list in ImageFolder
        return self.anno_path

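`set_slice_images` defers all tiling to SAHI and only repackages the returned crops into roidb records. A hedged usage sketch, assuming sahi is installed and with a hypothetical image path:

# Hedged usage sketch for sliced inference; the image path is hypothetical.
folder = ImageFolder(dataset_dir='')
folder.set_slice_images(
    ['demo/large_aerial.jpg'],      # one large image
    slice_size=[640, 640],          # crop height, width
    overlap_ratio=[0.25, 0.25])     # 25% overlap on each axis
# each roidb now carries 'st_pix', the crop's top-left corner in the
# original image, which merging predictions back relies on
print(len(folder.roidbs), folder.roidbs[0]['st_pix'])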
@register
class CommonDataset(object):
    def __init__(self, **dataset_args):
        super(CommonDataset, self).__init__()
        dataset_args = copy.deepcopy(dataset_args)
        type = dataset_args.pop("name")
        self.dataset = getattr(source, type)(**dataset_args)

    def __call__(self):
        return self.dataset


@register
class TrainDataset(CommonDataset):
    pass


@register
class EvalMOTDataset(CommonDataset):
    pass


@register
class TestMOTDataset(CommonDataset):
    pass


@register
class EvalDataset(CommonDataset):
    pass


@register
class TestDataset(CommonDataset):
    pass
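`CommonDataset` is a thin factory: it pops the `name` key from the config dict and looks that class up in `ppdet.data.source`. A minimal sketch, assuming a COCO-style source class named `COCODataSet` is importable from `ppdet.data.source` (not shown in this commit):

# Minimal factory sketch; the class name and paths are assumptions.
train = TrainDataset(
    name='COCODataSet',                 # resolved via getattr(source, name)
    dataset_dir='dataset/coco',
    image_dir='train2017',
    anno_path='annotations/instances_train2017.json')
dataset = train()   # __call__ returns the wrapped source dataset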
845
paddle_detection/ppdet/data/source/keypoint_coco.py
Normal file
@@ -0,0 +1,845 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
this code is based on https://github.com/open-mmlab/mmpose
"""
import os
import cv2
import numpy as np
import json
import copy
import pycocotools
from pycocotools.coco import COCO
from .dataset import DetDataset
from ppdet.core.workspace import register, serializable

@serializable
class KeypointBottomUpBaseDataset(DetDataset):
    """Base class for bottom-up datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_imganno`

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.test_mode = test_mode

        self.ann_info['num_joints'] = num_joints
        self.img_ids = []

    def parse_dataset(self):
        # parsing is done by the subclasses; the duplicate second
        # definition of this method has been dropped
        pass

    def __len__(self):
        """Get dataset length."""
        return len(self.img_ids)

    def _get_imganno(self, idx):
        """Get anno for a single image."""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Prepare image for training given the index."""
        records = copy.deepcopy(self._get_imganno(idx))
        records['image'] = cv2.imread(records['image_file'])
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        if 'mask' in records:
            records['mask'] = (records['mask'] + 0).astype('uint8')
        records = self.transform(records)
        return records

@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
    """COCO dataset for bottom-up pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False,
                 return_mask=True,
                 return_bbox=True,
                 return_area=True,
                 return_class=True):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        self.ann_file = os.path.join(dataset_dir, anno_path)
        self.shard = shard
        self.test_mode = test_mode
        self.return_mask = return_mask
        self.return_bbox = return_bbox
        self.return_area = return_area
        self.return_class = return_class

    def parse_dataset(self):
        self.coco = COCO(self.ann_file)

        self.img_ids = self.coco.getImgIds()
        if not self.test_mode:
            self.img_ids_tmp = []
            for img_id in self.img_ids:
                ann_ids = self.coco.getAnnIds(imgIds=img_id)
                anno = self.coco.loadAnns(ann_ids)
                anno = [obj for obj in anno if obj['iscrowd'] == 0]
                if len(anno) == 0:
                    continue
                self.img_ids_tmp.append(img_id)
            self.img_ids = self.img_ids_tmp

        blocknum = int(len(self.img_ids) / self.shard[1])
        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
            self.shard[0] + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        cat_ids = self.coco.getCatIds()
        self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)})
        print('=> num_images: {}'.format(self.num_images))

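The two lines around `blocknum` implement a contiguous equal split of image ids across ranks; ids beyond `blocknum * worldsize` are dropped. A small worked sketch of the same arithmetic, with illustrative values:

# Worked sketch of the contiguous sharding above (values are illustrative).
img_ids = list(range(10))                  # 10 images
rank, worldsize = 1, 3                     # shard = [1, 3]
blocknum = int(len(img_ids) / worldsize)   # 3
shard_ids = img_ids[blocknum * rank:blocknum * (rank + 1)]
print(shard_ids)   # [3, 4, 5]; the trailing remainder (id 9) is dropped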
    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

                - id2name (dict): Mapping image id to name.
                - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_imganno(self, idx):
        """Get anno for a single image.

        Args:
            idx (int): image idx

        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
        ]

        db_rec = {}
        joints, orgsize = self._get_joints(anno, idx)
        db_rec['gt_joints'] = joints
        db_rec['im_shape'] = orgsize

        if self.return_bbox:
            db_rec['gt_bbox'] = self._get_bboxs(anno, idx)

        if self.return_class:
            db_rec['gt_class'] = self._get_labels(anno, idx)

        if self.return_area:
            db_rec['gt_areas'] = self._get_areas(anno, idx)

        if self.return_mask:
            db_rec['mask'] = self._get_mask(anno, idx)

        db_rec['im_id'] = img_id
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])

        return db_rec

    def _get_joints(self, anno, idx):
        """Get joints for all people in an image."""
        num_people = len(anno)

        joints = np.zeros(
            (num_people, self.ann_info['num_joints'], 3), dtype=np.float32)

        for i, obj in enumerate(anno):
            joints[i, :self.ann_info['num_joints'], :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])

        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        orgsize = np.array([img_info['height'], img_info['width'], 1])

        return joints, orgsize

    def _get_bboxs(self, anno, idx):
        num_people = len(anno)
        gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)

        for idx, obj in enumerate(anno):
            if 'bbox' in obj:
                gt_bboxes[idx, :] = obj['bbox']

        # convert COCO [x, y, w, h] boxes to [x1, y1, x2, y2]
        gt_bboxes[:, 2] += gt_bboxes[:, 0]
        gt_bboxes[:, 3] += gt_bboxes[:, 1]
        return gt_bboxes

    def _get_labels(self, anno, idx):
        num_people = len(anno)
        gt_labels = np.zeros((num_people, 1), dtype=np.float32)

        for idx, obj in enumerate(anno):
            if 'category_id' in obj:
                catid = obj['category_id']
                gt_labels[idx, 0] = self.catid2clsid[catid]
        return gt_labels

    def _get_areas(self, anno, idx):
        num_people = len(anno)
        gt_areas = np.zeros((num_people, ), dtype=np.float32)

        for idx, obj in enumerate(anno):
            if 'area' in obj:
                gt_areas[idx, ] = obj['area']
        return gt_areas

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses."""
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]

        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)

        for obj in anno:
            if 'segmentation' in obj:
                if obj['iscrowd']:
                    rle = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                       img_info['height'],
                                                       img_info['width'])
                    m += pycocotools.mask.decode(rle)
                elif obj['num_keypoints'] == 0:
                    rles = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                        img_info['height'],
                                                        img_info['width'])
                    for rle in rles:
                        m += pycocotools.mask.decode(rle)

        return m < 0.5

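`_get_mask` returns `m < 0.5`, i.e. True where the loss should be kept: crowd regions and keypoint-less people are accumulated into `m` and thereby excluded. A small sketch of that inversion:

import numpy as np
# sketch of the mask inversion above: 1.0 marks ignore regions
m = np.zeros((4, 4), dtype=np.float32)
m[1:3, 1:3] = 1.0          # pretend a crowd RLE decoded here
keep = m < 0.5             # True everywhere except the crowd block
print(keep.sum())          # 12 of 16 pixels contribute to the loss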
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
    """CrowdPose dataset for bottom-up pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_path (str): Relative path to the annotation file.
        image_dir (str): Path to a directory where images are held.
            Default: None.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        self.ann_file = os.path.join(dataset_dir, anno_path)
        self.shard = shard
        self.test_mode = test_mode

    def parse_dataset(self):
        self.coco = COCO(self.ann_file)

        self.img_ids = self.coco.getImgIds()
        if not self.test_mode:
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]
        blocknum = int(len(self.img_ids) / self.shard[1])
        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
            self.shard[0] + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)

        self.dataset_name = 'crowdpose'
        print('=> num_images: {}'.format(self.num_images))

@serializable
class KeypointTopDownBaseDataset(DetDataset):
    """Base class for top_down datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods:`_get_db`

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform

        self.ann_info['num_joints'] = num_joints
        self.db = []

    def __len__(self):
        """Get dataset length."""
        return len(self.db)

    def _get_db(self):
        """Get a sample"""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Prepare sample for training given the index."""
        records = copy.deepcopy(self.db[idx])
        records['image'] = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
                                      cv2.IMREAD_IGNORE_ORIENTATION)
        records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
        records['score'] = records['score'] if 'score' in records else 1
        records = self.transform(records)
        return records

@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
    """COCO dataset for top-down pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes:

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list): [w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        bbox_file (str): Path to a detection bbox file
            Default: None.
        use_gt_bbox (bool): Whether to use ground truth bbox
            Default: True.
        pixel_std (int): The pixel std of the scale
            Default: 200.
        image_thre (float): The threshold to filter the detection box
            Default: 0.0.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 bbox_file=None,
                 use_gt_bbox=True,
                 pixel_std=200,
                 image_thre=0.0,
                 center_scale=None):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.bbox_file = bbox_file
        self.use_gt_bbox = use_gt_bbox
        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.image_thre = image_thre
        self.center_scale = center_scale
        self.dataset_name = 'coco'

    def parse_dataset(self):
        if self.use_gt_bbox:
            self.db = self._load_coco_keypoint_annotations()
        else:
            self.db = self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        coco = COCO(self.get_anno())
        img_ids = coco.getImgIds()
        gt_db = []
        for index in img_ids:
            im_ann = coco.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            file_name = im_ann['file_name']
            im_id = int(im_ann["id"])

            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
            objs = coco.loadAnns(annIds)

            valid_objs = []
            for obj in objs:
                x, y, w, h = obj['bbox']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    valid_objs.append(obj)
            objs = valid_objs

            rec = []
            for obj in objs:
                if max(obj['keypoints']) == 0:
                    continue

                joints = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float32)
                joints_vis = np.zeros(
                    (self.ann_info['num_joints'], 3), dtype=np.float32)
                for ipt in range(self.ann_info['num_joints']):
                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                    joints[ipt, 2] = 0
                    t_vis = obj['keypoints'][ipt * 3 + 2]
                    if t_vis > 1:
                        t_vis = 1
                    joints_vis[ipt, 0] = t_vis
                    joints_vis[ipt, 1] = t_vis
                    joints_vis[ipt, 2] = 0

                center, scale = self._box2cs(obj['clean_bbox'][:4])
                rec.append({
                    'image_file': os.path.join(self.img_prefix, file_name),
                    'center': center,
                    'scale': scale,
                    'gt_joints': joints,
                    'joints_vis': joints_vis,
                    'im_id': im_id,
                })
            gt_db.extend(rec)

        return gt_db

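The `clean_bbox` step above clips COCO's [x, y, w, h] boxes to the image bounds and drops degenerate ones. A worked illustration with hand-picked numbers:

import numpy as np
# illustrative numbers for the clipping above: a 640x480 image,
# and a box overhanging the right edge
width, height = 640, 480
x, y, w, h = 600.0, 100.0, 80.0, 50.0
x1, y1 = np.max((0, x)), np.max((0, y))
x2 = np.min((width - 1, x1 + np.max((0, w - 1))))    # 639.0
y2 = np.min((height - 1, y1 + np.max((0, h - 1))))   # 149.0
clean_bbox = [x1, y1, x2 - x1, y2 - y1]              # [600.0, 100.0, 39.0, 49.0]
print(clean_bbox)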
    def _box2cs(self, box):
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        if self.center_scale is not None and np.random.rand() < 0.3:
            center += self.center_scale * (np.random.rand(2) - 0.5) * [w, h]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

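`_box2cs` pads the box to the training aspect ratio, expresses its size in units of `pixel_std`, then inflates it by 25%. A stand-alone recomputation with illustrative values (trainsize=[288, 384], pixel_std=200, center_scale=None):

import numpy as np
# stand-alone recomputation of _box2cs for box=[100, 50, 100, 300]
x, y, w, h = 100, 50, 100, 300
aspect_ratio = 288 / 384                       # 0.75
center = np.array([x + w * 0.5, y + h * 0.5])  # [150., 200.]
if w < aspect_ratio * h:                       # 100 < 225, so pad the width
    w = h * aspect_ratio                       # 225.0
scale = np.array([w / 200, h / 200]) * 1.25    # [1.40625, 1.875]
print(center, scale)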
    def _load_coco_person_detection_results(self):
        all_boxes = None
        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
        with open(bbox_file_path, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            print('=> Load %s fail!' % bbox_file_path)
            return None

        kpt_db = []
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                continue
            file_name = det_res[
                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
                    'image_id']
            img_name = os.path.join(self.img_prefix, file_name)
            box = det_res['bbox']
            score = det_res['score']
            im_id = int(det_res['image_id'])

            if score < self.image_thre:
                continue

            center, scale = self._box2cs(box)
            joints = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float32)
            joints_vis = np.ones(
                (self.ann_info['num_joints'], 3), dtype=np.float32)
            kpt_db.append({
                'image_file': img_name,
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'score': score,
                'gt_joints': joints,
                'joints_vis': joints_vis,
            })

        return kpt_db

@register
@serializable
class KeypointTopDownCocoWholeBodyHandDataset(KeypointTopDownBaseDataset):
    """CocoWholeBody dataset for top-down hand pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    COCO-WholeBody Hand keypoint indexes:

        0: 'wrist',
        1: 'thumb1',
        2: 'thumb2',
        3: 'thumb3',
        4: 'thumb4',
        5: 'forefinger1',
        6: 'forefinger2',
        7: 'forefinger3',
        8: 'forefinger4',
        9: 'middle_finger1',
        10: 'middle_finger2',
        11: 'middle_finger3',
        12: 'middle_finger4',
        13: 'ring_finger1',
        14: 'ring_finger2',
        15: 'ring_finger3',
        16: 'ring_finger4',
        17: 'pinky_finger1',
        18: 'pinky_finger2',
        19: 'pinky_finger3',
        20: 'pinky_finger4'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list): [w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        pixel_std (int): The pixel std of the scale
            Default: 200.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 pixel_std=200):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.trainsize = trainsize
        self.pixel_std = pixel_std
        # the misspelled name is kept as-is; downstream lookups key on it
        self.dataset_name = 'coco_wholebady_hand'

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def parse_dataset(self):
        gt_db = []
        num_joints = self.ann_info['num_joints']
        coco = COCO(self.get_anno())
        img_ids = list(coco.imgs.keys())
        for img_id in img_ids:
            im_ann = coco.loadImgs(img_id)[0]
            image_file = os.path.join(self.img_prefix, im_ann['file_name'])
            im_id = int(im_ann["id"])

            ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
            objs = coco.loadAnns(ann_ids)

            for obj in objs:
                for type in ['left', 'right']:
                    if (obj[f'{type}hand_valid'] and
                            max(obj[f'{type}hand_kpts']) > 0):

                        joints = np.zeros((num_joints, 3), dtype=np.float32)
                        joints_vis = np.zeros((num_joints, 3), dtype=np.float32)

                        keypoints = np.array(obj[f'{type}hand_kpts'])
                        keypoints = keypoints.reshape(-1, 3)
                        joints[:, :2] = keypoints[:, :2]
                        joints_vis[:, :2] = np.minimum(1, keypoints[:, 2:3])

                        center, scale = self._box2cs(obj[f'{type}hand_box'][:4])
                        gt_db.append({
                            'image_file': image_file,
                            'center': center,
                            'scale': scale,
                            'gt_joints': joints,
                            'joints_vis': joints_vis,
                            'im_id': im_id,
                        })

        self.db = gt_db

@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
    """MPII dataset for topdown pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    MPII keypoint indexes::

        0: 'right_ankle',
        1: 'right_knee',
        2: 'right_hip',
        3: 'left_hip',
        4: 'left_knee',
        5: 'left_ankle',
        6: 'pelvis',
        7: 'thorax',
        8: 'upper_neck',
        9: 'head_top',
        10: 'right_wrist',
        11: 'right_elbow',
        12: 'right_shoulder',
        13: 'left_shoulder',
        14: 'left_elbow',
        15: 'left_wrist',

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.dataset_name = 'mpii'

    def parse_dataset(self):
        with open(self.get_anno()) as anno_file:
            anno = json.load(anno_file)

        gt_db = []
        for a in anno:
            image_name = a['image']
            im_id = a['image_id'] if 'image_id' in a else int(
                os.path.splitext(image_name)[0])

            c = np.array(a['center'], dtype=np.float32)
            s = np.array([a['scale'], a['scale']], dtype=np.float32)

            # Adjust center/scale slightly to avoid cropping limbs
            if c[0] != -1:
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            c = c - 1

            joints = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float32)
            joints_vis = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float32)
            if 'gt_joints' in a:
                joints_ = np.array(a['gt_joints'])
                joints_[:, 0:2] = joints_[:, 0:2] - 1
                joints_vis_ = np.array(a['joints_vis'])
                assert len(joints_) == self.ann_info[
                    'num_joints'], 'joint num diff: {} vs {}'.format(
                        len(joints_), self.ann_info['num_joints'])

                joints[:, 0:2] = joints_[:, 0:2]
                joints_vis[:, 0] = joints_vis_[:]
                joints_vis[:, 1] = joints_vis_[:]

            gt_db.append({
                'image_file': os.path.join(self.img_prefix, image_name),
                'im_id': im_id,
                'center': c,
                'scale': s,
                'gt_joints': joints,
                'joints_vis': joints_vis
            })
        print("number of samples: {}".format(len(gt_db)))
        self.db = gt_db
638
paddle_detection/ppdet/data/source/mot.py
Normal file
@@ -0,0 +1,638 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import cv2
import glob
import numpy as np
from collections import OrderedDict, defaultdict
try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from .dataset import DetDataset, _make_dataset, _is_valid_file
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

@register
@serializable
class MOTDataSet(DetDataset):
    """
    Load dataset with MOT format, only support single class MOT.

    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (str|list): mot data image lists, multi-source mot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        repeat (int): repeat times for dataset, used in benchmarking.

    Notes:
        MOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————caltech.train
            |        |——————caltech.val
            |        |——————mot16.train
            |        |——————mot17.train
            |        ......
            |——————Caltech
            |——————MOT17
            |——————......

        All the MOT datasets have the following structure:
            Caltech
            |——————images
            |        └——————00001.jpg
            |        |—————— ...
            |        └——————0000N.jpg
            └——————labels_with_ids
                     └——————00001.txt
                     |—————— ...
                     └——————0000N.txt
        or
            MOT17
            |——————images
            |        └——————train
            |        └——————test
            └——————labels_with_ids
                     └——————train
    """

    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 sample_num=-1,
                 repeat=1):
        super(MOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num,
            repeat=repeat)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.roidbs = None
        self.cname2cid = None

    def get_anno(self):
        if self.image_lists == []:
            return
        # only used to get categories and metric
        # only check first data, but the label_list of all data should be same.
        first_mot_data = self.image_lists[0].split('.')[0]
        anno_file = os.path.join(self.dataset_dir, first_mot_data,
                                 'label_list.txt')
        return anno_file

    def parse_dataset(self):
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()

        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)

            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)

            # record img_files, filter out empty ones
            with open(list_path, 'r') as file:
                self.img_files[data_name] = file.readlines()
                self.img_files[data_name] = [
                    os.path.join(self.dataset_dir, x.strip())
                    for x in self.img_files[data_name]
                ]
                self.img_files[data_name] = list(
                    filter(lambda x: len(x) > 0, self.img_files[data_name]))

            self.img_start_index[data_name] = img_index
            img_index += len(self.img_files[data_name])

            # record label_files
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]

        for data_name, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                if len(lb.shape) < 2:
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            self.tid_num[data_name] = int(max_index + 1)

        last_index = 0
        for i, (k, v) in enumerate(self.tid_num.items()):
            self.tid_start_index[k] = last_index
            last_index += v

        self.num_identities_dict = defaultdict(int)
        self.num_identities_dict[0] = int(last_index + 1)  # single class
        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)

        logger.info('MOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('Total images: {}'.format(self.total_imgs))
        logger.info('Image start index: {}'.format(self.img_start_index))
        logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
        logger.info('Identity start index: {}'.format(self.tid_start_index))

        records = []
        cname2cid = mot_label()

        for img_index in range(self.total_imgs):
            for i, (k, v) in enumerate(self.img_start_index.items()):
                if img_index >= v:
                    data_name = list(self.label_files.keys())[i]
                    start_index = v
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]

            if not os.path.exists(img_file):
                logger.warning('Illegal image file: {}, and it will be ignored'.
                               format(img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning('Illegal label file: {}, and it will be ignored'.
                               format(lbl_file))
                continue

            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]

            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            for i, _ in enumerate(gt_ide):
                if gt_ide[i] > -1:
                    gt_ide[i] += self.tid_start_index[data_name]

            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}

            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }

            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v

            records.append(mot_rec)
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid

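Each sub-dataset numbers its track ids from 0, so `tid_start_index` above shifts them into one global, non-overlapping id space. A small sketch of the same arithmetic, with illustrative counts:

# Sketch of the global id offsetting; the numbers are illustrative.
tid_num = {'caltech.train': 100, 'mot17.train': 50}   # ids per sub-dataset
tid_start_index, last = {}, 0
for name, n in tid_num.items():
    tid_start_index[name] = last
    last += n
# a local id 7 in mot17.train becomes global id 107
print(tid_start_index['mot17.train'] + 7)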
@register
@serializable
class MCMOTDataSet(DetDataset):
    """
    Load dataset with MOT format, support multi-class MOT.

    Args:
        dataset_dir (str): root directory for dataset.
        image_lists (list(str)): mcmot data image lists, multi-source mcmot dataset.
        data_fields (list): key name of data dictionary, at least have 'image'.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
        sample_num (int): number of samples to load, -1 means all.

    Notes:
        MCMOT datasets root directory following this:
            dataset/mot
            |——————image_lists
            |        |——————visdrone_mcmot.train
            |        |——————visdrone_mcmot.val
            visdrone_mcmot
            |——————images
            |        └——————train
            |        └——————val
            └——————labels_with_ids
                     └——————train
    """

    def __init__(self,
                 dataset_dir=None,
                 image_lists=[],
                 data_fields=['image'],
                 label_list=None,
                 sample_num=-1):
        super(MCMOTDataSet, self).__init__(
            dataset_dir=dataset_dir,
            data_fields=data_fields,
            sample_num=sample_num)
        self.dataset_dir = dataset_dir
        self.image_lists = image_lists
        if isinstance(self.image_lists, str):
            self.image_lists = [self.image_lists]
        self.label_list = label_list
        self.roidbs = None
        self.cname2cid = None

    def get_anno(self):
        if self.image_lists == []:
            return
        # only used to get categories and metric
        # only check first data, but the label_list of all data should be same.
        first_mot_data = self.image_lists[0].split('.')[0]
        anno_file = os.path.join(self.dataset_dir, first_mot_data,
                                 'label_list.txt')
        return anno_file

    def parse_dataset(self):
        self.img_files = OrderedDict()
        self.img_start_index = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_idx_of_cls_ids = defaultdict(dict)  # for MCMOT

        img_index = 0
        for data_name in self.image_lists:
            # check every data image list
            image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
            assert os.path.isdir(image_lists_dir), \
                "The {} is not a directory.".format(image_lists_dir)

            list_path = os.path.join(image_lists_dir, data_name)
            assert os.path.exists(list_path), \
                "The list path {} does not exist.".format(list_path)

            # record img_files, filter out empty ones
            with open(list_path, 'r') as file:
                self.img_files[data_name] = file.readlines()
                self.img_files[data_name] = [
                    os.path.join(self.dataset_dir, x.strip())
                    for x in self.img_files[data_name]
                ]
                self.img_files[data_name] = list(
                    filter(lambda x: len(x) > 0, self.img_files[data_name]))

            self.img_start_index[data_name] = img_index
            img_index += len(self.img_files[data_name])

            # record label_files
            self.label_files[data_name] = [
                x.replace('images', 'labels_with_ids').replace(
                    '.png', '.txt').replace('.jpg', '.txt')
                for x in self.img_files[data_name]
            ]

        for data_name, label_paths in self.label_files.items():
            # using max_ids_dict rather than max_index
            max_ids_dict = defaultdict(int)
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue
                lb = lb.reshape(-1, 6)
                for item in lb:
                    if item[1] > max_ids_dict[int(item[0])]:
                        # item[0]: cls_id
                        # item[1]: track id
                        max_ids_dict[int(item[0])] = int(item[1])
            # track id number
            self.tid_num[data_name] = max_ids_dict

        last_idx_dict = defaultdict(int)
        for i, (k, v) in enumerate(self.tid_num.items()):  # each sub dataset
            for cls_id, id_num in v.items():  # v is a max_ids_dict
                self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[cls_id]
                last_idx_dict[cls_id] += id_num

        self.num_identities_dict = defaultdict(int)
        for k, v in last_idx_dict.items():
            self.num_identities_dict[k] = int(v)  # total ids of each category

        self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
        self.total_imgs = sum(self.num_imgs_each_data)

        # cname2cid and cid2cname
        cname2cid = {}
        if self.label_list is not None:
            # if use label_list for multi source mix dataset,
            # please make sure label_list in the first sub_dataset at least.
            sub_dataset = self.image_lists[0].split('.')[0]
            label_path = os.path.join(self.dataset_dir, sub_dataset,
                                      self.label_list)
            if not os.path.exists(label_path):
                logger.info(
                    "Note: label_list {} does not exist, use VisDrone 10 classes labels as default.".
                    format(label_path))
                cname2cid = visdrone_mcmot_label()
            else:
                with open(label_path, 'r') as fr:
                    label_id = 0
                    for line in fr.readlines():
                        cname2cid[line.strip()] = label_id
                        label_id += 1
        else:
            cname2cid = visdrone_mcmot_label()

        cid2cname = dict([(v, k) for (k, v) in cname2cid.items()])

        logger.info('MCMOT dataset summary: ')
        logger.info(self.tid_num)
        logger.info('Total images: {}'.format(self.total_imgs))
        logger.info('Image start index: {}'.format(self.img_start_index))

        logger.info('Total identities of each category: ')
        num_identities_dict = sorted(
            self.num_identities_dict.items(), key=lambda x: x[0])
        total_IDs_all_cats = 0
        for (k, v) in num_identities_dict:
            logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
                                                              v))
            total_IDs_all_cats += v
        logger.info('Total identities of all categories: {}'.format(
            total_IDs_all_cats))

        logger.info('Identity start index of each category: ')
        for k, v in self.tid_start_idx_of_cls_ids.items():
            sorted_v = sorted(v.items(), key=lambda x: x[0])
            for (cls_id, start_idx) in sorted_v:
                logger.info('Start index of dataset {} category {:d} is {:d}'
                            .format(k, cls_id, start_idx))

        records = []
        for img_index in range(self.total_imgs):
            for i, (k, v) in enumerate(self.img_start_index.items()):
                if img_index >= v:
                    data_name = list(self.label_files.keys())[i]
                    start_index = v
            img_file = self.img_files[data_name][img_index - start_index]
            lbl_file = self.label_files[data_name][img_index - start_index]

            if not os.path.exists(img_file):
                logger.warning('Illegal image file: {}, and it will be ignored'.
                               format(img_file))
                continue
            if not os.path.isfile(lbl_file):
                logger.warning('Illegal label file: {}, and it will be ignored'.
                               format(lbl_file))
                continue

            labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
            # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]

            cx, cy = labels[:, 2], labels[:, 3]
            w, h = labels[:, 4], labels[:, 5]
            gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
            gt_class = labels[:, 0:1].astype('int32')
            gt_score = np.ones((len(labels), 1)).astype('float32')
            gt_ide = labels[:, 1:2].astype('int32')
            for i, _ in enumerate(gt_ide):
                if gt_ide[i] > -1:
                    cls_id = int(gt_class[i])
                    start_idx = self.tid_start_idx_of_cls_ids[data_name][cls_id]
                    gt_ide[i] += start_idx

            mot_rec = {
                'im_file': img_file,
                'im_id': img_index,
            } if 'image' in self.data_fields else {}

            gt_rec = {
                'gt_class': gt_class,
                'gt_score': gt_score,
                'gt_bbox': gt_bbox,
                'gt_ide': gt_ide,
            }

            for k, v in gt_rec.items():
                if k in self.data_fields:
                    mot_rec[k] = v

            records.append(mot_rec)
            if self.sample_num > 0 and img_index >= self.sample_num:
                break
        assert len(records) > 0, 'not found any mot record in %s' % (
            self.image_lists)
        self.roidbs, self.cname2cid = records, cname2cid

@register
@serializable
class MOTImageFolder(DetDataset):
    """
    Load MOT dataset with MOT format from image folder or video.
    Args:
        video_file (str): path of the video file, default ''.
        frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
        dataset_dir (str): root directory for dataset.
        keep_ori_im (bool): whether to keep original image, default False.
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """

    def __init__(self,
                 video_file=None,
                 frame_rate=-1,
                 dataset_dir=None,
                 data_root=None,
                 image_dir=None,
                 sample_num=-1,
                 keep_ori_im=False,
                 anno_path=None,
                 **kwargs):
        super(MOTImageFolder, self).__init__(
            dataset_dir, image_dir, sample_num=sample_num)
        self.video_file = video_file
        self.data_root = data_root
        self.keep_ori_im = keep_ori_im
        self._imid2path = {}
        self.roidbs = None
        self.frame_rate = frame_rate
        self.anno_path = anno_path

    def check_or_download_dataset(self):
        return

    def parse_dataset(self, ):
        if not self.roidbs:
            if self.video_file is None:
                self.frame_rate = 30  # set as default if infer image folder
                self.roidbs = self._load_images()
            else:
                self.roidbs = self._load_video_images()

    def _load_video_images(self):
        if self.frame_rate == -1:
            # if frame_rate is not set for video, use cv2.VideoCapture
            cap = cv2.VideoCapture(self.video_file)
            self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))

        extension = self.video_file.split('.')[-1]
        output_path = self.video_file.replace('.{}'.format(extension), '')
        frames_path = video2frames(self.video_file, output_path,
                                   self.frame_rate)
        self.video_frames = sorted(
            glob.glob(os.path.join(frames_path, '*.png')))

        self.video_length = len(self.video_frames)
        logger.info('Length of the video: {:d} frames.'.format(
            self.video_length))
        ct = 0
        records = []
        for image in self.video_frames:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            if self.keep_ori_im:
                rec.update({'keep_ori_im': 1})
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def _find_images(self):
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        images = self._find_images()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            rec = {'im_id': np.array([ct]), 'im_file': image}
            if self.keep_ori_im:
                rec.update({'keep_ori_im': 1})
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        return self._imid2path

    def set_images(self, images):
        self.image_dir = images
        self.roidbs = self._load_images()

    def set_video(self, video_file, frame_rate):
        # update video_file and frame_rate by command line of tools/infer_mot.py
        self.video_file = video_file
        self.frame_rate = frame_rate
        assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
            "wrong or unsupported file format: {}".format(self.video_file)
        self.roidbs = self._load_video_images()

    def get_anno(self):
        return self.anno_path

def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
    return f.lower().endswith(extensions)


def video2frames(video_path, outpath, frame_rate, **kargs):
    def _dict2str(kargs):
        cmd_str = ''
        for k, v in kargs.items():
            cmd_str += (' ' + str(k) + ' ' + str(v))
        return cmd_str

    ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
    vid_name = os.path.basename(video_path).split('.')[0]
    out_full_path = os.path.join(outpath, vid_name)

    if not os.path.exists(out_full_path):
        os.makedirs(out_full_path)

    # output frame name pattern
    outformat = os.path.join(out_full_path, '%08d.png')

    cmd = ffmpeg + [
        ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
    ]
    cmd = ''.join(cmd) + _dict2str(kargs)

    if os.system(cmd) != 0:
        # raising aborts here; the sys.exit(-1) that followed was unreachable
        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))

    sys.stdout.flush()
    return out_full_path

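`video2frames` shells out to ffmpeg and returns the directory holding the extracted PNGs, which `_load_video_images` then globs. A hedged usage sketch; the video path is hypothetical and ffmpeg is assumed to be installed and on PATH:

# Hedged usage sketch; 'demo/input.mp4' is a hypothetical path.
import glob, os
frames_dir = video2frames('demo/input.mp4', 'demo/input', frame_rate=30)
frames = sorted(glob.glob(os.path.join(frames_dir, '*.png')))
print('{} frames extracted to {}'.format(len(frames), frames_dir))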
def mot_label():
    labels_map = {'person': 0}
    return labels_map


def visdrone_mcmot_label():
    labels_map = {
        'pedestrian': 0,
        'people': 1,
        'bicycle': 2,
        'car': 3,
        'van': 4,
        'truck': 5,
        'tricycle': 6,
        'awning-tricycle': 7,
        'bus': 8,
        'motor': 9,
    }
    return labels_map

380
paddle_detection/ppdet/data/source/pose3d_cmb.py
Normal file
@@ -0,0 +1,380 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import cv2
import numpy as np
import json
import copy
import pycocotools
from pycocotools.coco import COCO
from .dataset import DetDataset
from ppdet.core.workspace import register, serializable
from paddle.io import Dataset

@serializable
class Pose3DDataset(DetDataset):
    """Pose3D Dataset class.

    Args:
        dataset_dir (str): Root path to the dataset.
        anno_list (list of str): each of the element is a relative path to the annotation file.
        image_dirs (list of str): each of path is a relative path where images are held.
        transform (composed(operators)): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
        24 joints order:
        0-2: 'R_Ankle', 'R_Knee', 'R_Hip',
        3-5: 'L_Hip', 'L_Knee', 'L_Ankle',
        6-8: 'R_Wrist', 'R_Elbow', 'R_Shoulder',
        9-11: 'L_Shoulder', 'L_Elbow', 'L_Wrist',
        12-14: 'Neck', 'Top_of_Head', 'Pelvis',
        15-18: 'Thorax', 'Spine', 'Jaw', 'Head',
        19-23: 'Nose', 'L_Eye', 'R_Eye', 'L_Ear', 'R_Ear'
    """

    def __init__(self,
                 dataset_dir,
                 image_dirs,
                 anno_list,
                 transform=[],
                 num_joints=24,
                 test_mode=False):
        super().__init__(dataset_dir, image_dirs, anno_list)
        self.image_info = {}
        self.ann_info = {}
        self.num_joints = num_joints

        self.transform = transform
        self.test_mode = test_mode

        self.img_ids = []
        self.dataset_dir = dataset_dir
        self.image_dirs = image_dirs
        self.anno_list = anno_list

def get_mask(self, mvm_percent=0.3):
|
||||
num_joints = self.num_joints
|
||||
mjm_mask = np.ones((num_joints, 1)).astype(np.float32)
|
||||
if self.test_mode == False:
|
||||
pb = np.random.random_sample()
|
||||
masked_num = int(
|
||||
pb * mvm_percent *
|
||||
num_joints) # at most x% of the joints could be masked
|
||||
indices = np.random.choice(
|
||||
np.arange(num_joints), replace=False, size=masked_num)
|
||||
mjm_mask[indices, :] = 0.0
|
||||
# return mjm_mask
|
||||
|
||||
num_joints = 10
|
||||
mvm_mask = np.ones((num_joints, 1)).astype(np.float)
|
||||
if self.test_mode == False:
|
||||
num_vertices = num_joints
|
||||
pb = np.random.random_sample()
|
||||
masked_num = int(
|
||||
pb * mvm_percent *
|
||||
num_vertices) # at most x% of the vertices could be masked
|
||||
indices = np.random.choice(
|
||||
np.arange(num_vertices), replace=False, size=masked_num)
|
||||
mvm_mask[indices, :] = 0.0
|
||||
|
||||
mjm_mask = np.concatenate([mjm_mask, mvm_mask], axis=0)
|
||||
return mjm_mask
|
||||
|
||||
def filterjoints(self, x):
|
||||
if self.num_joints == 24:
|
||||
return x
|
||||
elif self.num_joints == 14:
|
||||
return x[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18], :]
|
||||
elif self.num_joints == 17:
|
||||
return x[
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 18, 19], :]
|
||||
else:
|
||||
raise ValueError(
|
||||
"unsupported joint numbers, only [24 or 17 or 14] is supported!")
|
||||
|
||||
def parse_dataset(self):
|
||||
print("Loading annotations..., please wait")
|
||||
self.annos = []
|
||||
im_id = 0
|
||||
self.human36m_num = 0
|
||||
for idx, annof in enumerate(self.anno_list):
|
||||
img_prefix = os.path.join(self.dataset_dir, self.image_dirs[idx])
|
||||
dataf = os.path.join(self.dataset_dir, annof)
|
||||
with open(dataf, 'r') as rf:
|
||||
anno_data = json.load(rf)
|
||||
annos = anno_data['data']
|
||||
new_annos = []
|
||||
print("{} has annos numbers: {}".format(dataf, len(annos)))
|
||||
for anno in annos:
|
||||
new_anno = {}
|
||||
new_anno['im_id'] = im_id
|
||||
im_id += 1
|
||||
imagename = anno['imageName']
|
||||
if imagename.startswith("COCO_train2014_"):
|
||||
imagename = imagename[len("COCO_train2014_"):]
|
||||
elif imagename.startswith("COCO_val2014_"):
|
||||
imagename = imagename[len("COCO_val2014_"):]
|
||||
imagename = os.path.join(img_prefix, imagename)
|
||||
if not os.path.exists(imagename):
|
||||
if "train2017" in imagename:
|
||||
imagename = imagename.replace("train2017",
|
||||
"val2017")
|
||||
if not os.path.exists(imagename):
|
||||
print("cannot find imagepath:{}".format(
|
||||
imagename))
|
||||
continue
|
||||
else:
|
||||
print("cannot find imagepath:{}".format(imagename))
|
||||
continue
|
||||
new_anno['imageName'] = imagename
|
||||
if 'human3.6m' in imagename:
|
||||
self.human36m_num += 1
|
||||
new_anno['bbox_center'] = anno['bbox_center']
|
||||
new_anno['bbox_scale'] = anno['bbox_scale']
|
||||
new_anno['joints_2d'] = np.array(anno[
|
||||
'gt_keypoint_2d']).astype(np.float32)
|
||||
if new_anno['joints_2d'].shape[0] == 49:
|
||||
#if the joints_2d is in SPIN format(which generated by eft), choose the last 24 public joints
|
||||
#for detail please refer: https://github.com/nkolot/SPIN/blob/master/constants.py
|
||||
new_anno['joints_2d'] = new_anno['joints_2d'][25:]
|
||||
new_anno['joints_3d'] = np.array(anno[
|
||||
'pose3d'])[:, :3].astype(np.float32)
|
||||
new_anno['mjm_mask'] = self.get_mask()
|
||||
if not 'has_3d_joints' in anno:
|
||||
new_anno['has_3d_joints'] = int(1)
|
||||
new_anno['has_2d_joints'] = int(1)
|
||||
else:
|
||||
new_anno['has_3d_joints'] = int(anno['has_3d_joints'])
|
||||
new_anno['has_2d_joints'] = int(anno['has_2d_joints'])
|
||||
new_anno['joints_2d'] = self.filterjoints(new_anno[
|
||||
'joints_2d'])
|
||||
self.annos.append(new_anno)
|
||||
del annos
|
||||
|
||||
def get_temp_num(self):
|
||||
"""get temporal data number, like human3.6m"""
|
||||
return self.human36m_num
|
||||
|
||||
def __len__(self):
|
||||
"""Get dataset length."""
|
||||
return len(self.annos)
|
||||
|
||||
def _get_imganno(self, idx):
|
||||
"""Get anno for a single image."""
|
||||
return self.annos[idx]
|
||||
|
||||
def __getitem__(self, idx):
|
||||
"""Prepare image for training given the index."""
|
||||
records = copy.deepcopy(self._get_imganno(idx))
|
||||
imgpath = records['imageName']
|
||||
assert os.path.exists(imgpath), "cannot find image {}".format(imgpath)
|
||||
records['image'] = cv2.imread(imgpath)
|
||||
records['image'] = cv2.cvtColor(records['image'], cv2.COLOR_BGR2RGB)
|
||||
records = self.transform(records)
|
||||
return records
|
||||
|
||||
def check_or_download_dataset(self):
|
||||
alldatafind = True
|
||||
for image_dir in self.image_dirs:
|
||||
image_dir = os.path.join(self.dataset_dir, image_dir)
|
||||
if not os.path.isdir(image_dir):
|
||||
print("dataset [{}] is not found".format(image_dir))
|
||||
alldatafind = False
|
||||
if not alldatafind:
|
||||
raise ValueError(
|
||||
"Some dataset is not valid and cannot download automatically now, please prepare the dataset first"
|
||||
)
|
||||
|
||||
|
||||
@register
|
||||
@serializable
|
||||
class Keypoint3DMultiFramesDataset(Dataset):
|
||||
"""24 keypoints 3D dataset for pose estimation.
|
||||
|
||||
each item is a list of images
|
||||
|
||||
The dataset loads raw features and apply specified transforms
|
||||
to return a dict containing the image tensors and other information.
|
||||
|
||||
Args:
|
||||
dataset_dir (str): Root path to the dataset.
|
||||
image_dir (str): Path to a directory where images are held.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
dataset_dir, # 数据集根目录
|
||||
image_dir, # 图像文件夹
|
||||
p3d_dir, # 3D关键点文件夹
|
||||
json_path,
|
||||
img_size, #图像resize大小
|
||||
num_frames, # 帧序列长度
|
||||
anno_path=None, ):
|
||||
|
||||
self.dataset_dir = dataset_dir
|
||||
self.image_dir = image_dir
|
||||
self.p3d_dir = p3d_dir
|
||||
self.json_path = json_path
|
||||
self.img_size = img_size
|
||||
self.num_frames = num_frames
|
||||
self.anno_path = anno_path
|
||||
|
||||
self.data_labels, self.mf_inds = self._generate_multi_frames_list()
|
||||
|
||||
def _generate_multi_frames_list(self):
|
||||
act_list = os.listdir(self.dataset_dir) # 动作列表
|
||||
count = 0
|
||||
mf_list = []
|
||||
annos_dict = {'images': [], 'annotations': [], 'act_inds': []}
|
||||
for act in act_list: #对每个动作,生成帧序列
|
||||
if '.' in act:
|
||||
continue
|
||||
|
||||
json_path = os.path.join(self.dataset_dir, act, self.json_path)
|
||||
with open(json_path, 'r') as j:
|
||||
annos = json.load(j)
|
||||
length = len(annos['images'])
|
||||
for k, v in annos.items():
|
||||
if k in annos_dict:
|
||||
annos_dict[k].extend(v)
|
||||
annos_dict['act_inds'].extend([act] * length)
|
||||
|
||||
mf = [[i + j + count for j in range(self.num_frames)]
|
||||
for i in range(0, length - self.num_frames + 1)]
|
||||
mf_list.extend(mf)
|
||||
count += length
|
||||
|
||||
print("total data number:", len(mf_list))
|
||||
return annos_dict, mf_list
|
||||
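For intuition, the sliding-window construction above produces every run of num_frames consecutive global indices per action; a minimal sketch (num_frames=3 over a 5-image action at global offset count=0):

num_frames, length, count = 3, 5, 0
mf = [[i + j + count for j in range(num_frames)]
      for i in range(0, length - num_frames + 1)]
# mf == [[0, 1, 2], [1, 2, 3], [2, 3, 4]]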
    def __call__(self, *args, **kwargs):
        return self

    def __getitem__(self, index):  # fetch one consecutive sequence
        inds = self.mf_inds[
            index]  # e.g. [568, 569, 570, 571, 572, 573], length == num_frames

        images = self.data_labels['images']  # all images
        annots = self.data_labels['annotations']  # all annots

        act = self.data_labels['act_inds'][inds[0]]  # action name (folder name)

        kps3d_list = []
        kps3d_vis_list = []
        names = []

        h, w = 0, 0
        for ind in inds:  # one image
            height = float(images[ind]['height'])
            width = float(images[ind]['width'])
            name = images[ind]['file_name']  # image file name, with extension

            kps3d_name = name.split('.')[0] + '.obj'
            kps3d_path = os.path.join(self.dataset_dir, act, self.p3d_dir,
                                      kps3d_name)

            joints, joints_vis = self.kps3d_process(kps3d_path)
            joints_vis = np.array(joints_vis, dtype=np.float32)

            kps3d_list.append(joints)
            kps3d_vis_list.append(joints_vis)
            names.append(name)

        kps3d = np.array(kps3d_list)  # (6, 24, 3), (num_frames, joints_num, 3)
        kps3d_vis = np.array(kps3d_vis_list)

        # read images
        imgs = []
        for name in names:
            img_path = os.path.join(self.dataset_dir, act, self.image_dir, name)

            image = cv2.imread(img_path, cv2.IMREAD_COLOR |
                               cv2.IMREAD_IGNORE_ORIENTATION)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            imgs.append(np.expand_dims(image, axis=0))

        imgs = np.concatenate(imgs, axis=0)
        imgs = imgs.astype(
            np.float32)  # (6, 1080, 1920, 3), (num_frames, h, w, c)

        # attention: the images and annotations are mirrored at this point
        records = {
            'kps3d': kps3d,
            'kps3d_vis': kps3d_vis,
            "image": imgs,
            'act': act,
            'names': names,
            'im_id': index
        }

        return self.transform(records)

    def kps3d_process(self, kps3d_path):
        count = 0
        kps = []
        kps_vis = []

        with open(kps3d_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                if line[0] == 'v':
                    kps.append([])
                    line = line.strip('\n').split(' ')[1:]
                    for kp in line:
                        kps[-1].append(float(kp))
                    count += 1
                    kps_vis.append([1, 1, 1])

        kps = np.array(kps)  # (52, 3)
        kps_vis = np.array(kps_vis)

        kps *= 10  # scale points
        kps -= kps[[0], :]  # set root point to zero

        kps = np.concatenate((kps[0:23], kps[[37]]), axis=0)  # (24, 3)

        kps *= 10

        kps_vis = np.concatenate((kps_vis[0:23], kps_vis[[37]]), axis=0)  # (24, 3)

        return kps, kps_vis
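kps3d_process expects plain Wavefront OBJ vertex lines; a tiny, hypothetical fragment showing the assumed layout (only the leading character is checked above, so the files are assumed to contain 'v x y z' lines only, no 'vt'/'vn' entries):

# Hypothetical .obj content: one 'v x y z' line per keypoint; 52 vertices,
# of which indices 0-22 and 37 are kept as the 24 model joints.
obj_text = "v 0.0 1.45 -0.03\nv 0.01 1.40 -0.02\n"
for line in obj_text.splitlines():
    if line[0] == 'v':  # same check as in kps3d_process
        print([float(x) for x in line.strip('\n').split(' ')[1:]])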
    def __len__(self):
        return len(self.mf_inds)

    def get_anno(self):
        if self.anno_path is None:
            return
        return os.path.join(self.dataset_dir, self.anno_path)

    def check_or_download_dataset(self):
        return

    def parse_dataset(self, ):
        return

    def set_transform(self, transform):
        self.transform = transform

    def set_epoch(self, epoch_id):
        self._epoch = epoch_id

    def set_kwargs(self, **kwargs):
        self.mixup_epoch = kwargs.get('mixup_epoch', -1)
        self.cutmix_epoch = kwargs.get('cutmix_epoch', -1)
        self.mosaic_epoch = kwargs.get('mosaic_epoch', -1)
194
paddle_detection/ppdet/data/source/sniper_coco.py
Normal file
@@ -0,0 +1,194 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import cv2
import json
import copy
import numpy as np

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

from ppdet.core.workspace import register, serializable
from ppdet.data.crop_utils.annotation_cropper import AnnoCropper
from .coco import COCODataSet
from .dataset import _make_dataset, _is_valid_file
from ppdet.utils.logger import setup_logger

logger = setup_logger('sniper_coco_dataset')


@register
@serializable
class SniperCOCODataSet(COCODataSet):
    """SniperCOCODataSet"""

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 proposals_file=None,
                 data_fields=['image'],
                 sample_num=-1,
                 load_crowd=False,
                 allow_empty=True,
                 empty_ratio=1.,
                 is_trainset=True,
                 image_target_sizes=[2000, 1000],
                 valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
                 chip_target_size=500,
                 chip_target_stride=200,
                 use_neg_chip=False,
                 max_neg_num_per_im=8,
                 max_per_img=-1,
                 nms_thresh=0.5):
        super(SniperCOCODataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            load_crowd=load_crowd,
            allow_empty=allow_empty,
            empty_ratio=empty_ratio
        )
        self.proposals_file = proposals_file
        self.proposals = None
        self.anno_cropper = None
        self.is_trainset = is_trainset
        self.image_target_sizes = image_target_sizes
        self.valid_box_ratio_ranges = valid_box_ratio_ranges
        self.chip_target_size = chip_target_size
        self.chip_target_stride = chip_target_stride
        self.use_neg_chip = use_neg_chip
        self.max_neg_num_per_im = max_neg_num_per_im
        self.max_per_img = max_per_img
        self.nms_thresh = nms_thresh

    def parse_dataset(self):
        if not hasattr(self, "roidbs"):
            super(SniperCOCODataSet, self).parse_dataset()
        if self.is_trainset:
            self._parse_proposals()
            self._merge_anno_proposals()
        self.ori_roidbs = copy.deepcopy(self.roidbs)
        self.init_anno_cropper()
        self.roidbs = self.generate_chips_roidbs(self.roidbs, self.is_trainset)

    def set_proposals_file(self, file_path):
        self.proposals_file = file_path

    def init_anno_cropper(self):
        logger.info("Init AnnoCropper...")
        self.anno_cropper = AnnoCropper(
            image_target_sizes=self.image_target_sizes,
            valid_box_ratio_ranges=self.valid_box_ratio_ranges,
            chip_target_size=self.chip_target_size,
            chip_target_stride=self.chip_target_stride,
            use_neg_chip=self.use_neg_chip,
            max_neg_num_per_im=self.max_neg_num_per_im,
            max_per_img=self.max_per_img,
            nms_thresh=self.nms_thresh
        )

    def generate_chips_roidbs(self, roidbs, is_trainset):
        if is_trainset:
            roidbs = self.anno_cropper.crop_anno_records(roidbs)
        else:
            roidbs = self.anno_cropper.crop_infer_anno_records(roidbs)
        return roidbs

    def _parse_proposals(self):
        if self.proposals_file:
            self.proposals = {}
            logger.info("Parse proposals file: {}".format(self.proposals_file))
            with open(self.proposals_file, 'r') as f:
                proposals = json.load(f)
            for prop in proposals:
                image_id = prop["image_id"]
                if image_id not in self.proposals:
                    self.proposals[image_id] = []
                x, y, w, h = prop["bbox"]
                self.proposals[image_id].append([x, y, x + w, y + h])

    def _merge_anno_proposals(self):
        assert self.roidbs
        if self.proposals and len(self.proposals.keys()) > 0:
            logger.info("merge proposals to annos")
            for id, record in enumerate(self.roidbs):
                image_id = int(record["im_id"])
                if image_id not in self.proposals.keys():
                    logger.info("image id: {} has no proposals".format(image_id))
                record["proposals"] = np.array(
                    self.proposals.get(image_id, []), dtype=np.float32)
                self.roidbs[id] = record

    def get_ori_roidbs(self):
        if not hasattr(self, "ori_roidbs"):
            return None
        return self.ori_roidbs

    def get_roidbs(self):
        if not hasattr(self, "roidbs"):
            self.parse_dataset()
        return self.roidbs

    def set_roidbs(self, roidbs):
        self.roidbs = roidbs

    def check_or_download_dataset(self):
        return

    def _parse(self):
        image_dir = self.image_dir
        if not isinstance(image_dir, Sequence):
            image_dir = [image_dir]
        images = []
        for im_dir in image_dir:
            if os.path.isdir(im_dir):
                im_dir = os.path.join(self.dataset_dir, im_dir)
                images.extend(_make_dataset(im_dir))
            elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
                images.append(im_dir)
        return images

    def _load_images(self):
        images = self._parse()
        ct = 0
        records = []
        for image in images:
            assert image != '' and os.path.isfile(image), \
                "Image {} not found".format(image)
            if self.sample_num > 0 and ct >= self.sample_num:
                break
            im = cv2.imread(image)
            h, w, c = im.shape
            rec = {'im_id': np.array([ct]), 'im_file': image, "h": h, "w": w}
            self._imid2path[ct] = image
            ct += 1
            records.append(rec)
        assert len(records) > 0, "No image file found"
        return records

    def get_imid2path(self):
        return self._imid2path

    def set_images(self, images):
        self._imid2path = {}
        self.image_dir = images
        self.roidbs = self._load_images()
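A hedged construction sketch for SniperCOCODataSet (paths are hypothetical; in practice the dataset is normally built from a YAML config via ppdet.core.workspace):

dataset = SniperCOCODataSet(
    dataset_dir='dataset/visdrone',          # hypothetical path
    image_dir='train_images',                # hypothetical path
    anno_path='annotations/train.json',      # hypothetical path
    is_trainset=True,
    image_target_sizes=[2000, 1000],
    valid_box_ratio_ranges=[[-1, 0.1], [0.08, -1]],
    chip_target_size=500,
    chip_target_stride=200)
dataset.parse_dataset()  # crops the full-image annotations into chips via AnnoCropper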
234
paddle_detection/ppdet/data/source/voc.py
Normal file
@@ -0,0 +1,234 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

import xml.etree.ElementTree as ET

from ppdet.core.workspace import register, serializable

from .dataset import DetDataset

from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


@register
@serializable
class VOCDataSet(DetDataset):
    """
    Load dataset with PascalVOC format.

    Notes:
        `anno_path` must contain the xml file and image file paths for annotations.

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): voc annotation file path.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        label_list (str): if use_default_label is False, will load
            mapping between category and class index.
        allow_empty (bool): whether to load empty entries. False as default.
        empty_ratio (float): the ratio of empty record number to total
            record's, if empty_ratio is out of [0., 1.), do not sample the
            records and use all the empty entries. 1. as default.
        repeat (int): repeat times for dataset, use in benchmark.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 label_list=None,
                 allow_empty=False,
                 empty_ratio=1.,
                 repeat=1):
        super(VOCDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            repeat=repeat)
        self.label_list = label_list
        self.allow_empty = allow_empty
        self.empty_ratio = empty_ratio

    def _sample_empty(self, records, num):
        # if empty_ratio is out of [0., 1.), do not sample the records
        if self.empty_ratio < 0. or self.empty_ratio >= 1.:
            return records
        import random
        sample_num = min(
            int(num * self.empty_ratio / (1 - self.empty_ratio)), len(records))
        records = random.sample(records, sample_num)
        return records

    def parse_dataset(self, ):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        # mapping category name to class id
        # first_class:0, second_class:1, ...
        records = []
        empty_records = []
        ct = 0
        cname2cid = {}
        if self.label_list:
            label_path = os.path.join(self.dataset_dir, self.label_list)
            if not os.path.exists(label_path):
                raise ValueError("label_list {} does not exist".format(
                    label_path))
            with open(label_path, 'r') as fr:
                label_id = 0
                for line in fr.readlines():
                    cname2cid[line.strip()] = label_id
                    label_id += 1
        else:
            cname2cid = pascalvoc_label()

        with open(anno_path, 'r') as fr:
            while True:
                line = fr.readline()
                if not line:
                    break
                img_file, xml_file = [os.path.join(image_dir, x) \
                        for x in line.strip().split()[:2]]
                if not os.path.exists(img_file):
                    logger.warning(
                        'Illegal image file: {}, and it will be ignored'.format(
                            img_file))
                    continue
                if not os.path.isfile(xml_file):
                    logger.warning(
                        'Illegal xml file: {}, and it will be ignored'.format(
                            xml_file))
                    continue
                tree = ET.parse(xml_file)
                if tree.find('id') is None:
                    im_id = np.array([ct])
                else:
                    im_id = np.array([int(tree.find('id').text)])

                objs = tree.findall('object')
                im_w = float(tree.find('size').find('width').text)
                im_h = float(tree.find('size').find('height').text)
                if im_w < 0 or im_h < 0:
                    logger.warning(
                        'Illegal width: {} or height: {} in annotation, '
                        'and {} will be ignored'.format(im_w, im_h, xml_file))
                    continue

                num_bbox, i = len(objs), 0
                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
                for obj in objs:
                    cname = obj.find('name').text

                    # user dataset may not contain difficult field
                    _difficult = obj.find('difficult')
                    _difficult = int(
                        _difficult.text) if _difficult is not None else 0

                    x1 = float(obj.find('bndbox').find('xmin').text)
                    y1 = float(obj.find('bndbox').find('ymin').text)
                    x2 = float(obj.find('bndbox').find('xmax').text)
                    y2 = float(obj.find('bndbox').find('ymax').text)
                    x1 = max(0, x1)
                    y1 = max(0, y1)
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)
                    if x2 > x1 and y2 > y1:
                        gt_bbox[i, :] = [x1, y1, x2, y2]
                        gt_class[i, 0] = cname2cid[cname]
                        gt_score[i, 0] = 1.
                        difficult[i, 0] = _difficult
                        i += 1
                    else:
                        logger.warning(
                            'Found an invalid bbox in annotations: xml_file: {}'
                            ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                xml_file, x1, y1, x2, y2))
                gt_bbox = gt_bbox[:i, :]
                gt_class = gt_class[:i, :]
                gt_score = gt_score[:i, :]
                difficult = difficult[:i, :]

                voc_rec = {
                    'im_file': img_file,
                    'im_id': im_id,
                    'h': im_h,
                    'w': im_w
                } if 'image' in self.data_fields else {}

                gt_rec = {
                    'gt_class': gt_class,
                    'gt_score': gt_score,
                    'gt_bbox': gt_bbox,
                    'difficult': difficult
                }
                for k, v in gt_rec.items():
                    if k in self.data_fields:
                        voc_rec[k] = v

                if len(objs) == 0:
                    empty_records.append(voc_rec)
                else:
                    records.append(voc_rec)

                ct += 1
                if self.sample_num > 0 and ct >= self.sample_num:
                    break
        assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        if self.allow_empty and len(empty_records) > 0:
            empty_records = self._sample_empty(empty_records, len(records))
            records += empty_records
        self.roidbs, self.cname2cid = records, cname2cid

    def get_label_list(self):
        return os.path.join(self.dataset_dir, self.label_list)


def pascalvoc_label():
    labels_map = {
        'aeroplane': 0,
        'bicycle': 1,
        'bird': 2,
        'boat': 3,
        'bottle': 4,
        'bus': 5,
        'car': 6,
        'cat': 7,
        'chair': 8,
        'cow': 9,
        'diningtable': 10,
        'dog': 11,
        'horse': 12,
        'motorbike': 13,
        'person': 14,
        'pottedplant': 15,
        'sheep': 16,
        'sofa': 17,
        'train': 18,
        'tvmonitor': 19
    }
    return labels_map
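For reference, parse_dataset above consumes a plain-text file list plus an optional label_list; hypothetical contents might look like this:

# trainval.txt — one sample per line: <image path> <xml path>, relative to image_dir
#   JPEGImages/000005.jpg Annotations/000005.xml
# label_list.txt — one class name per line; line order defines class ids 0, 1, ...
#   aeroplane
#   bicycle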
180
paddle_detection/ppdet/data/source/widerface.py
Normal file
@@ -0,0 +1,180 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import numpy as np

from ppdet.core.workspace import register, serializable
from .dataset import DetDataset

from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


@register
@serializable
class WIDERFaceDataSet(DetDataset):
    """
    Load WiderFace records with 'anno_path'

    Args:
        dataset_dir (str): root directory for dataset.
        image_dir (str): directory for images.
        anno_path (str): WiderFace annotation data.
        data_fields (list): key name of data dictionary, at least have 'image'.
        sample_num (int): number of samples to load, -1 means all.
        with_lmk (bool): whether to load face landmark keypoint labels.
    """

    def __init__(self,
                 dataset_dir=None,
                 image_dir=None,
                 anno_path=None,
                 data_fields=['image'],
                 sample_num=-1,
                 with_lmk=False):
        super(WIDERFaceDataSet, self).__init__(
            dataset_dir=dataset_dir,
            image_dir=image_dir,
            anno_path=anno_path,
            data_fields=data_fields,
            sample_num=sample_num,
            with_lmk=with_lmk)
        self.anno_path = anno_path
        self.sample_num = sample_num
        self.roidbs = None
        self.cname2cid = None
        self.with_lmk = with_lmk

    def parse_dataset(self):
        anno_path = os.path.join(self.dataset_dir, self.anno_path)
        image_dir = os.path.join(self.dataset_dir, self.image_dir)

        txt_file = anno_path

        records = []
        ct = 0
        file_lists = self._load_file_list(txt_file)
        cname2cid = widerface_label()

        for item in file_lists:
            im_fname = item[0]
            im_id = np.array([ct])
            gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
            gt_class = np.zeros((len(item) - 1, 1), dtype=np.int32)
            gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
            lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
            for index_box in range(len(item)):
                if index_box < 1:
                    continue
                gt_bbox[index_box - 1] = item[index_box][0]
                if self.with_lmk:
                    gt_lmk_labels[index_box - 1] = item[index_box][1]
                    lmk_ignore_flag[index_box - 1] = item[index_box][2]
            im_fname = os.path.join(image_dir,
                                    im_fname) if image_dir else im_fname
            widerface_rec = {
                'im_file': im_fname,
                'im_id': im_id,
            } if 'image' in self.data_fields else {}
            gt_rec = {
                'gt_bbox': gt_bbox,
                'gt_class': gt_class,
            }
            for k, v in gt_rec.items():
                if k in self.data_fields:
                    widerface_rec[k] = v
            if self.with_lmk:
                widerface_rec['gt_keypoint'] = gt_lmk_labels
                widerface_rec['keypoint_ignore'] = lmk_ignore_flag

            if len(item) != 0:
                records.append(widerface_rec)

            ct += 1
            if self.sample_num > 0 and ct >= self.sample_num:
                break
        assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
        logger.debug('{} samples in file {}'.format(ct, anno_path))
        self.roidbs, self.cname2cid = records, cname2cid

    def _load_file_list(self, input_txt):
        with open(input_txt, 'r') as f_dir:
            lines_input_txt = f_dir.readlines()

        file_dict = {}
        num_class = 0
        exts = ['jpg', 'jpeg', 'png', 'bmp']
        exts += [ext.upper() for ext in exts]
        for i in range(len(lines_input_txt)):
            line_txt = lines_input_txt[i].strip('\n\t\r')
            split_str = line_txt.split(' ')
            if len(split_str) == 1:
                img_file_name = os.path.split(split_str[0])[1]
                split_txt = img_file_name.split('.')
                if len(split_txt) < 2:
                    continue
                elif split_txt[-1] in exts:
                    if i != 0:
                        num_class += 1
                    file_dict[num_class] = [line_txt]
            else:
                if len(line_txt) <= 6:
                    continue
                result_boxs = []
                xmin = float(split_str[0])
                ymin = float(split_str[1])
                w = float(split_str[2])
                h = float(split_str[3])
                # Filter out wrong labels
                if w < 0 or h < 0:
                    logger.warning('Illegal box with w: {}, h: {} in '
                                   'img: {}, and it will be ignored'.format(
                                       w, h, file_dict[num_class][0]))
                    continue
                xmin = max(0, xmin)
                ymin = max(0, ymin)
                xmax = xmin + w
                ymax = ymin + h
                gt_bbox = [xmin, ymin, xmax, ymax]
                result_boxs.append(gt_bbox)
                if self.with_lmk:
                    assert len(split_str) > 18, 'When `with_lmk=True`, the number ' \
                        'of fields per line in the annotation file should ' \
                        'exceed 18.'
                    lmk0_x = float(split_str[5])
                    lmk0_y = float(split_str[6])
                    lmk1_x = float(split_str[8])
                    lmk1_y = float(split_str[9])
                    lmk2_x = float(split_str[11])
                    lmk2_y = float(split_str[12])
                    lmk3_x = float(split_str[14])
                    lmk3_y = float(split_str[15])
                    lmk4_x = float(split_str[17])
                    lmk4_y = float(split_str[18])
                    lmk_ignore_flag = 0 if lmk0_x == -1 else 1
                    gt_lmk_label = [
                        lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y, lmk3_x,
                        lmk3_y, lmk4_x, lmk4_y
                    ]
                    result_boxs.append(gt_lmk_label)
                    result_boxs.append(lmk_ignore_flag)
                file_dict[num_class].append(result_boxs)

        return list(file_dict.values())


def widerface_label():
    labels_map = {'face': 0}
    return labels_map
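The parser above assumes a retinaface-style WIDER FACE layout: an image path on its own line, then one face per line. The field layout below is inferred from the indices the code reads, not from a spec:

# xmin ymin w h _ x1 y1 _ x2 y2 _ x3 y3 _ x4 y4 _ x5 y5 ...
# where (x1, y1) ... (x5, y5) are the five facial landmarks read from
# fields 5/6, 8/9, 11/12, 14/15 and 17/18, and lmk0_x == -1 marks the
# landmarks as ignored (lmk_ignore_flag = 0).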
35
paddle_detection/ppdet/data/transform/__init__.py
Normal file
@@ -0,0 +1,35 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import operators
from . import batch_operators
from . import keypoint_operators
from . import mot_operators
from . import rotated_operators
from . import keypoints_3d_operators
from . import culane_operators

from .operators import *
from .batch_operators import *
from .keypoint_operators import *
from .mot_operators import *
from .rotated_operators import *
from .keypoints_3d_operators import *
from .culane_operators import *

__all__ = []
__all__ += registered_ops
__all__ += keypoint_operators.__all__
__all__ += mot_operators.__all__
__all__ += culane_operators.__all__
421
paddle_detection/ppdet/data/transform/atss_assigner.py
Normal file
@@ -0,0 +1,421 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The code is based on:
# https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
    """Calculate overlap between two sets of bboxes.

    If ``is_aligned`` is ``False``, then calculate the overlaps between each
    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
    pair of bboxes1 and bboxes2.

    Args:
        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
            B indicates the batch dim, in shape (B1, B2, ..., Bn).
            If ``is_aligned`` is ``True``, then m and n must be equal.
        mode (str): "iou" (intersection over union) or "iof" (intersection over
            foreground).
        is_aligned (bool, optional): If True, then m and n must be equal.
            Default False.
        eps (float, optional): A value added to the denominator for numerical
            stability. Default 1e-6.
    Returns:
        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
    """
    assert mode in ['iou', 'iof', 'giou', 'diou'], 'Unsupported mode {}'.format(
        mode)
    # Either the boxes are empty or the length of the boxes' last dimension is 4
    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)

    # Batch dim must be the same
    # Batch dim: (B1, B2, ... Bn)
    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
    batch_shape = bboxes1.shape[:-2]

    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
    if is_aligned:
        assert rows == cols

    if rows * cols == 0:
        if is_aligned:
            return np.random.random(batch_shape + (rows, ))
        else:
            return np.random.random(batch_shape + (rows, cols))

    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
        bboxes1[..., 3] - bboxes1[..., 1])
    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
        bboxes2[..., 3] - bboxes2[..., 1])

    if is_aligned:
        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1 + area2 - overlap
        else:
            union = area1
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
        if mode == 'diou':
            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
            b1_x1, b1_y1 = bboxes1[..., 0], bboxes1[..., 1]
            b1_x2, b1_y2 = bboxes1[..., 2], bboxes1[..., 3]
            b2_x1, b2_y1 = bboxes2[..., 0], bboxes2[..., 1]
            b2_x2, b2_y2 = bboxes2[..., 2], bboxes2[..., 3]
    else:
        lt = np.maximum(bboxes1[..., :, None, :2],
                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
        rb = np.minimum(bboxes1[..., :, None, 2:],
                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]

        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
        overlap = wh[..., 0] * wh[..., 1]

        if mode in ['iou', 'giou']:
            union = area1[..., None] + area2[..., None, :] - overlap
        else:
            union = area1[..., None]
        if mode == 'giou':
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
                                     bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                     bboxes2[..., None, :, 2:])
        if mode == 'diou':
            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
                                     bboxes2[..., None, :, :2])
            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
                                     bboxes2[..., None, :, 2:])
            b1_x1, b1_y1 = bboxes1[..., :, None, 0], bboxes1[..., :, None, 1]
            b1_x2, b1_y2 = bboxes1[..., :, None, 2], bboxes1[..., :, None, 3]
            b2_x1, b2_y1 = bboxes2[..., None, :, 0], bboxes2[..., None, :, 1]
            b2_x2, b2_y2 = bboxes2[..., None, :, 2], bboxes2[..., None, :, 3]

    eps = np.array([eps])
    union = np.maximum(union, eps)
    ious = overlap / union
    if mode in ['iou', 'iof']:
        return ious
    # calculate gious
    if mode in ['giou']:
        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
        enclose_area = np.maximum(enclose_area, eps)
        gious = ious - (enclose_area - union) / enclose_area
        return gious
    if mode in ['diou']:
        left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
        right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
        rho2 = left + right
        enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
        enclose_c = enclose_wh[..., 0]**2 + enclose_wh[..., 1]**2
        enclose_c = np.maximum(enclose_c, eps)
        dious = ious - rho2 / enclose_c
        return dious
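A quick numeric check of bbox_overlaps in its default pairwise IoU mode (an identical box and a half-offset box):

b1 = np.array([[0., 0., 10., 10.]])
b2 = np.array([[0., 0., 10., 10.], [5., 5., 15., 15.]])
print(bbox_overlaps(b1, b2))  # approx [[1.0, 0.1429]]: 25 / (100 + 100 - 25)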

def topk_(input, k, axis=1, largest=True):
    x = -input if largest else input
    if axis == 0:
        row_index = np.arange(input.shape[1 - axis])
        if k == x.shape[0]:  # argpartition requires index < len(input)
            topk_index = np.argpartition(x, k - 1, axis=axis)[0:k, :]
        else:
            topk_index = np.argpartition(x, k, axis=axis)[0:k, :]

        topk_data = x[topk_index, row_index]

        topk_index_sort = np.argsort(topk_data, axis=axis)
        topk_data_sort = topk_data[topk_index_sort, row_index]
        # mirror the axis != 0 branch: undo the negation applied for `largest`
        topk_data_sort = -topk_data_sort if largest else topk_data_sort
        topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
    else:
        column_index = np.arange(x.shape[1 - axis])[:, None]
        topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
        topk_data = x[column_index, topk_index]
        topk_data = -topk_data if largest else topk_data
        topk_index_sort = np.argsort(topk_data, axis=axis)
        topk_data_sort = topk_data[column_index, topk_index_sort]
        topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]

    return topk_data_sort, topk_index_sort
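topk_ mirrors a top-k selection for NumPy arrays; for example, the two smallest entries per column (the mode the assigner below uses):

x = np.array([[9., 1.], [4., 8.], [2., 5.]])
vals, idxs = topk_(x, k=2, axis=0, largest=False)
# vals -> [[2., 1.], [4., 5.]], idxs -> [[2, 0], [1, 2]]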

class ATSSAssigner(object):
    """Assign a corresponding gt bbox or background to each bbox.

    Each proposal will be assigned with `0` or a positive integer
    indicating the ground truth index.

    - 0: negative sample, no assigned gt
    - positive integer: positive sample, index (1-based) of assigned gt

    Args:
        topk (int): number of bboxes selected on each level
    """

    def __init__(self, topk=9):
        self.topk = topk

    def __call__(self,
                 bboxes,
                 num_level_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 gt_labels=None):
        """Assign gt to bboxes.

        The assignment is done in the following steps:
        1. compute iou between all bboxes (bboxes of all pyramid levels) and gt
        2. compute center distance between all bboxes and gt
        3. on each pyramid level, for each gt, select k bboxes whose centers
           are closest to the gt center, so we select k*l bboxes in total as
           candidates for each gt
        4. get the corresponding iou for these candidates, compute the
           mean and std, and set mean + std as the iou threshold
        5. select candidates whose iou is greater than or equal to
           the threshold as positive
        6. limit the positive sample's center to the gt

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
            num_level_bboxes (List): num of bboxes in each level
            gt_bboxes (np.array): Ground truth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
        """
        bboxes = bboxes[:, :4]
        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # assign 0 by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = np.zeros((num_bboxes, ))
            if num_gt == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if not np.any(gt_labels):
                assigned_labels = None
            else:
                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
            return assigned_gt_inds, max_overlaps

        # compute iou between all bboxes and gt
        overlaps = bbox_overlaps(bboxes, gt_bboxes)
        # compute center distance between all bboxes and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # Selecting candidates based on the center distance
        candidate_idxs = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_k = min(self.topk, bboxes_per_level)
            _, topk_idxs_per_level = topk_(
                distances_per_level, selectable_k, axis=0, largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = np.concatenate(candidate_idxs, axis=0)

        # get the corresponding iou for these candidates, compute the
        # mean and std, and set mean + std as the iou threshold
        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
        overlaps_mean_per_gt = candidate_overlaps.mean(0)
        overlaps_std_per_gt = candidate_overlaps.std(0)
        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt

        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]

        # limit the positive sample's center to the gt
        for gt_idx in range(num_gt):
            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
        ep_bboxes_cx = np.broadcast_to(
            bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
        ep_bboxes_cy = np.broadcast_to(
            bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
        candidate_idxs = candidate_idxs.reshape(-1)

        # calculate the left, top, right, bottom distance between positive
        # bbox center and gt side
        l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
        t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)
        assigned_gt_inds[max_overlaps !=
                         -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1

        return assigned_gt_inds, max_overlaps

    def get_vlr_region(self,
                       bboxes,
                       num_level_bboxes,
                       gt_bboxes,
                       gt_bboxes_ignore=None,
                       gt_labels=None):
        """get vlr region for ld distillation.

        Args:
            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
            num_level_bboxes (List): num of bboxes in each level
            gt_bboxes (np.array): Ground truth boxes, shape (k, 4).
            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
                labelled as `ignored`, e.g., crowd boxes in COCO.
            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
        """
        bboxes = bboxes[:, :4]

        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]

        # compute iou between all bboxes and gt
        overlaps = bbox_overlaps(bboxes, gt_bboxes)

        # compute diou between all bboxes and gt
        diou = bbox_overlaps(bboxes, gt_bboxes, mode='diou')

        # assign 0 by default
        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)

        vlr_region_iou = (assigned_gt_inds + 0).astype(np.float32)

        if num_gt == 0 or num_bboxes == 0:
            # No ground truth or boxes, return empty assignment
            max_overlaps = np.zeros((num_bboxes, ))
            if num_gt == 0:
                # No truth, assign everything to background
                assigned_gt_inds[:] = 0
            if not np.any(gt_labels):
                assigned_labels = None
            else:
                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
            return assigned_gt_inds, max_overlaps

        # compute center distance between all bboxes and gt
        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
        gt_points = np.stack((gt_cx, gt_cy), axis=1)

        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)

        distances = np.sqrt(
            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
            .sum(-1))

        # Selecting candidates based on the center distance
        candidate_idxs = []
        candidate_idxs_t = []
        start_idx = 0
        for bboxes_per_level in num_level_bboxes:
            # on each pyramid level, for each gt,
            # select k bboxes whose centers are closest to the gt center
            end_idx = start_idx + bboxes_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            selectable_t = min(self.topk, bboxes_per_level)
            selectable_k = bboxes_per_level  # k for all
            _, topt_idxs_per_level = topk_(
                distances_per_level, selectable_t, axis=0, largest=False)
            _, topk_idxs_per_level = topk_(
                distances_per_level, selectable_k, axis=0, largest=False)
            candidate_idxs_t.append(topt_idxs_per_level + start_idx)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx

        candidate_idxs_t = np.concatenate(candidate_idxs_t, axis=0)
        candidate_idxs = np.concatenate(candidate_idxs, axis=0)

        # get the corresponding iou for these candidates, compute the
        # mean and std, and set mean + std as the iou threshold
        candidate_overlaps_t = overlaps[candidate_idxs_t, np.arange(num_gt)]

        # compute tdiou
        t_diou = diou[candidate_idxs, np.arange(num_gt)]

        overlaps_mean_per_gt = candidate_overlaps_t.mean(0)
        overlaps_std_per_gt = candidate_overlaps_t.std(
            0, ddof=1)  # NOTE: use Bessel correction
        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt

        # compute region
        is_pos = (t_diou < overlaps_thr_per_gt[None, :]) & (
            t_diou >= 0.25 * overlaps_thr_per_gt[None, :])

        # limit the positive sample's center to the gt
        for gt_idx in range(num_gt):
            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes

        candidate_idxs = candidate_idxs.reshape(-1)

        # if an anchor box is assigned to multiple gts,
        # the one with the highest IoU will be selected.
        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]

        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        max_overlaps = overlaps_inf.max(axis=1)
        argmax_overlaps = overlaps_inf.argmax(axis=1)

        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T

        assigned_gt_inds[max_overlaps !=
                         -np.inf] = argmax_overlaps[max_overlaps != -np.inf] + 1

        vlr_region_iou[max_overlaps !=
                       -np.inf] = max_overlaps[max_overlaps != -np.inf] + 0

        return vlr_region_iou
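A toy end-to-end call (one pyramid level with four anchors and a single gt), just to show the input/output shapes:

assigner = ATSSAssigner(topk=2)
anchors = np.array([[0., 0., 10., 10.], [10., 0., 20., 10.],
                    [0., 10., 10., 20.], [10., 10., 20., 20.]])
gts = np.array([[1., 1., 9., 9.]])
inds, ious = assigner(anchors, [4], gts)
# inds -> [1, 0, 0, 0]: anchor 0 is matched to the (1-based) first gt;
# ious holds the matched IoU per anchor (-inf where unmatched).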
1586
paddle_detection/ppdet/data/transform/autoaugment_utils.py
Normal file
File diff suppressed because it is too large
1532
paddle_detection/ppdet/data/transform/batch_operators.py
Normal file
File diff suppressed because it is too large
366
paddle_detection/ppdet/data/transform/culane_operators.py
Normal file
@@ -0,0 +1,366 @@
|
||||
import numpy as np
|
||||
import imgaug.augmenters as iaa
|
||||
from .operators import BaseOperator, register_op
|
||||
from ppdet.utils.logger import setup_logger
|
||||
from ppdet.data.culane_utils import linestrings_to_lanes, transform_annotation
|
||||
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
__all__ = [
|
||||
"CULaneTrainProcess", "CULaneDataProcess", "HorizontalFlip",
|
||||
"ChannelShuffle", "CULaneAffine", "CULaneResize", "OneOfBlur",
|
||||
"MultiplyAndAddToBrightness", "AddToHueAndSaturation"
|
||||
]
|
||||
|
||||
|
||||
def trainTransforms(img_h, img_w):
|
||||
transforms = [{
|
||||
'name': 'Resize',
|
||||
'parameters': dict(size=dict(
|
||||
height=img_h, width=img_w)),
|
||||
'p': 1.0
|
||||
}, {
|
||||
'name': 'HorizontalFlip',
|
||||
'parameters': dict(p=1.0),
|
||||
'p': 0.5
|
||||
}, {
|
||||
'name': 'ChannelShuffle',
|
||||
'parameters': dict(p=1.0),
|
||||
'p': 0.1
|
||||
}, {
|
||||
'name': 'MultiplyAndAddToBrightness',
|
||||
'parameters': dict(
|
||||
mul=(0.85, 1.15), add=(-10, 10)),
|
||||
'p': 0.6
|
||||
}, {
|
||||
'name': 'AddToHueAndSaturation',
|
||||
'parameters': dict(value=(-10, 10)),
|
||||
'p': 0.7
|
||||
}, {
|
||||
'name': 'OneOf',
|
||||
'transforms': [
|
||||
dict(
|
||||
name='MotionBlur', parameters=dict(k=(3, 5))), dict(
|
||||
name='MedianBlur', parameters=dict(k=(3, 5)))
|
||||
],
|
||||
'p': 0.2
|
||||
}, {
|
||||
'name': 'Affine',
|
||||
'parameters': dict(
|
||||
translate_percent=dict(
|
||||
x=(-0.1, 0.1), y=(-0.1, 0.1)),
|
||||
rotate=(-10, 10),
|
||||
scale=(0.8, 1.2)),
|
||||
'p': 0.7
|
||||
}, {
|
||||
'name': 'Resize',
|
||||
'parameters': dict(size=dict(
|
||||
height=img_h, width=img_w)),
|
||||
'p': 1.0
|
||||
}]
|
||||
return transforms


@register_op
class CULaneTrainProcess(BaseOperator):
    def __init__(self, img_w, img_h):
        super(CULaneTrainProcess, self).__init__()
        self.img_w = img_w
        self.img_h = img_h
        self.transforms = trainTransforms(self.img_h, self.img_w)

        if self.transforms is not None:
            img_transforms = []
            for aug in self.transforms:
                p = aug['p']
                if aug['name'] != 'OneOf':
                    img_transforms.append(
                        iaa.Sometimes(
                            p=p,
                            then_list=getattr(iaa, aug['name'])(
                                **aug['parameters'])))
                else:
                    img_transforms.append(
                        iaa.Sometimes(
                            p=p,
                            then_list=iaa.OneOf([
                                getattr(iaa, aug_['name'])(**aug_['parameters'])
                                for aug_ in aug['transforms']
                            ])))
        else:
            img_transforms = []
        self.iaa_transform = iaa.Sequential(img_transforms)

    def apply(self, sample, context=None):
        img, line_strings, seg = self.iaa_transform(
            image=sample['image'],
            line_strings=sample['lanes'],
            segmentation_maps=sample['mask'])
        sample['image'] = img
        sample['lanes'] = line_strings
        sample['mask'] = seg
        return sample


@register_op
class CULaneDataProcess(BaseOperator):
    def __init__(self, img_w, img_h, num_points, max_lanes):
        super(CULaneDataProcess, self).__init__()
        self.img_w = img_w
        self.img_h = img_h
        self.num_points = num_points
        self.n_offsets = num_points
        self.n_strips = num_points - 1
        self.strip_size = self.img_h / self.n_strips

        self.max_lanes = max_lanes
        self.offsets_ys = np.arange(self.img_h, -1, -self.strip_size)

    def apply(self, sample, context=None):
        data = {}
        line_strings = sample['lanes']
        line_strings.clip_out_of_image_()
        new_anno = {'lanes': linestrings_to_lanes(line_strings)}

        for i in range(30):
            try:
                annos = transform_annotation(
                    self.img_w, self.img_h, self.max_lanes, self.n_offsets,
                    self.offsets_ys, self.n_strips, self.strip_size, new_anno)
                label = annos['label']
                lane_endpoints = annos['lane_endpoints']
                break
            except Exception:
                if (i + 1) == 30:
                    logger.critical('Transform annotation failed 30 times :(')
                    exit()

        sample['image'] = sample['image'].astype(np.float32) / 255.
        data['image'] = sample['image'].transpose(2, 0, 1)
        data['lane_line'] = label
        data['seg'] = sample['seg']
        data['full_img_path'] = sample['full_img_path']
        data['img_name'] = sample['img_name']
        data['im_id'] = sample['im_id']

        if 'mask' in sample.keys():
            data['seg'] = sample['mask'].get_arr()

        data['im_shape'] = np.array([self.img_w, self.img_h], dtype=np.float32)
        data['scale_factor'] = np.array([1., 1.], dtype=np.float32)

        return data
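

# Worked example of the strip geometry used above. The numbers are
# illustrative assumptions (num_points=72, img_h=320), not fixed config
# values: each lane is re-sampled as one x-offset per strip row, from the
# image bottom (y = img_h) up to the top (y ~ 0).
def _demo_strip_geometry():
    num_points, img_h = 72, 320
    n_strips = num_points - 1              # 71 strips
    strip_size = img_h / n_strips          # ~4.507 pixels per strip
    offsets_ys = np.arange(img_h, -1, -strip_size)
    assert len(offsets_ys) == num_points   # one sample row per offset
    return offsets_ys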


@register_op
class CULaneResize(BaseOperator):
    def __init__(self, img_h, img_w, prob=0.5):
        super(CULaneResize, self).__init__()
        self.img_h = img_h
        self.img_w = img_w
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob,
                                  iaa.Resize({
                                      "height": self.img_h,
                                      "width": self.img_w
                                  }))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'].copy().astype(np.uint8),
                line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class HorizontalFlip(BaseOperator):
    def __init__(self, prob=0.5):
        super(HorizontalFlip, self).__init__()
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob, iaa.HorizontalFlip(1.0))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class ChannelShuffle(BaseOperator):
    def __init__(self, prob=0.1):
        super(ChannelShuffle, self).__init__()
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(self.prob, iaa.ChannelShuffle(1.0))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class MultiplyAndAddToBrightness(BaseOperator):
    def __init__(self, mul=(0.85, 1.15), add=(-10, 10), prob=0.5):
        super(MultiplyAndAddToBrightness, self).__init__()
        self.mul = tuple(mul)
        self.add = tuple(add)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.MultiplyAndAddToBrightness(
                mul=self.mul, add=self.add))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class AddToHueAndSaturation(BaseOperator):
    def __init__(self, value=(-10, 10), prob=0.5):
        super(AddToHueAndSaturation, self).__init__()
        self.value = tuple(value)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob, iaa.AddToHueAndSaturation(value=self.value))
        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class OneOfBlur(BaseOperator):
    def __init__(self, MotionBlur_k=(3, 5), MedianBlur_k=(3, 5), prob=0.5):
        super(OneOfBlur, self).__init__()
        self.MotionBlur_k = tuple(MotionBlur_k)
        self.MedianBlur_k = tuple(MedianBlur_k)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.OneOf([
                iaa.MotionBlur(k=self.MotionBlur_k),
                iaa.MedianBlur(k=self.MedianBlur_k)
            ]))

        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample


@register_op
class CULaneAffine(BaseOperator):
    def __init__(self,
                 translate_percent_x=(-0.1, 0.1),
                 translate_percent_y=(-0.1, 0.1),
                 rotate=(3, 5),
                 scale=(0.8, 1.2),
                 prob=0.5):
        super(CULaneAffine, self).__init__()
        self.translate_percent = {
            'x': tuple(translate_percent_x),
            'y': tuple(translate_percent_y)
        }
        self.rotate = tuple(rotate)
        self.scale = tuple(scale)
        self.prob = prob

    def apply(self, sample, context=None):
        transform = iaa.Sometimes(
            self.prob,
            iaa.Affine(
                translate_percent=self.translate_percent,
                rotate=self.rotate,
                scale=self.scale))

        if 'mask' in sample.keys():
            img, line_strings, seg = transform(
                image=sample['image'],
                line_strings=sample['lanes'],
                segmentation_maps=sample['mask'])
            sample['image'] = img
            sample['lanes'] = line_strings
            sample['mask'] = seg
        else:
            img, line_strings = transform(
                image=sample['image'], line_strings=sample['lanes'])
            sample['image'] = img
            sample['lanes'] = line_strings

        return sample

86
paddle_detection/ppdet/data/transform/gridmask_utils.py
Normal file
@@ -0,0 +1,86 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The code is based on:
# https://github.com/dvlab-research/GridMask/blob/master/detection_grid/maskrcnn_benchmark/data/transforms/grid.py

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import numpy as np
from PIL import Image


class Gridmask(object):
    def __init__(self,
                 use_h=True,
                 use_w=True,
                 rotate=1,
                 offset=False,
                 ratio=0.5,
                 mode=1,
                 prob=0.7,
                 upper_iter=360000):
        super(Gridmask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.prob = prob
        self.st_prob = prob
        self.upper_iter = upper_iter

    def __call__(self, x, curr_iter):
        # linearly warm the apply probability up to st_prob over upper_iter
        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
        if np.random.rand() > self.prob:
            return x
        h, w, _ = x.shape
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)  # grid period in pixels
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        # crop the rotated 1.5x mask back to the image size
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h,
                    (ww - w) // 2:(ww - w) // 2 + w].astype(np.float32)

        if self.mode == 1:
            mask = 1 - mask
        mask = np.expand_dims(mask, axis=-1)
        if self.offset:
            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
        else:
            x = (x * mask).astype(x.dtype)

        return x
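

# A minimal usage sketch (illustrative, not part of the pipeline): Gridmask
# expects an HWC uint8 image plus the current iteration, and ramps its apply
# probability from 0 up to `prob` over the first `upper_iter` iterations.
def _demo_gridmask():
    gridmask = Gridmask(prob=0.7, upper_iter=360000)
    image = np.random.randint(0, 256, (320, 320, 3), dtype=np.uint8)
    return gridmask(image, curr_iter=1000)  # early iters: low mask probability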

1742
paddle_detection/ppdet/data/transform/keypoint_operators.py
Normal file
File diff suppressed because it is too large
296
paddle_detection/ppdet/data/transform/keypoints_3d_operators.py
Normal file
@@ -0,0 +1,296 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
import cv2
import numpy as np
import math
import copy
import random
import uuid
from numbers import Number, Integral

from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
from ppdet.core.workspace import serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

registered_ops = []

__all__ = [
    'CropAndFlipImages', 'PermuteImages', 'RandomFlipHalfBody3DTransformImages'
]

import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from mpl_toolkits.mplot3d import Axes3D


def register_keypointop(cls):
    return serializable(cls)


def register_op(cls):
    registered_ops.append(cls.__name__)
    if not hasattr(BaseOperator, cls.__name__):
        setattr(BaseOperator, cls.__name__, cls)
    else:
        raise KeyError("The {} class has been registered.".format(cls.__name__))
    return serializable(cls)


class BaseOperator(object):
    def __init__(self, name=None):
        if name is None:
            name = self.__class__.__name__
        self._id = name + '_' + str(uuid.uuid4())[-6:]

    def apply(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        return sample

    def __call__(self, sample, context=None):
        """ Process a sample.
        Args:
            sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx}
            context (dict): info about this sample processing
        Returns:
            result (dict): a processed sample
        """
        if isinstance(sample, Sequence):  # for batch_size
            for i in range(len(sample)):
                sample[i] = self.apply(sample[i], context)
        else:
            # image.shape changed
            sample = self.apply(sample, context)
        return sample

    def __str__(self):
        return str(self._id)


@register_keypointop
class CropAndFlipImages(object):
    """Horizontally flip all frames, then crop them along the width axis."""

    def __init__(self, crop_range, flip_pairs=None):
        super(CropAndFlipImages, self).__init__()
        self.crop_range = crop_range
        self.flip_pairs = flip_pairs

    def __call__(self, records):  # tuple
        images = records["image"]
        images = images[:, :, ::-1, :]  # horizontal flip
        images = images[:, :, self.crop_range[0]:self.crop_range[1]]
        records["image"] = images

        if "kps2d" in records.keys():
            kps2d = records["kps2d"]

            width, height = images.shape[2], images.shape[1]
            kps2d = np.array(kps2d)
            kps2d[:, :, 0] = kps2d[:, :, 0] - self.crop_range[0]

            for pair in self.flip_pairs:
                kps2d[:, pair[0], :], kps2d[:, pair[1], :] = \
                    kps2d[:, pair[1], :], kps2d[:, pair[0], :].copy()

            records["kps2d"] = kps2d

        return records


@register_op
class PermuteImages(BaseOperator):
    def __init__(self):
        """
        Change the layout to (batch_size, C, H, W), e.g. (6, 3, 1080, 1920)
        """
        super(PermuteImages, self).__init__()

    def apply(self, sample, context=None):
        images = sample["image"]
        images = images.transpose((0, 3, 1, 2))

        sample["image"] = images

        return sample


@register_keypointop
class RandomFlipHalfBody3DTransformImages(object):
    """apply data augment to images and coords
    to achieve the flip, scale, rotate and half body transform effect for training image

    Args:
        trainsize (list): [w, h], image target size
        upper_body_ids (list): the upper body joint ids
        flip_pairs (list): the left-right joints exchange order list
        pixel_std (int): the pixel std of the scale
        scale (float): the scale factor to transform the image
        rot (int): the rotate factor to transform the image
        num_joints_half_body (int): the joints threshold of the half body transform
        prob_half_body (float): the threshold of the half body transform
        flip (bool): whether to flip the image

    Returns:
        records (dict): contain the image and coords after transformed
    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6,
                 do_occlusion=False):
        super(RandomFlipHalfBody3DTransformImages, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob
        self.do_occlusion = do_occlusion

    def halfbody_transform(self, joints, joints_vis):
        upper_joints = []
        lower_joints = []
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        scale = scale * 1.5

        return center, scale

    def flip_joints(self, joints, joints_vis, width, matched_parts, kps2d=None):
        # joints: (num_frames, num_joints, 3), e.g. (6, 24, 3)

        joints[:, :, 0] = width - joints[:, :, 0] - 1  # x
        if kps2d is not None:
            kps2d[:, :, 0] = width - kps2d[:, :, 0] - 1

        for pair in matched_parts:
            joints[:, pair[0], :], joints[:, pair[1], :] = \
                joints[:, pair[1], :], joints[:, pair[0], :].copy()

            joints_vis[:, pair[0], :], joints_vis[:, pair[1], :] = \
                joints_vis[:, pair[1], :], joints_vis[:, pair[0], :].copy()

            if kps2d is not None:
                kps2d[:, pair[0], :], kps2d[:, pair[1], :] = \
                    kps2d[:, pair[1], :], kps2d[:, pair[0], :].copy()

        # move to zero
        joints -= joints[:, [0], :]  # (batch_size, 24, 3), numpy.ndarray

        return joints, joints_vis, kps2d

    def __call__(self, records):
        # images.shape: (num_frames, height, width, 3)
        images = records['image']

        joints = records['kps3d']
        joints_vis = records['kps3d_vis']

        kps2d = None
        if 'kps2d' in records.keys():
            kps2d = records['kps2d']

        if self.flip and np.random.random() <= 0.5:
            images = images[:, :, ::-1, :]  # horizontal flip, e.g. (6, 1080, 810, 3)
            joints, joints_vis, kps2d = self.flip_joints(
                joints, joints_vis, images.shape[2], self.flip_pairs,
                kps2d)  # mirror the left/right keypoints
        occlusion = False
        if self.do_occlusion and random.random() <= 0.5:  # random occlusion
            height = images[0].shape[0]
            width = images[0].shape[1]
            occlusion = True
            while True:
                area_min = 0.0
                area_max = 0.2
                synth_area = (random.random() *
                              (area_max - area_min) + area_min) * width * height

                ratio_min = 0.3
                ratio_max = 1 / 0.3
                synth_ratio = (random.random() *
                               (ratio_max - ratio_min) + ratio_min)

                synth_h = math.sqrt(synth_area * synth_ratio)
                synth_w = math.sqrt(synth_area / synth_ratio)
                synth_xmin = random.random() * (width - synth_w - 1)
                synth_ymin = random.random() * (height - synth_h - 1)

                if synth_xmin >= 0 and synth_ymin >= 0 and synth_xmin + synth_w < width and synth_ymin + synth_h < height:
                    xmin = int(synth_xmin)
                    ymin = int(synth_ymin)
                    w = int(synth_w)
                    h = int(synth_h)

                    mask = np.random.rand(h, w, 3) * 255
                    images[:, ymin:ymin + h, xmin:xmin + w, :] = mask[
                        None, :, :, :]
                    break

        records['image'] = images
        records['kps3d'] = joints
        records['kps3d_vis'] = joints_vis
        if kps2d is not None:
            records['kps2d'] = kps2d

        return records
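

# Worked example of the occlusion-patch sampling above (illustrative
# numbers): an area fraction in [0, 0.2) of the frame and an aspect ratio
# in [0.3, 1/0.3] jointly determine the patch height and width.
def _demo_occlusion_patch(width=810, height=1080):
    synth_area = 0.1 * width * height  # e.g. 10% of the frame
    synth_ratio = 0.5                  # h/w ratio of the patch
    synth_h = math.sqrt(synth_area * synth_ratio)  # ~209 px
    synth_w = math.sqrt(synth_area / synth_ratio)  # ~418 px
    return synth_h, synth_w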

627
paddle_detection/ppdet/data/transform/mot_operators.py
Normal file
@@ -0,0 +1,627 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from numbers import Integral

import cv2
import copy
import numpy as np
import random
import math

from .operators import BaseOperator, register_op
from .batch_operators import Gt2TTFTarget
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
from ppdet.utils.logger import setup_logger
from .op_helper import gaussian_radius
logger = setup_logger(__name__)

__all__ = [
    'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
    'Gt2JDETargetMax', 'Gt2FairMOTTarget'
]


@register_op
class RGBReverse(BaseOperator):
    """RGB to BGR, or BGR to RGB; used in pairs around MOTRandomAffine,
    which is sensitive to channel order.
    """

    def __init__(self):
        super(RGBReverse, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
        return sample


@register_op
class LetterBoxResize(BaseOperator):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image to a padded rectangular
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
        bboxes = bbox0.copy()
        bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
        bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
        bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
        bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
        return bboxes

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        h, w = sample['im_shape']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            from PIL import UnidentifiedImageError
            raise UnidentifiedImageError(
                '{}: image is not 3-dimensional.'.format(self))

        # apply image
        height, width = self.target_size
        img, ratio, padw, padh = self.apply_image(
            im, height=height, width=width)

        sample['image'] = img
        new_shape = (round(h * ratio), round(w * ratio))
        sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
                                                padw, padh)
        return sample
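

# Worked example of the letterbox arithmetic above (illustrative shapes):
# a 1080x1920 frame resized into a 608x1088 canvas keeps the aspect ratio
# and pads the short side symmetrically.
def _demo_letterbox_math():
    shape = (1080, 1920)  # source [height, width]
    height, width = 608, 1088  # target canvas
    ratio = min(height / shape[0], width / shape[1])  # ~0.5630
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio))  # (1081, 608)
    padw = (width - new_shape[0]) / 2  # 3.5 -> left=3, right=4
    padh = (height - new_shape[1]) / 2  # 0.0
    return new_shape, padw, padh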


@register_op
class MOTRandomAffine(BaseOperator):
    """
    Affine transform to image and coords to achieve the rotate, scale and
    shift effect for training image.

    Args:
        degrees (list[2]): the rotate range to apply, transform range is [min, max]
        translate (list[2]): the translate range to apply, transform range is [min, max]
        scale (list[2]): the scale range to apply, transform range is [min, max]
        shear (list[2]): the shear range to apply, transform range is [min, max]
        borderValue (list[3]): value used in case of a constant border when applying
            the perspective transformation
        reject_outside (bool): reject warped bounding bboxes outside of image

    Returns:
        records(dict): contain the image and coords after transformed
    """

    def __init__(self,
                 degrees=(-5, 5),
                 translate=(0.10, 0.10),
                 scale=(0.50, 1.20),
                 shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5),
                 reject_outside=True):
        super(MOTRandomAffine, self).__init__()
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue
        self.reject_outside = reject_outside

    def apply(self, sample, context=None):
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)

        img = sample['image']
        height, width = img.shape[0], img.shape[1]

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]
                               ) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(
            angle=a, center=(width / 2, height / 2), scale=s)

        # Translation
        T = np.eye(3)
        T[0, 2] = (
            random.random() * 2 - 1
        ) * self.translate[0] * height + border  # x translation (pixels)
        T[1, 2] = (
            random.random() * 2 - 1
        ) * self.translate[1] * width + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # combined transform matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img,
            M,
            dsize=(width, height),
            flags=cv2.INTER_LINEAR,
            borderValue=self.borderValue)  # BGR order borderValue

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            targets = sample['gt_bbox']
            n = targets.shape[0]
            points = targets.copy()
            area0 = (points[:, 2] - points[:, 0]) * (
                points[:, 3] - points[:, 1])

            # warp points
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate(
                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate(
                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            if self.reject_outside:
                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            if sum(i) > 0:
                sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
                sample['gt_class'] = sample['gt_class'][i]
                if 'difficult' in sample:
                    sample['difficult'] = sample['difficult'][i]
                if 'gt_ide' in sample:
                    sample['gt_ide'] = sample['gt_ide'][i]
                if 'is_crowd' in sample:
                    sample['is_crowd'] = sample['is_crowd'][i]
                sample['image'] = imw
                return sample
            else:
                return sample
        else:
            return sample
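

# Sketch of the homogeneous-coordinate composition above (illustrative):
# shear, translation and rotation/scale are each 3x3 matrices, so a box
# corner (x, y) is warped as (x', y', 1)^T = M @ (x, y, 1)^T.
def _demo_warp_corner(M, x, y):
    pt = np.array([x, y, 1.0])
    out = M @ pt
    return out[0], out[1]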


@register_op
class Gt2JDETargetThres(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by ground truth data when training
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        ide_thresh (float): identity thresh; anchors above it get an identity target
        fg_thresh (float): thresh of foreground, higher is foreground
        bg_thresh (float): thresh of background, lower is background
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes

    def generate_anchor(self, nGh, nGw, anchor_hw):
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))

        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]

        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)

        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh

    def encode_delta(self, gt_box_list, fg_anchor_list):
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
            fg_anchor_list[:, 2], fg_anchor_list[:, 3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
            gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)

    def pad_box(self, sample, num_max):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]

        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))

        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)

                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)

                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)

                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)

                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)

                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1

                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]

                if np.sum(fg_index) > 0:
                    tid[id_index] = gt_id_list

                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
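

# Worked example of encode_delta above (illustrative numbers): offsets are
# relative to the matched anchor and sizes are log-ratios, mirroring the
# YOLO-style box parameterization.
def _demo_encode_delta():
    px, py, pw, ph = 10.0, 10.0, 4.0, 8.0  # anchor cx, cy, w, h
    gx, gy, gw, gh = 11.0, 12.0, 8.0, 8.0  # ground-truth cx, cy, w, h
    dx, dy = (gx - px) / pw, (gy - py) / ph    # 0.25, 0.25
    dw, dh = np.log(gw / pw), np.log(gh / ph)  # ~0.693, 0.0
    return dx, dy, dw, dh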


@register_op
class Gt2JDETargetMax(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets by ground truth data when evaluating
    Args:
        anchors (list): anchors of JDE model
        anchor_masks (list): anchor_masks of JDE model
        downsample_ratios (list): downsample ratios of JDE model
        max_iou_thresh (float): iou thresh for high quality anchor
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)

                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

                # Select best iou_pred and anchor
                iou_best = iou.max(0)  # best anchor [0-2] for each target
                a = np.argmax(iou, axis=0)

                # Select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst

                # Unique anchor selection
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # best anchor must share significant commonality (iou) with target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]

                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)

                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh

                    # XY coordinates
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # Width and height in yolo method
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
                                                                 anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
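

# Worked example of the wh-only IoU above (illustrative): both boxes are
# treated as centered at the origin, so the intersection is min(w)*min(h).
def _demo_wh_iou():
    gwh = np.array([[4.0, 6.0]])     # target w, h
    anchor = np.array([[6.0, 6.0]])  # anchor w, h
    inter = np.minimum(gwh, anchor).prod(1)        # 4*6 = 24
    union = gwh.prod(1) + anchor.prod(1) - inter   # 24 + 36 - 24 = 36
    return inter / union                           # ~0.667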


class Gt2FairMOTTarget(Gt2TTFTarget):
    __shared__ = ['num_classes']
    """
    Generate FairMOT targets by ground truth data.
    Differences between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the gaussian kernel radius used to generate a heatmap.
        2. the targets needed during training.

    Args:
        num_classes(int): the number of classes.
        down_ratio(int): the down ratio from images to heatmap, 4 by default.
        max_objs(int): the maximum number of ground truth objects in an image, 500 by default.
    """

    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        # intentionally skips Gt2TTFTarget.__init__; only its helper methods
        # (e.g. draw_truncate_gaussian) are reused
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs

    def __call__(self, samples, context=None):
        for b_id, sample in enumerate(samples):
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio

            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)
            if self.num_classes > 1:
                # each category corresponds to a set of track ids
                cls_tr_ids = np.zeros(
                    (self.num_classes, output_h, output_w), dtype=np.int64)
                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]

                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

                if h > 0 and w > 0:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int, radius,
                                                radius)
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                        bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]

                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy
                    if self.num_classes > 1:
                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
                        # track id starts from 0
                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1

            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            if self.num_classes > 1:
                sample['cls_id_map'] = cls_id_map
                sample['cls_tr_ids'] = cls_tr_ids
            sample['bbox_xys'] = bbox_xys
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples
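

# Worked example of the target indexing above (illustrative numbers): the
# heatmap peak at integer cell (cx, cy) is stored as a flat index into the
# output_h x output_w map, plus the sub-pixel offset lost by rounding.
def _demo_center_index(output_w=272):
    ct = np.array([123.7, 45.2], dtype=np.float32)
    ct_int = ct.astype(np.int32)              # (123, 45)
    index = ct_int[1] * output_w + ct_int[0]  # 45*272 + 123 = 12363
    offset = ct - ct_int                      # ~(0.7, 0.2)
    return index, offset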

494
paddle_detection/ppdet/data/transform/op_helper.py
Normal file
@@ -0,0 +1,494 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains helper methods for BBOX processing

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import random
import math
import cv2


def meet_emit_constraint(src_bbox, sample_bbox):
    center_x = (src_bbox[2] + src_bbox[0]) / 2
    center_y = (src_bbox[3] + src_bbox[1]) / 2
    if center_x >= sample_bbox[0] and \
            center_x <= sample_bbox[2] and \
            center_y >= sample_bbox[1] and \
            center_y <= sample_bbox[3]:
        return True
    return False


def clip_bbox(src_bbox):
    src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0)
    src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0)
    src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0)
    src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0)
    return src_bbox


def bbox_area(src_bbox):
    if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
        return 0.
    else:
        width = src_bbox[2] - src_bbox[0]
        height = src_bbox[3] - src_bbox[1]
        return width * height


def is_overlap(object_bbox, sample_bbox):
    if object_bbox[0] >= sample_bbox[2] or \
            object_bbox[2] <= sample_bbox[0] or \
            object_bbox[1] >= sample_bbox[3] or \
            object_bbox[3] <= sample_bbox[1]:
        return False
    else:
        return True


def filter_and_process(sample_bbox, bboxes, labels, scores=None,
                       keypoints=None):
    new_bboxes = []
    new_labels = []
    new_scores = []
    new_keypoints = []
    new_kp_ignore = []
    for i in range(len(bboxes)):
        new_bbox = [0, 0, 0, 0]
        obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
        if not meet_emit_constraint(obj_bbox, sample_bbox):
            continue
        if not is_overlap(obj_bbox, sample_bbox):
            continue
        sample_width = sample_bbox[2] - sample_bbox[0]
        sample_height = sample_bbox[3] - sample_bbox[1]
        new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
        new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height
        new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width
        new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height
        new_bbox = clip_bbox(new_bbox)
        if bbox_area(new_bbox) > 0:
            new_bboxes.append(new_bbox)
            new_labels.append([labels[i][0]])
            if scores is not None:
                new_scores.append([scores[i][0]])
            if keypoints is not None:
                sample_keypoint = keypoints[0][i]
                for j in range(len(sample_keypoint)):
                    kp_len = sample_height if j % 2 else sample_width
                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
                    sample_keypoint[j] = (
                        sample_keypoint[j] - sample_coord) / kp_len
                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
                new_keypoints.append(sample_keypoint)
                new_kp_ignore.append(keypoints[1][i])

    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    if keypoints is not None:
        keypoints = np.array(new_keypoints)
        new_kp_ignore = np.array(new_kp_ignore)
        return bboxes, labels, scores, (keypoints, new_kp_ignore)
    return bboxes, labels, scores


def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
    new_bboxes = []
    new_labels = []
    new_scores = []
    for i, bbox in enumerate(bboxes):
        w = float((bbox[2] - bbox[0]) * target_size)
        h = float((bbox[3] - bbox[1]) * target_size)
        if w * h < float(min_size * min_size):
            continue
        else:
            new_bboxes.append(bbox)
            new_labels.append(labels[i])
            if scores is not None and scores.size != 0:
                new_scores.append(scores[i])
    bboxes = np.array(new_bboxes)
    labels = np.array(new_labels)
    scores = np.array(new_scores)
    return bboxes, labels, scores


def generate_sample_bbox(sampler):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox


def generate_sample_bbox_square(sampler, image_width, image_height):
    scale = np.random.uniform(sampler[2], sampler[3])
    aspect_ratio = np.random.uniform(sampler[4], sampler[5])
    aspect_ratio = max(aspect_ratio, (scale**2.0))
    aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
    bbox_width = scale * (aspect_ratio**0.5)
    bbox_height = scale / (aspect_ratio**0.5)
    if image_height < image_width:
        bbox_width = bbox_height * image_height / image_width
    else:
        bbox_height = bbox_width * image_width / image_height
    xmin_bound = 1 - bbox_width
    ymin_bound = 1 - bbox_height
    xmin = np.random.uniform(0, xmin_bound)
    ymin = np.random.uniform(0, ymin_bound)
    xmax = xmin + bbox_width
    ymax = ymin + bbox_height
    sampled_bbox = [xmin, ymin, xmax, ymax]
    return sampled_bbox


def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
                         resize_width):
    num_gt = len(bbox_labels)
    # np.random.randint range: [low, high)
    rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0

    if num_gt != 0:
        norm_xmin = bbox_labels[rand_idx][0]
        norm_ymin = bbox_labels[rand_idx][1]
        norm_xmax = bbox_labels[rand_idx][2]
        norm_ymax = bbox_labels[rand_idx][3]

        xmin = norm_xmin * image_width
        ymin = norm_ymin * image_height
        wid = image_width * (norm_xmax - norm_xmin)
        hei = image_height * (norm_ymax - norm_ymin)
        range_size = 0

        area = wid * hei
        for scale_ind in range(0, len(scale_array) - 1):
            if area > scale_array[scale_ind] ** 2 and area < \
                    scale_array[scale_ind + 1] ** 2:
                range_size = scale_ind + 1
                break

        if area > scale_array[len(scale_array) - 2]**2:
            range_size = len(scale_array) - 2

        scale_choose = 0.0
        if range_size == 0:
            rand_idx_size = 0
        else:
            # np.random.randint range: [low, high)
            rng_rand_size = np.random.randint(0, range_size + 1)
            rand_idx_size = rng_rand_size % (range_size + 1)

        if rand_idx_size == range_size:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = min(2.0 * scale_array[rand_idx_size],
                                 2 * math.sqrt(wid * hei))
            scale_choose = random.uniform(min_resize_val, max_resize_val)
        else:
            min_resize_val = scale_array[rand_idx_size] / 2.0
            max_resize_val = 2.0 * scale_array[rand_idx_size]
            scale_choose = random.uniform(min_resize_val, max_resize_val)

        sample_bbox_size = wid * resize_width / scale_choose

        w_off_orig = 0.0
        h_off_orig = 0.0
        if sample_bbox_size < max(image_height, image_width):
            if wid <= sample_bbox_size:
                w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
                                               xmin)
            else:
                w_off_orig = np.random.uniform(xmin,
                                               xmin + wid - sample_bbox_size)

            if hei <= sample_bbox_size:
                h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
                                               ymin)
            else:
                h_off_orig = np.random.uniform(ymin,
                                               ymin + hei - sample_bbox_size)

        else:
            w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
            h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)

        w_off_orig = math.floor(w_off_orig)
        h_off_orig = math.floor(h_off_orig)

        # Figure out top left coordinates.
        w_off = float(w_off_orig / image_width)
        h_off = float(h_off_orig / image_height)

        sampled_bbox = [
            w_off, h_off, w_off + float(sample_bbox_size / image_width),
            h_off + float(sample_bbox_size / image_height)
        ]
        return sampled_bbox
    else:
        return 0


def jaccard_overlap(sample_bbox, object_bbox):
    if sample_bbox[0] >= object_bbox[2] or \
            sample_bbox[2] <= object_bbox[0] or \
            sample_bbox[1] >= object_bbox[3] or \
            sample_bbox[3] <= object_bbox[1]:
        return 0
    intersect_xmin = max(sample_bbox[0], object_bbox[0])
    intersect_ymin = max(sample_bbox[1], object_bbox[1])
    intersect_xmax = min(sample_bbox[2], object_bbox[2])
    intersect_ymax = min(sample_bbox[3], object_bbox[3])
    intersect_size = (intersect_xmax - intersect_xmin) * (
        intersect_ymax - intersect_ymin)
    sample_bbox_size = bbox_area(sample_bbox)
    object_bbox_size = bbox_area(object_bbox)
    overlap = intersect_size / (
        sample_bbox_size + object_bbox_size - intersect_size)
    return overlap
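

# Worked example of jaccard_overlap (illustrative boxes in normalized
# [xmin, ymin, xmax, ymax] form): intersection 0.25*0.25 = 0.0625 over
# union 0.25 + 0.0625 - 0.0625 = 0.25, i.e. IoU = 0.25.
def _demo_jaccard():
    sample_bbox = [0.0, 0.0, 0.5, 0.5]    # area 0.25
    object_bbox = [0.25, 0.25, 0.5, 0.5]  # area 0.0625, fully inside
    return jaccard_overlap(sample_bbox, object_bbox)  # -> 0.25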
|
||||
|
||||
|
||||
def intersect_bbox(bbox1, bbox2):
|
||||
if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
|
||||
bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
|
||||
intersection_box = [0.0, 0.0, 0.0, 0.0]
|
||||
else:
|
||||
intersection_box = [
|
||||
max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
|
||||
min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
|
||||
]
|
||||
return intersection_box
|
||||
|
||||
|
||||
def bbox_coverage(bbox1, bbox2):
|
||||
inter_box = intersect_bbox(bbox1, bbox2)
|
||||
intersect_size = bbox_area(inter_box)
|
||||
|
||||
if intersect_size > 0:
|
||||
bbox1_size = bbox_area(bbox1)
|
||||
return intersect_size / bbox1_size
|
||||
else:
|
||||
return 0.
|
||||
|
||||
|
||||
def satisfy_sample_constraint(sampler,
                              sample_bbox,
                              gt_bboxes,
                              satisfy_all=False):
    if sampler[6] == 0 and sampler[7] == 0:
        return True
    satisfied = []
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        overlap = jaccard_overlap(sample_bbox, object_bbox)
        if sampler[6] != 0 and \
                overlap < sampler[6]:
            satisfied.append(False)
            continue
        if sampler[7] != 0 and \
                overlap > sampler[7]:
            satisfied.append(False)
            continue
        satisfied.append(True)
        if not satisfy_all:
            return True

    if satisfy_all:
        return np.all(satisfied)
    else:
        return False

def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
    if sampler[6] == 0 and sampler[7] == 0:
        has_jaccard_overlap = False
    else:
        has_jaccard_overlap = True
    if sampler[8] == 0 and sampler[9] == 0:
        has_object_coverage = False
    else:
        has_object_coverage = True

    if not has_jaccard_overlap and not has_object_coverage:
        return True
    found = False
    for i in range(len(gt_bboxes)):
        object_bbox = [
            gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
        ]
        if has_jaccard_overlap:
            overlap = jaccard_overlap(sample_bbox, object_bbox)
            if sampler[6] != 0 and \
                    overlap < sampler[6]:
                continue
            if sampler[7] != 0 and \
                    overlap > sampler[7]:
                continue
            found = True
        if has_object_coverage:
            object_coverage = bbox_coverage(object_bbox, sample_bbox)
            if sampler[8] != 0 and \
                    object_coverage < sampler[8]:
                continue
            if sampler[9] != 0 and \
                    object_coverage > sampler[9]:
                continue
            found = True
        if found:
            return True
    return found

def crop_image_sampling(img, sample_bbox, image_width, image_height,
                        target_size):
    # no clipping here
    xmin = int(sample_bbox[0] * image_width)
    xmax = int(sample_bbox[2] * image_width)
    ymin = int(sample_bbox[1] * image_height)
    ymax = int(sample_bbox[3] * image_height)

    w_off = xmin
    h_off = ymin
    width = xmax - xmin
    height = ymax - ymin
    cross_xmin = max(0.0, float(w_off))
    cross_ymin = max(0.0, float(h_off))
    cross_xmax = min(float(w_off + width - 1.0), float(image_width))
    cross_ymax = min(float(h_off + height - 1.0), float(image_height))
    cross_width = cross_xmax - cross_xmin
    cross_height = cross_ymax - cross_ymin

    roi_xmin = 0 if w_off >= 0 else abs(w_off)
    roi_ymin = 0 if h_off >= 0 else abs(h_off)
    roi_width = cross_width
    roi_height = cross_height

    roi_y1 = int(roi_ymin)
    roi_y2 = int(roi_ymin + roi_height)
    roi_x1 = int(roi_xmin)
    roi_x2 = int(roi_xmin + roi_width)

    cross_y1 = int(cross_ymin)
    cross_y2 = int(cross_ymin + cross_height)
    cross_x1 = int(cross_xmin)
    cross_x2 = int(cross_xmin + cross_width)

    sample_img = np.zeros((height, width, 3))
    sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
        img[cross_y1: cross_y2, cross_x1: cross_x2]

    sample_img = cv2.resize(
        sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)

    return sample_img

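# Editor's illustration (not upstream code): crop_image_sampling pads with
# zeros wherever the sampled window falls outside the image, then resizes
# the crop to a square. A window half outside the left edge of a white image
# yields a crop whose left half is black:
#
#   >>> img = np.full((100, 100, 3), 255, dtype=np.uint8)
#   >>> crop = crop_image_sampling(img, [-0.5, 0.0, 0.5, 1.0], 100, 100, 64)
#   >>> crop.shape
#   (64, 64, 3)
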
def is_poly(segm):
    assert isinstance(segm, (list, dict)), \
        "Invalid segm type: {}".format(type(segm))
    return isinstance(segm, list)

def gaussian_radius(bbox_size, min_overlap):
    height, width = bbox_size

    a1 = 1
    b1 = (height + width)
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
    radius1 = (b1 + sq1) / (2 * a1)

    a2 = 4
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
    radius2 = (b2 + sq2) / 2

    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
    radius3 = (b3 + sq3) / 2
    return min(radius1, radius2, radius3)

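# Editor's note (illustration, not upstream code): gaussian_radius follows
# the CornerNet/CenterNet recipe -- it solves three quadratics for the
# largest corner displacement that still keeps at least `min_overlap` IoU
# with the ground-truth box, and takes the minimum of the three roots.
#
#   >>> radius = gaussian_radius((64, 64), 0.7)   # a 64x64 box
#   >>> int(radius)
#   17
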
def draw_gaussian(heatmap, center, radius, k=1, delta=6):
    # parameter renamed from the original misspelling `delte`
    diameter = 2 * radius + 1
    sigma = diameter / delta
    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)

    x, y = center

    height, width = heatmap.shape[0:2]

    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)

def gaussian2D(shape, sigma_x=1, sigma_y=1):
    m, n = [(ss - 1.) / 2. for ss in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]

    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
                                                            sigma_y)))
    h[h < np.finfo(h.dtype).eps * h.max()] = 0
    return h

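# Editor's illustration (not upstream code): gaussian2D returns an
# unnormalized kernel whose peak is exactly 1 at the center cell.
#
#   >>> g = gaussian2D((5, 5), sigma_x=1.0, sigma_y=1.0)
#   >>> g.shape, float(g[2, 2])
#   ((5, 5), 1.0)
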
def draw_umich_gaussian(heatmap, center, radius, k=1):
    """
    draw_umich_gaussian, refer to https://github.com/xingyizhou/CenterNet/blob/master/src/lib/utils/image.py#L126
    """
    diameter = 2 * radius + 1
    gaussian = gaussian2D(
        (diameter, diameter), sigma_x=diameter / 6, sigma_y=diameter / 6)

    x, y = int(center[0]), int(center[1])

    height, width = heatmap.shape[0:2]

    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
    masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:
                               radius + right]
    if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
        np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
    return heatmap

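# Editor's illustration (not upstream code): splatting one center point onto
# an empty heatmap; the peak lands at the given (x, y) location.
#
#   >>> hm = np.zeros((16, 16), dtype=np.float32)
#   >>> _ = draw_umich_gaussian(hm, (8, 8), radius=2)
#   >>> float(hm[8, 8])
#   1.0
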
def get_border(border, size):
    i = 1
    while size - border // i <= border // i:
        i *= 2
    return border // i

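# Editor's illustration (not upstream code): get_border shrinks the requested
# border by powers of 2 until at least one non-border pixel remains.
#
#   >>> get_border(128, 100)   # 128 is too wide for a 100px side
#   32
#   >>> get_border(128, 700)
#   128
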
4148
paddle_detection/ppdet/data/transform/operators.py
Normal file
File diff suppressed because it is too large
480
paddle_detection/ppdet/data/transform/rotated_operators.py
Normal file
@@ -0,0 +1,480 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

from numbers import Number, Integral

import os

import cv2
import numpy as np
import math
import copy

# os and PIL's Image/ImageDraw are used by VisibleRBox below but were
# missing from the original import block, as was ImageError (assumed to be
# defined in operators.py alongside the other base utilities), which
# RResize raises.
from PIL import Image, ImageDraw

from .operators import register_op, BaseOperator, ImageError
from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
from ppdet.utils.logger import setup_logger
from ppdet.utils.compact import imagedraw_textsize_c
logger = setup_logger(__name__)

@register_op
class RRotate(BaseOperator):
    """ Rotate Image, Polygon, Box

    Args:
        scale (float): rotate scale
        angle (float): rotate angle
        fill_value (int, tuple): fill color
        auto_bound (bool): whether to auto-bound the rotated image or not
    """

    def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
        super(RRotate, self).__init__()
        self.scale = scale
        self.angle = angle
        self.fill_value = fill_value
        self.auto_bound = auto_bound

    def get_rotated_matrix(self, angle, scale, h, w):
        center = ((w - 1) * 0.5, (h - 1) * 0.5)
        matrix = cv2.getRotationMatrix2D(center, -angle, scale)
        # calculate the new size
        cos = np.abs(matrix[0, 0])
        sin = np.abs(matrix[0, 1])
        new_w = h * sin + w * cos
        new_h = h * cos + w * sin
        # calculate offset
        n_w = int(np.round(new_w))
        n_h = int(np.round(new_h))
        if self.auto_bound:
            ratio = min(w / n_w, h / n_h)
            matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
        else:
            matrix[0, 2] += (new_w - w) * 0.5
            matrix[1, 2] += (new_h - h) * 0.5
            w = n_w
            h = n_h
        return matrix, h, w

    def get_rect_from_pts(self, pts, h, w):
        """ get minimum rectangle of points
        """
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
                                                            axis=1)
        max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
                                                            axis=1)
        min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
        max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
        boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
        return boxes

    def apply_image(self, image, matrix, h, w):
        return cv2.warpAffine(
            image, matrix, (w, h), borderValue=self.fill_value)

    def apply_pts(self, pts, matrix, h, w):
        assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
        # m is two times the number of points due to (x, y) pairs
        _, m = pts.shape
        # transpose points
        pts_ = pts.reshape(-1, 2).T
        # pad 1 to convert the points to homogeneous coordinates
        padding = np.ones((1, pts_.shape[1]), pts.dtype)
        rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
        return rotated_pts[:2, :].T.reshape(-1, m)

    def apply(self, sample, context=None):
        image = sample['image']
        h, w = image.shape[:2]
        matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
        sample['image'] = self.apply_image(image, matrix, h, w)
        polys = sample['gt_poly']
        # TODO: segment or keypoint to be processed
        if len(polys) > 0:
            pts = self.apply_pts(polys, matrix, h, w)
            sample['gt_poly'] = pts
            sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)

        return sample

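# Editor's illustration (not upstream code): rotating a sample by 30 degrees.
# With auto_bound=True the canvas keeps its original size and the content is
# scaled down so the rotated corners stay inside it.
#
#   >>> op = RRotate(scale=1.0, angle=30., fill_value=0., auto_bound=True)
#   >>> sample = {'image': np.zeros((100, 200, 3), dtype=np.uint8),
#   ...           'gt_poly': np.array([[10., 10., 50., 10., 50., 40., 10., 40.]],
#   ...                               dtype=np.float32)}
#   >>> out = op(sample)   # BaseOperator.__call__ is assumed to dispatch to apply()
#   >>> out['image'].shape, out['gt_bbox'].shape
#   ((100, 200, 3), (1, 4))
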
@register_op
class RandomRRotate(BaseOperator):
    """ Random Rotate Image
    Args:
        scale (float, tuple, list): rotate scale
        scale_mode (str): mode of scale, [range, value, None]
        angle (float, tuple, list): rotate angle
        angle_mode (str): mode of angle, [range, value, None]
        fill_value (float, tuple, list): fill value
        rotate_prob (float): probability of rotation
        auto_bound (bool): whether to auto-bound the rotated image or not
    """

    def __init__(self,
                 scale=1.0,
                 scale_mode=None,
                 angle=0.,
                 angle_mode=None,
                 fill_value=0.,
                 rotate_prob=1.0,
                 auto_bound=True):
        super(RandomRRotate, self).__init__()
        self.scale = scale
        self.scale_mode = scale_mode
        self.angle = angle
        self.angle_mode = angle_mode
        self.fill_value = fill_value
        self.rotate_prob = rotate_prob
        self.auto_bound = auto_bound

    def get_angle(self, angle, angle_mode):
        assert not angle_mode or angle_mode in [
            'range', 'value'
        ], 'angle mode should be in [range, value, None]'
        if not angle_mode:
            return angle
        elif angle_mode == 'range':
            low, high = angle
            return np.random.rand() * (high - low) + low
        elif angle_mode == 'value':
            return np.random.choice(angle)

    def get_scale(self, scale, scale_mode):
        assert not scale_mode or scale_mode in [
            'range', 'value'
        ], 'scale mode should be in [range, value, None]'
        if not scale_mode:
            return scale
        elif scale_mode == 'range':
            low, high = scale
            return np.random.rand() * (high - low) + low
        elif scale_mode == 'value':
            return np.random.choice(scale)

    def apply(self, sample, context=None):
        if np.random.rand() > self.rotate_prob:
            return sample

        angle = self.get_angle(self.angle, self.angle_mode)
        scale = self.get_scale(self.scale, self.scale_mode)
        rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
        return rotator(sample)

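# Editor's illustration (not upstream code): typical usage picks the angle
# from a discrete set and rotates only part of the time. The equivalent
# YAML config entry would presumably look like:
#
#   - RandomRRotate: {angle_mode: 'value', angle: [0, 90, 180, -90], rotate_prob: 0.5}
#
#   >>> op = RandomRRotate(angle=[0, 90, 180, -90], angle_mode='value',
#   ...                    rotate_prob=0.5)
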
@register_op
class Poly2RBox(BaseOperator):
    """ Polygon to Rotated Box, using the new OpenCV definition since 4.5.1

    Args:
        filter_threshold (int, float): threshold to filter annotations
        filter_mode (str): filter mode, ['area', 'edge']
        rbox_type (str): rbox type, ['le135', 'oc']
    """

    def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
        super(Poly2RBox, self).__init__()
        self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
        self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np

    def filter(self, size, threshold, mode):
        if mode == 'area':
            if size[0] * size[1] < threshold:
                return True
        elif mode == 'edge':
            if min(size) < threshold:
                return True
        return False

    def get_rbox(self, polys):
        valid_ids, rboxes, bboxes = [], [], []
        for i, poly in enumerate(polys):
            cx, cy, w, h, angle = self.rbox_fn(poly)
            if self.filter_fn((w, h)):
                continue
            rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
            valid_ids.append(i)
            xmin, ymin = min(poly[0::2]), min(poly[1::2])
            xmax, ymax = max(poly[0::2]), max(poly[1::2])
            bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))

        if len(valid_ids) == 0:
            rboxes = np.zeros((0, 5), dtype=np.float32)
            bboxes = np.zeros((0, 4), dtype=np.float32)
        else:
            rboxes = np.stack(rboxes)
            bboxes = np.stack(bboxes)

        return rboxes, bboxes, valid_ids

    def apply(self, sample, context=None):
        rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
        sample['gt_rbox'] = rboxes
        sample['gt_bbox'] = bboxes
        for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
            if k in sample:
                sample[k] = sample[k][valid_ids]

        return sample

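# Editor's illustration (not upstream code, and assuming poly2rbox_le135_np
# takes one 8-value polygon and returns (cx, cy, w, h, angle)): an
# axis-aligned 20x10 rectangle converts to a single rbox, while boxes whose
# short edge is under filter_threshold are dropped.
#
#   >>> op = Poly2RBox(filter_threshold=4, filter_mode='edge', rbox_type='le135')
#   >>> rboxes, bboxes, valid_ids = op.get_rbox(
#   ...     np.array([[0., 0., 20., 0., 20., 10., 0., 10.]], dtype=np.float32))
#   >>> rboxes.shape, bboxes[0].tolist()
#   ((1, 5), [0.0, 0.0, 20.0, 10.0])
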
@register_op
class Poly2Array(BaseOperator):
    """ convert gt_poly to np.array for rotated bboxes
    """

    def __init__(self):
        super(Poly2Array, self).__init__()

    def apply(self, sample, context=None):
        if 'gt_poly' in sample:
            sample['gt_poly'] = np.array(
                sample['gt_poly'], dtype=np.float32).reshape((-1, 8))

        return sample

@register_op
class RResize(BaseOperator):
    def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
        """
        Resize image to target size. If keep_ratio is True, resize the
        image's long side to the maximum of target_size; if keep_ratio is
        False, resize the image to target size (h, w).
        Args:
            target_size (int|list): image target size
            keep_ratio (bool): whether to keep the aspect ratio, default True
            interp (int): the interpolation method
        """
        super(RResize, self).__init__()
        self.keep_ratio = keep_ratio
        self.interp = interp
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
                format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, image, scale):
        im_scale_x, im_scale_y = scale

        return cv2.resize(
            image,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)

    def apply_pts(self, pts, scale, size):
        im_scale_x, im_scale_y = scale
        resize_w, resize_h = size
        pts[:, 0::2] *= im_scale_x
        pts[:, 1::2] *= im_scale_y
        pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
        pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
        return pts

    def apply(self, sample, context=None):
        """ Resize the image numpy.
        """
        im = sample['image']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ImageError('{}: image is not 3-dimensional.'.format(self))

        # apply image
        im_shape = im.shape
        if self.keep_ratio:

            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])

            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)

            im_scale = min(target_size_min / im_size_min,
                           target_size_max / im_size_max)

            resize_h = im_scale * float(im_shape[0])
            resize_w = im_scale * float(im_shape[1])

            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / im_shape[0]
            im_scale_x = resize_w / im_shape[1]

        im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
        sample['image'] = im.astype(np.float32)
        sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
        if 'scale_factor' in sample:
            scale_factor = sample['scale_factor']
            sample['scale_factor'] = np.asarray(
                [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
                dtype=np.float32)
        else:
            sample['scale_factor'] = np.asarray(
                [im_scale_y, im_scale_x], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        # apply polygon
        if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
            sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
                                               [im_scale_x, im_scale_y],
                                               [resize_w, resize_h])

        return sample

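# Editor's illustration (not upstream code): with keep_ratio=True, a 500x800
# image resized toward target 1024 is scaled by min(1024/500, 1024/800) = 1.28,
# so the result is 640x1024.
#
#   >>> op = RResize(target_size=1024, keep_ratio=True)
#   >>> sample = {'image': np.zeros((500, 800, 3), dtype=np.uint8)}
#   >>> out = op(sample)
#   >>> out['im_shape'].tolist()
#   [640.0, 1024.0]
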
@register_op
class RandomRFlip(BaseOperator):
    def __init__(self, prob=0.5):
        """
        Args:
            prob (float): the probability of flipping image
        """
        super(RandomRFlip, self).__init__()
        self.prob = prob
        if not (isinstance(self.prob, float)):
            raise TypeError("{}: input type is invalid.".format(self))

    def apply_image(self, image):
        return image[:, ::-1, :]

    def apply_pts(self, pts, width):
        oldx = pts[:, 0::2].copy()
        pts[:, 0::2] = width - oldx - 1
        return pts

    def apply(self, sample, context=None):
        """Flip the image and bounding box.
        Operators:
            1. Flip the image numpy.
            2. Transform the bboxes' x coordinates.
               (Must judge whether the coordinates are normalized!)
            3. Transform the segmentations' x coordinates.
               (Must judge whether the coordinates are normalized!)
        Output:
            sample: the image, bounding box and segmentation part
                    in sample are flipped.
        """
        if np.random.uniform(0, 1) < self.prob:
            im = sample['image']
            height, width = im.shape[:2]
            im = self.apply_image(im)
            if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
                sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
            if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
                sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)

            sample['flipped'] = True
            sample['image'] = im
        return sample

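# Editor's illustration (not upstream code): a horizontal flip maps every x
# coordinate to `width - x - 1`, so a box [10, 5, 30, 25] in a 100px-wide
# image becomes [89, 5, 69, 25] -- note that xmin/xmax swap roles after
# flipping.
#
#   >>> op = RandomRFlip(prob=1.0)   # always flip, for demonstration
#   >>> pts = np.array([[10., 5., 30., 25.]])
#   >>> op.apply_pts(pts, width=100).tolist()
#   [[89.0, 5.0, 69.0, 25.0]]
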
@register_op
class VisibleRBox(BaseOperator):
    """
    In debug mode, visualize images according to `gt_box`.
    (Currently only supported when not cropping and flipping image.)
    """

    def __init__(self, output_dir='debug', is_normalized=False):
        super(VisibleRBox, self).__init__()
        self.output_dir = output_dir
        # whether gt_keypoint coordinates are normalized to [0, 1]; the
        # original code read self.is_normalized without ever setting it
        self.is_normalized = is_normalized
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    def apply(self, sample, context=None):
        image = Image.fromarray(sample['image'].astype(np.uint8))
        out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
        width = sample['w']
        height = sample['h']
        # gt_poly = sample['gt_rbox']
        gt_poly = sample['gt_poly']
        gt_class = sample['gt_class']
        draw = ImageDraw.Draw(image)
        for i in range(gt_poly.shape[0]):
            x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
            draw.line(
                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
                width=2,
                fill='green')
            # draw label
            xmin = min(x1, x2, x3, x4)
            ymin = min(y1, y2, y3, y4)
            text = str(gt_class[i][0])
            tw, th = imagedraw_textsize_c(draw, text)
            draw.rectangle(
                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))

        if 'gt_keypoint' in sample.keys():
            gt_keypoint = sample['gt_keypoint']
            if self.is_normalized:
                for i in range(gt_keypoint.shape[1]):
                    if i % 2:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
                    else:
                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
            for i in range(gt_keypoint.shape[0]):
                keypoint = gt_keypoint[i]
                for j in range(int(keypoint.shape[0] / 2)):
                    # int(np.round(...)) replaces round(...).astype(...),
                    # which is not valid on plain Python numbers
                    x1 = int(np.round(keypoint[2 * j]))
                    y1 = int(np.round(keypoint[2 * j + 1]))
                    draw.ellipse(
                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
        save_path = os.path.join(self.output_dir, out_file_name)
        image.save(save_path, quality=95)
        return sample

@register_op
class Rbox2Poly(BaseOperator):
    """
    Convert rbbox format to poly format.
    """

    def __init__(self):
        super(Rbox2Poly, self).__init__()

    def apply(self, sample, context=None):
        assert 'gt_rbox' in sample
        assert sample['gt_rbox'].shape[1] == 5
        rboxes = sample['gt_rbox']
        polys = rbox2poly_np(rboxes)
        sample['gt_poly'] = polys
        xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
        xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
        # the original code stacked [xmin, ymin, xmin, ymin] here, which
        # produced degenerate boxes; the max corner belongs in xmax/ymax
        sample['gt_bbox'] = np.stack([xmin, ymin, xmax, ymax], axis=1)
        return sample

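# Editor's illustration (not upstream code, assuming the usual rbox2poly_np
# corner ordering): a horizontal rbox centered at (50, 50) with w=20, h=10
# and angle=0 becomes the 4-corner polygon of the same rectangle, and the
# derived gt_bbox is its axis-aligned hull.
#
#   >>> sample = {'gt_rbox': np.array([[50., 50., 20., 10., 0.]],
#   ...                               dtype=np.float32)}
#   >>> out = Rbox2Poly()(sample)
#   >>> out['gt_bbox'][0].tolist()
#   [40.0, 45.0, 60.0, 55.0]
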
72
paddle_detection/ppdet/data/utils.py
Normal file
@@ -0,0 +1,72 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import numbers
import numpy as np

try:
    from collections.abc import Sequence, Mapping
except ImportError:
    from collections import Sequence, Mapping

|
||||
def default_collate_fn(batch):
|
||||
"""
|
||||
Default batch collating function for :code:`paddle.io.DataLoader`,
|
||||
get input data as a list of sample datas, each element in list
|
||||
if the data of a sample, and sample data should composed of list,
|
||||
dictionary, string, number, numpy array, this
|
||||
function will parse input data recursively and stack number,
|
||||
numpy array and paddle.Tensor datas as batch datas. e.g. for
|
||||
following input data:
|
||||
[{'image': np.array(shape=[3, 224, 224]), 'label': 1},
|
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 3},
|
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 4},
|
||||
{'image': np.array(shape=[3, 224, 224]), 'label': 5},]
|
||||
|
||||
|
||||
This default collate function zipped each number and numpy array
|
||||
field together and stack each field as the batch field as follows:
|
||||
{'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
|
||||
Args:
|
||||
batch(list of sample data): batch should be a list of sample data.
|
||||
|
||||
Returns:
|
||||
Batched data: batched each number, numpy array and paddle.Tensor
|
||||
in input data.
|
||||
"""
|
||||
sample = batch[0]
|
||||
if isinstance(sample, np.ndarray):
|
||||
batch = np.stack(batch, axis=0)
|
||||
return batch
|
||||
elif isinstance(sample, numbers.Number):
|
||||
batch = np.array(batch)
|
||||
return batch
|
||||
elif isinstance(sample, (str, bytes)):
|
||||
return batch
|
||||
elif isinstance(sample, Mapping):
|
||||
return {
|
||||
key: default_collate_fn([d[key] for d in batch])
|
||||
for key in sample
|
||||
}
|
||||
elif isinstance(sample, Sequence):
|
||||
sample_fields_num = len(sample)
|
||||
if not all(len(sample) == sample_fields_num for sample in iter(batch)):
|
||||
raise RuntimeError(
|
||||
"fileds number not same among samples in a batch")
|
||||
return [default_collate_fn(fields) for fields in zip(*batch)]
|
||||
|
||||
raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
|
||||
"dict, list, number, but got {}".format(type(sample)))
|
||||
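# Editor's illustration (not part of the upstream file): collating two
# hypothetical samples with default_collate_fn.
if __name__ == "__main__":
    demo_batch = [
        {"image": np.zeros((3, 4, 4), dtype=np.float32), "label": 1},
        {"image": np.ones((3, 4, 4), dtype=np.float32), "label": 3},
    ]
    collated = default_collate_fn(demo_batch)
    # 'image' fields are stacked along a new batch axis; labels become an array
    assert collated["image"].shape == (2, 3, 4, 4)
    assert (collated["label"] == np.array([1, 3])).all()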