更换文档检测模型
This commit is contained in:
845
paddle_detection/ppdet/data/source/keypoint_coco.py
Normal file
845
paddle_detection/ppdet/data/source/keypoint_coco.py
Normal file
@@ -0,0 +1,845 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
this code is based on https://github.com/open-mmlab/mmpose
|
||||
"""
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import json
|
||||
import copy
|
||||
import pycocotools
|
||||
from pycocotools.coco import COCO
|
||||
from .dataset import DetDataset
|
||||
from ppdet.core.workspace import register, serializable
|
||||
|
||||
|
||||
@serializable
class KeypointBottomUpBaseDataset(DetDataset):
    """Base class for bottom-up keypoint datasets.

    Subclasses are expected to override `_get_imganno` (which raises
    `NotImplementedError` here) and `parse_dataset` (a no-op here).

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held,
            joined onto `dataset_dir` to form `img_prefix`.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
            It is invoked as a callable on each record.
        shard (list): [rank, worldsize], the distributed env params.
            Accepted for signature compatibility; this base class does
            not store it.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform
        self.test_mode = test_mode

        self.ann_info['num_joints'] = num_joints
        self.img_ids = []

    def parse_dataset(self):
        """Do nothing; concrete datasets populate `img_ids` here."""
        return

    def __len__(self):
        """Get dataset length."""
        return len(self.img_ids)

    def _get_imganno(self, idx):
        """Get anno for a single image."""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Load the image for sample `idx` and run the transform on it.

        Args:
            idx (int): sample index.

        Returns:
            The value returned by `self.transform` for the record.

        Raises:
            IOError: if the image file cannot be read by OpenCV.
        """
        # Deep copy so transforms never mutate what `_get_imganno` returned.
        records = copy.deepcopy(self._get_imganno(idx))

        image = cv2.imread(records['image_file'])
        if image is None:
            # cv2.imread signals failure by returning None; fail here with
            # the path instead of an opaque cv2.error inside cvtColor.
            raise IOError('Failed to read image: {}'.format(records[
                'image_file']))
        records['image'] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if 'mask' in records:
            # `+ 0` turns a boolean mask into integers before the cast.
            records['mask'] = (records['mask'] + 0).astype('uint8')
        records = self.transform(records)
        return records
|
||||
|
||||
|
||||
@register
@serializable
class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
    """COCO dataset for bottom-up pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
        return_mask (bool): Add the ignore mask as `mask`. Default: True.
        return_bbox (bool): Add boxes as `gt_bbox`. Default: True.
        return_area (bool): Add areas as `gt_areas`. Default: True.
        return_class (bool): Add class ids as `gt_class`. Default: True.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False,
                 return_mask=True,
                 return_bbox=True,
                 return_area=True,
                 return_class=True):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

        self.ann_file = os.path.join(dataset_dir, anno_path)
        self.shard = shard
        self.test_mode = test_mode
        self.return_mask = return_mask
        self.return_bbox = return_bbox
        self.return_area = return_area
        self.return_class = return_class

    def parse_dataset(self):
        """Load the COCO annotation file and select this shard's image ids."""
        self.coco = COCO(self.ann_file)

        self.img_ids = self.coco.getImgIds()
        if not self.test_mode:
            # Outside test mode keep only images that have at least one
            # non-crowd annotation.
            kept_ids = []
            for img_id in self.img_ids:
                anno = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
                if any(obj['iscrowd'] == 0 for obj in anno):
                    kept_ids.append(img_id)
            self.img_ids = kept_ids

        # Every rank takes an equally sized contiguous block; the remainder
        # (fewer than `worldsize` images) is dropped.
        blocknum = int(len(self.img_ids) / self.shard[1])
        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
            self.shard[0] + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        cat_ids = self.coco.getCatIds()
        self.catid2clsid = {catid: i for i, catid in enumerate(cat_ids)}
        print('=> num_images: {}'.format(self.num_images))

    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_imganno(self, idx):
        """Get anno for a single image.

        Args:
            idx (int): image idx

        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        all_anno = coco.loadAnns(coco.getAnnIds(imgIds=img_id))

        # Only non-crowd instances with labelled keypoints become targets.
        anno = [
            obj for obj in all_anno
            if obj['iscrowd'] == 0 and obj['num_keypoints'] > 0
        ]

        db_rec = {}
        joints, orgsize = self._get_joints(anno, idx)
        db_rec['gt_joints'] = joints
        db_rec['im_shape'] = orgsize

        if self.return_bbox:
            db_rec['gt_bbox'] = self._get_bboxs(anno, idx)

        if self.return_class:
            db_rec['gt_class'] = self._get_labels(anno, idx)

        if self.return_area:
            db_rec['gt_areas'] = self._get_areas(anno, idx)

        if self.return_mask:
            # The ignore mask must see the UNFILTERED annotations: it is
            # built from crowd regions and from instances without
            # keypoints, which are exactly the objects removed from `anno`.
            # Passing the filtered list made the mask always all-True.
            db_rec['mask'] = self._get_mask(all_anno, idx)

        db_rec['im_id'] = img_id
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])

        return db_rec

    def _get_joints(self, anno, idx):
        """Get joints for all people in an image.

        Returns:
            tuple: `(joints, orgsize)` where `joints` is a float32 array of
            shape (num_people, num_joints, 3) filled from each object's
            `keypoints`, and `orgsize` is `[height, width, 1]`.
        """
        num_people = len(anno)
        num_joints = self.ann_info['num_joints']

        joints = np.zeros((num_people, num_joints, 3), dtype=np.float32)

        for i, obj in enumerate(anno):
            joints[i, :num_joints, :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])

        img_info = self.coco.loadImgs(self.img_ids[idx])[0]
        orgsize = np.array([img_info['height'], img_info['width'], 1])

        return joints, orgsize

    def _get_bboxs(self, anno, idx):
        """Get boxes as (num_people, 4) float32 in [x1, y1, x2, y2] form.

        Objects without a `bbox` entry keep an all-zero row. `idx` is
        unused and kept for signature compatibility.
        """
        num_people = len(anno)
        gt_bboxes = np.zeros((num_people, 4), dtype=np.float32)

        for i, obj in enumerate(anno):
            if 'bbox' in obj:
                gt_bboxes[i, :] = obj['bbox']

        # Convert [x, y, w, h] into corner form.
        gt_bboxes[:, 2] += gt_bboxes[:, 0]
        gt_bboxes[:, 3] += gt_bboxes[:, 1]
        return gt_bboxes

    def _get_labels(self, anno, idx):
        """Get class ids as a (num_people, 1) float32 array.

        Category ids are mapped through `catid2clsid`; objects without
        `category_id` keep 0. `idx` is unused.
        """
        num_people = len(anno)
        gt_labels = np.zeros((num_people, 1), dtype=np.float32)

        for i, obj in enumerate(anno):
            if 'category_id' in obj:
                catid = obj['category_id']
                gt_labels[i, 0] = self.catid2clsid[catid]
        return gt_labels

    def _get_areas(self, anno, idx):
        """Get object areas as a (num_people,) float32 array.

        Objects without `area` keep 0. `idx` is unused.
        """
        num_people = len(anno)
        gt_areas = np.zeros((num_people, ), dtype=np.float32)

        for i, obj in enumerate(anno):
            if 'area' in obj:
                gt_areas[i] = obj['area']
        return gt_areas

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses.

        Returns:
            A boolean (height, width) array that is False wherever a crowd
            region, or an instance with zero labelled keypoints, has been
            rasterised from its `segmentation`, and True elsewhere.
        """
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]

        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)

        for obj in anno:
            if 'segmentation' in obj:
                if obj['iscrowd']:
                    # Decoded as a single mask.
                    rle = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                       img_info['height'],
                                                       img_info['width'])
                    m += pycocotools.mask.decode(rle)
                elif obj['num_keypoints'] == 0:
                    # Decoded as one mask per returned RLE.
                    rles = pycocotools.mask.frPyObjects(obj['segmentation'],
                                                        img_info['height'],
                                                        img_info['width'])
                    for rle in rles:
                        m += pycocotools.mask.decode(rle)

        return m < 0.5
|
||||
|
||||
|
||||
@register
@serializable
class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
    """CrowdPose dataset for bottom-up pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
        shard (list): [rank, worldsize], the distributed env params
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[],
                 shard=[0, 1],
                 test_mode=False):
        # The parent constructor already stores ann_file, shard and
        # test_mode; its return_* switches keep their defaults.
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform, shard, test_mode)

    def parse_dataset(self):
        """Load the annotation file and select this shard's image ids."""
        self.coco = COCO(self.ann_file)

        self.img_ids = self.coco.getImgIds()
        if not self.test_mode:
            # Outside test mode drop images that have no annotation at all.
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(
                    imgIds=img_id, iscrowd=None)) > 0
            ]

        # Equal contiguous block per rank; the remainder is dropped.
        blocknum = int(len(self.img_ids) / self.shard[1])
        self.img_ids = self.img_ids[(blocknum * self.shard[0]):(blocknum * (
            self.shard[0] + 1))]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)

        self.dataset_name = 'crowdpose'

        # The inherited `_get_labels` (enabled by default) reads this
        # mapping; without it the first sample raised AttributeError.
        cat_ids = self.coco.getCatIds()
        self.catid2clsid = {catid: i for i, catid in enumerate(cat_ids)}

        print('=> num_images: {}'.format(self.num_images))
|
||||
|
||||
|
||||
@serializable
class KeypointTopDownBaseDataset(DetDataset):
    """Base class for top_down datasets.

    Samples are served from the list `self.db`, which starts empty and is
    expected to be filled by subclasses. `_get_db` raises
    `NotImplementedError` here.

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held,
            joined onto `dataset_dir` to form `img_prefix`.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
            It is invoked as a callable on each record.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path)
        self.image_info = {}
        self.ann_info = {}

        self.img_prefix = os.path.join(dataset_dir, image_dir)
        self.transform = transform

        self.ann_info['num_joints'] = num_joints
        self.db = []

    def __len__(self):
        """Get dataset length."""
        return len(self.db)

    def _get_db(self):
        """Get a sample"""
        raise NotImplementedError

    def __getitem__(self, idx):
        """Load the image for sample `idx` and run the transform on it.

        Args:
            idx (int): index into `self.db`.

        Returns:
            The value returned by `self.transform` for the record.

        Raises:
            IOError: if the image file cannot be read by OpenCV.
        """
        # Deep copy so transforms never mutate the cached database entry.
        records = copy.deepcopy(self.db[idx])

        image = cv2.imread(records['image_file'], cv2.IMREAD_COLOR |
                           cv2.IMREAD_IGNORE_ORIENTATION)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with
            # the path instead of an opaque cv2.error inside cvtColor.
            raise IOError('Failed to read image: {}'.format(records[
                'image_file']))
        records['image'] = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Records that carry no score get a score of 1.
        records['score'] = records['score'] if 'score' in records else 1
        records = self.transform(records)
        return records
|
||||
|
||||
|
||||
@register
@serializable
class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
    """COCO dataset for top-down pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes:

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        bbox_file (str): Path to a detection bbox file, joined onto
            `dataset_dir`. Only read when `use_gt_bbox` is False.
            Default: None.
        use_gt_bbox (bool): Whether to use ground truth bbox
            Default: True.
        pixel_std (int): The pixel std of the scale
            Default: 200.
        image_thre (float): The threshold to filter the detection box
            Default: 0.0.
        center_scale (float): When not None, each box centre is jittered
            with probability 0.3 by a random offset within
            +/- center_scale / 2 of the box width and height.
            Default: None.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 bbox_file=None,
                 use_gt_bbox=True,
                 pixel_std=200,
                 image_thre=0.0,
                 center_scale=None):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.bbox_file = bbox_file
        self.use_gt_bbox = use_gt_bbox
        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.image_thre = image_thre
        self.center_scale = center_scale
        self.dataset_name = 'coco'

    def parse_dataset(self):
        """Fill `self.db` from ground-truth boxes or from a detection file."""
        if self.use_gt_bbox:
            self.db = self._load_coco_keypoint_annotations()
        else:
            self.db = self._load_coco_person_detection_results()

    def _load_coco_keypoint_annotations(self):
        """Build one record per annotated person from the COCO annotations.

        Returns:
            list[dict]: records with `image_file`, `center`, `scale`,
            `gt_joints`, `joints_vis` and `im_id`.
        """
        coco = COCO(self.get_anno())
        num_joints = self.ann_info['num_joints']
        gt_db = []
        for index in coco.getImgIds():
            im_ann = coco.loadImgs(index)[0]
            width = im_ann['width']
            height = im_ann['height']
            file_name = im_ann['file_name']
            im_id = int(im_ann["id"])

            annIds = coco.getAnnIds(imgIds=index, iscrowd=False)
            objs = coco.loadAnns(annIds)

            # Clamp each box to the image and drop degenerate ones.
            valid_objs = []
            for obj in objs:
                x, y, w, h = obj['bbox']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                    obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                    valid_objs.append(obj)

            rec = []
            for obj in valid_objs:
                # All-zero keypoints: nothing is labelled for this person.
                if max(obj['keypoints']) == 0:
                    continue

                joints = np.zeros((num_joints, 3), dtype=np.float32)
                joints_vis = np.zeros((num_joints, 3), dtype=np.float32)
                for ipt in range(num_joints):
                    joints[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                    joints[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                    joints[ipt, 2] = 0
                    # Visibility flags above 1 are collapsed to 1.
                    t_vis = obj['keypoints'][ipt * 3 + 2]
                    if t_vis > 1:
                        t_vis = 1
                    joints_vis[ipt, 0] = t_vis
                    joints_vis[ipt, 1] = t_vis
                    joints_vis[ipt, 2] = 0

                center, scale = self._box2cs(obj['clean_bbox'][:4])
                rec.append({
                    'image_file': os.path.join(self.img_prefix, file_name),
                    'center': center,
                    'scale': scale,
                    'gt_joints': joints,
                    'joints_vis': joints_vis,
                    'im_id': im_id,
                })
            gt_db.extend(rec)

        return gt_db

    def _box2cs(self, box):
        """Convert an [x, y, w, h] box into a centre and a scale.

        The box is grown along one side to match the `trainsize` aspect
        ratio, divided by `pixel_std`, and enlarged by 1.25 unless the
        centre x is exactly -1.

        Args:
            box (sequence): at least four numbers, [x, y, w, h].

        Returns:
            tuple: `(center, scale)`, two float32 arrays of length 2.
        """
        x, y, w, h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        # Optional random centre jitter, applied 30% of the time.
        if self.center_scale is not None and np.random.rand() < 0.3:
            center += self.center_scale * (np.random.rand(2) - 0.5) * [w, h]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def _load_coco_person_detection_results(self):
        """Build one record per detected person box from `bbox_file`.

        Detections whose `category_id` is not 1 or whose score is below
        `image_thre` are skipped. Joints are zero placeholders and
        `joints_vis` is all ones.

        Returns:
            list[dict]: records with `image_file`, `im_id`, `center`,
            `scale`, `score`, `gt_joints` and `joints_vis`; an empty list
            when the file holds no detections.
        """
        bbox_file_path = os.path.join(self.dataset_dir, self.bbox_file)
        with open(bbox_file_path, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            print('=> Load %s fail!' % bbox_file_path)
            # Return an empty list, not None, so `len(dataset)` still works.
            return []

        num_joints = self.ann_info['num_joints']
        kpt_db = []
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue
            # Without an explicit file name, fall back to the zero-padded
            # 12-digit image id.
            file_name = det_res[
                'filename'] if 'filename' in det_res else '%012d.jpg' % det_res[
                    'image_id']
            img_name = os.path.join(self.img_prefix, file_name)
            box = det_res['bbox']
            score = det_res['score']
            im_id = int(det_res['image_id'])

            if score < self.image_thre:
                continue

            center, scale = self._box2cs(box)
            joints = np.zeros((num_joints, 3), dtype=np.float32)
            joints_vis = np.ones((num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image_file': img_name,
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'score': score,
                'gt_joints': joints,
                'joints_vis': joints_vis,
            })

        return kpt_db
|
||||
|
||||
|
||||
@register
@serializable
class KeypointTopDownCocoWholeBodyHandDataset(KeypointTopDownBaseDataset):
    """CocoWholeBody dataset for top-down hand pose estimation.

    Every valid left or right hand of every non-crowd annotation becomes
    one sample, which is then passed through the configured transforms.

    COCO-WholeBody Hand keypoint indexes:

        0: 'wrist',
        1: 'thumb1',
        2: 'thumb2',
        3: 'thumb3',
        4: 'thumb4',
        5: 'forefinger1',
        6: 'forefinger2',
        7: 'forefinger3',
        8: 'forefinger4',
        9: 'middle_finger1',
        10: 'middle_finger2',
        11: 'middle_finger3',
        12: 'middle_finger4',
        13: 'ring_finger1',
        14: 'ring_finger2',
        15: 'ring_finger3',
        16: 'ring_finger4',
        17: 'pinky_finger1',
        18: 'pinky_finger2',
        19: 'pinky_finger3',
        20: 'pinky_finger4'

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        trainsize (list):[w, h] Image target size
        transform (composed(operators)): A sequence of data transforms.
        pixel_std (int): The pixel std of the scale
            Default: 200.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 trainsize,
                 transform=[],
                 pixel_std=200):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.trainsize = trainsize
        self.pixel_std = pixel_std
        self.dataset_name = 'coco_wholebady_hand'

    def _box2cs(self, box):
        """Turn an [x, y, w, h] box into a float32 centre and scale pair.

        One side of the box is grown to match the `trainsize` aspect
        ratio; the result is divided by `pixel_std` and enlarged by 1.25
        unless the centre x equals -1.
        """
        left, top, box_w, box_h = box[:4]
        center = np.zeros((2), dtype=np.float32)
        center[0] = left + box_w * 0.5
        center[1] = top + box_h * 0.5
        aspect_ratio = self.trainsize[0] * 1.0 / self.trainsize[1]

        if box_w > aspect_ratio * box_h:
            box_h = box_w * 1.0 / aspect_ratio
        elif box_w < aspect_ratio * box_h:
            box_w = box_h * aspect_ratio

        scale = np.array(
            [box_w * 1.0 / self.pixel_std, box_h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def parse_dataset(self):
        """Populate `self.db` with one record per valid annotated hand."""
        num_joints = self.ann_info['num_joints']
        coco = COCO(self.get_anno())
        samples = []

        for img_id in list(coco.imgs.keys()):
            im_ann = coco.loadImgs(img_id)[0]
            image_file = os.path.join(self.img_prefix, im_ann['file_name'])
            im_id = int(im_ann["id"])

            ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)

            for obj in coco.loadAnns(ann_ids):
                for side in ['left', 'right']:
                    # Skip hands flagged invalid or with no positive value
                    # among their keypoint entries.
                    if not (obj[f'{side}hand_valid'] and
                            max(obj[f'{side}hand_kpts']) > 0):
                        continue

                    joints = np.zeros((num_joints, 3), dtype=np.float32)
                    joints_vis = np.zeros((num_joints, 3), dtype=np.float32)

                    kpts = np.array(obj[f'{side}hand_kpts']).reshape(-1, 3)
                    joints[:, :2] = kpts[:, :2]
                    # Third column capped at 1 and written to both the x
                    # and y visibility slots.
                    joints_vis[:, :2] = np.minimum(1, kpts[:, 2:3])

                    center, scale = self._box2cs(obj[f'{side}hand_box'][:4])
                    samples.append({
                        'image_file': image_file,
                        'center': center,
                        'scale': scale,
                        'gt_joints': joints,
                        'joints_vis': joints_vis,
                        'im_id': im_id,
                    })

        self.db = samples
|
||||
|
||||
|
||||
@register
@serializable
class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
    """MPII dataset for topdown pose estimation.

    Each entry of the JSON annotation file becomes one sample, which is
    then passed through the configured transforms.

    MPII keypoint indexes::

        0: 'right_ankle',
        1: 'right_knee',
        2: 'right_hip',
        3: 'left_hip',
        4: 'left_knee',
        5: 'left_ankle',
        6: 'pelvis',
        7: 'thorax',
        8: 'upper_neck',
        9: 'head_top',
        10: 'right_wrist',
        11: 'right_elbow',
        12: 'right_shoulder',
        13: 'left_shoulder',
        14: 'left_elbow',
        15: 'left_wrist',

    Args:
        dataset_dir (str): Root path to the dataset.
        image_dir (str): Path to a directory where images are held.
        anno_path (str): Relative path to the annotation file.
        num_joints (int): Keypoint numbers
        transform (composed(operators)): A sequence of data transforms.
    """

    def __init__(self,
                 dataset_dir,
                 image_dir,
                 anno_path,
                 num_joints,
                 transform=[]):
        super().__init__(dataset_dir, image_dir, anno_path, num_joints,
                         transform)

        self.dataset_name = 'mpii'

    def parse_dataset(self):
        """Read the annotation JSON and populate `self.db`."""
        with open(self.get_anno()) as anno_file:
            entries = json.load(anno_file)

        samples = []
        for entry in entries:
            image_name = entry['image']
            if 'image_id' in entry:
                im_id = entry['image_id']
            else:
                # Fall back to the numeric stem of the image file name.
                im_id = int(os.path.splitext(image_name)[0])

            center = np.array(entry['center'], dtype=np.float32)
            scale = np.array(
                [entry['scale'], entry['scale']], dtype=np.float32)

            # Adjust center/scale slightly to avoid cropping limbs
            if center[0] != -1:
                center[1] = center[1] + 15 * scale[1]
                scale = scale * 1.25
            # Shift by one pixel, same as the joints below
            # (presumably 1-based to 0-based coordinates -- TODO confirm).
            center = center - 1

            joints = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float32)
            joints_vis = np.zeros(
                (self.ann_info['num_joints'], 3), dtype=np.float32)

            if 'gt_joints' in entry:
                raw_joints = np.array(entry['gt_joints'])
                raw_joints[:, 0:2] = raw_joints[:, 0:2] - 1
                raw_vis = np.array(entry['joints_vis'])
                assert len(raw_joints) == self.ann_info[
                    'num_joints'], 'joint num diff: {} vs {}'.format(
                        len(raw_joints), self.ann_info['num_joints'])

                joints[:, 0:2] = raw_joints[:, 0:2]
                # One visibility flag per joint, written to both the x and
                # y slots.
                joints_vis[:, 0] = raw_vis[:]
                joints_vis[:, 1] = raw_vis[:]

            samples.append({
                'image_file': os.path.join(self.img_prefix, image_name),
                'im_id': im_id,
                'center': center,
                'scale': scale,
                'gt_joints': joints,
                'joints_vis': joints_vis
            })

        print("number length: {}".format(len(samples)))
        self.db = samples
|
||||
Reference in New Issue
Block a user