更换文档检测模型
This commit is contained in:
13
paddle_detection/ppdet/data/crop_utils/__init__.py
Normal file
13
paddle_detection/ppdet/data/crop_utils/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
580
paddle_detection/ppdet/data/crop_utils/annotation_cropper.py
Normal file
580
paddle_detection/ppdet/data/crop_utils/annotation_cropper.py
Normal file
@@ -0,0 +1,580 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import copy
|
||||
import math
|
||||
import random
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
from typing import List, Tuple
|
||||
from collections import defaultdict
|
||||
|
||||
from .chip_box_utils import nms, transform_chip_boxes2image_boxes
|
||||
from .chip_box_utils import find_chips_to_cover_overlaped_boxes
|
||||
from .chip_box_utils import transform_chip_box
|
||||
from .chip_box_utils import intersection_over_box
|
||||
|
||||
|
||||
class AnnoCropper(object):
    """Turn image-level records into multi-scale chip-level records.

    ``crop_anno_records`` builds chip records that carry ground-truth boxes
    (positive chips, and optionally negative chips mined from proposals).
    ``crop_infer_anno_records`` builds box-free chip records, and
    ``aggregate_chips_detections`` maps chip detections back onto the
    original images.
    """

    def __init__(self,
                 image_target_sizes: List[int],
                 valid_box_ratio_ranges: List[List[float]],
                 chip_target_size: int,
                 chip_target_stride: int,
                 use_neg_chip: bool = False,
                 max_neg_num_per_im: int = 8,
                 max_per_img: int = -1,
                 nms_thresh: float = 0.5):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters are just like kernel_size and stride in a CNN.

        Each image has its raw size. After resizing it gets its target size.
        The resizing scale = target_size / raw_size, and the same holds for
        the chips of the image.
        box_ratio = box_raw_size / image_raw_size
                  = box_target_size / image_target_size
        The 'size' mentioned above is the long-side size of image, box or chip.

        :param image_target_sizes: one target long-side per scale,
            e.g. [2000, 1000]
        :param valid_box_ratio_ranges: one [low, high] box_ratio range per
            scale, e.g. [[-1, 0.1], [0.08, -1]]; a non-positive bound means
            "unbounded" on that side
        :param chip_target_size: chip side length in target size, e.g. 500
        :param chip_target_stride: chip stride in target size, e.g. 200
        :param use_neg_chip: whether negative chips are generated
        :param max_neg_num_per_im: upper bound on sampled negative chips
            per image
        :param max_per_img: keep at most this many detections per image
            after NMS; values <= 0 disable the limit
        :param nms_thresh: overlap threshold handed to ``nms``
        """
        self.target_sizes = image_target_sizes
        self.valid_box_ratio_ranges = valid_box_ratio_ranges
        assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
        self.scale_num = len(self.target_sizes)
        self.chip_target_size = chip_target_size  # is target size
        self.chip_target_stride = chip_target_stride  # is target stride
        self.use_neg_chip = use_neg_chip
        self.max_neg_num_per_im = max_neg_num_per_im
        self.max_per_img = max_per_img
        self.nms_thresh = nms_thresh

    @staticmethod
    def _as_int(value):
        """Convert a scalar or a size-1 array-like id to a Python int.

        ``int()`` on an ndarray with ndim > 0 is deprecated since
        NumPy 1.25, while ``.item()`` is valid for any size-1 array.
        """
        return int(np.asarray(value).item())

    def crop_anno_records(self, records: List[dict]):
        """
        The main logic:
        # foreach record(image):
        #   foreach scale:
        #     1 generate chips by chip size and stride for each scale
        #     2 get pos chips
        #       - validate boxes: current scale; h,w >= 1
        #       - find pos chips greedily by valid gt boxes in each scale
        #       - for every valid gt box, find its corresponding pos chips
        #         in each scale
        #     3 get neg chips
        #       - If given proposals, find neg boxes in them which are not
        #         in pos chips
        #       - If got neg boxes in last step, we find neg chips and assign
        #         neg boxes to neg chips such as 2.
        #   4 sample neg chips if too many for one image
        #   transform this image's annotations to chip (pos chips & neg
        #   chips) annotations

        :param records: standard coco_record but with extra key
            `proposals` (Px4), which are predicted by the stage1 model and
            may contain neg boxes.
        :return: new_records, list of dict like
            {
                'im_file': 'fake_image1.jpg',
                'im_id': np.array([1]),  # new _global_chip_id as im_id
                'h': h,  # chip height
                'w': w,  # chip width
                'is_crowd': is_crowd,  # Nx1 -> Mx1
                'gt_class': gt_class,  # Nx1 -> Mx1
                'gt_bbox': gt_bbox,  # Nx4 -> Mx4, 4 is [x1,y1,x2,y2]
                'chip': [x1, y1, x2, y2]  # added
            }

        Attention:
        ------------------------------>x
        |
        |    (x1,y1)------
        |       |        |
        |       |        |
        |       |        |
        |       |        |
        |       |        |
        |       ----------
        |                 (x2,y2)
        |
        v
        y

        If we use [x1, y1, x2, y2] to represent boxes or chips,
        (x1,y1) is the left-top point which is in the box,
        but (x2,y2) is the right-bottom point which is not in the box.
        So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
        And you can use x2-x1 to get width, and you can use
        image[y1:y2, x1:x2] to get the box area.
        """
        self.chip_records = []
        self._global_chip_id = 1
        for r in records:
            # element: (chip, boxes_idx); chip is [x1, y1, x2, y2],
            # boxes_idx is List[int]
            self._cur_im_pos_chips = []
            # element: (chip, neg_box_num)
            self._cur_im_neg_chips = []
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)

                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)

                # dict: chipid->[box_id, ...]
                pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
                    r['gt_bbox'], chips)

                # dict: chipid->neg_box_num
                neg_chip2box_num = self._get_neg_boxes_and_chips(
                    chips,
                    list(pos_chip2boxes_idx.keys()),
                    r.get('proposals', None))

                self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
                                          neg_chip2box_num)

            cur_image_records = self._trans_all_chips2annotations(r)
            self.chip_records.extend(cur_image_records)
        return self.chip_records

    def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx,
                             neg_chip2box_num):
        """Append this scale's pos/neg chips to the per-image accumulators."""
        for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
            chip = np.array(chips[pos_chipid])  # copy chips slice
            self._cur_im_pos_chips.append((chip, boxes_idx))

        if neg_chip2box_num is None:
            return

        for neg_chipid, neg_box_num in neg_chip2box_num.items():
            chip = np.array(chips[neg_chipid])
            self._cur_im_neg_chips.append((chip, neg_box_num))

    def _trans_all_chips2annotations(self, r):
        """Build chip records for one image from the accumulated chips.

        Positive chips are always emitted; sampled negative chips are
        appended only when ``use_neg_chip`` is set.
        """
        gt_bbox = r['gt_bbox']
        im_file = r['im_file']
        is_crowd = r['is_crowd']
        gt_class = r['gt_class']
        # remaining keys of r (im_id, h, w) are not carried over
        chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
                                                         is_crowd, gt_class)

        if not self.use_neg_chip:
            return chip_records

        sampled_neg_chips = self._sample_neg_chips()
        neg_chip_records = self._trans_neg_chips2annotations(
            im_file, sampled_neg_chips)
        chip_records.extend(neg_chip_records)
        return chip_records

    def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
                                     gt_class):
        """Create one record per positive chip, with boxes in chip coords."""
        chip_records = []
        for chip, boxes_idx in self._cur_im_pos_chips:
            chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox,
                                                            boxes_idx, chip)
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': chip_bbox,
                'is_crowd': is_crowd[final_boxes_idx].copy(),
                'gt_class': gt_class[final_boxes_idx].copy(),
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _sample_neg_chips(self):
        """Pick negative chips for the current image.

        At most ``min(pos_num + 2, max_neg_num_per_im)`` chips are returned.
        When there are more candidates, the ones holding the most negative
        boxes are shortlisted (1.5x the quota) and sampled at random.
        """
        pos_num = len(self._cur_im_pos_chips)
        neg_num = len(self._cur_im_neg_chips)
        sample_num = min(pos_num + 2, self.max_neg_num_per_im)
        assert sample_num >= 1
        if neg_num <= sample_num:
            return self._cur_im_neg_chips

        candidate_num = int(sample_num * 1.5)
        candidate_neg_chips = sorted(
            self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
        random.shuffle(candidate_neg_chips)
        sampled_neg_chips = candidate_neg_chips[:sample_num]
        return sampled_neg_chips

    def _trans_neg_chips2annotations(self,
                                     im_file: str,
                                     sampled_neg_chips: List[Tuple]):
        """Create one record per negative chip, with empty annotations."""
        chip_records = []
        for chip, neg_box_num in sampled_neg_chips:
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            rec = {
                'im_file': im_file,
                'im_id': np.array([self._global_chip_id]),
                'h': chip_h,
                'w': chip_w,
                'gt_bbox': np.zeros(
                    (0, 4), dtype=np.float32),
                'is_crowd': np.zeros(
                    (0, 1), dtype=np.int32),
                'gt_class': np.zeros(
                    (0, 1), dtype=np.int32),
                'chip': chip
            }
            self._global_chip_id += 1
            chip_records.append(rec)
        return chip_records

    def _get_current_scale_parameters(self, scale_i, r):
        """Cache size, scale and valid ratio range for (record, scale_i)."""
        im_size = max(r['h'], r['w'])
        im_target_size = self.target_sizes[scale_i]
        self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
        self._cur_scale = self._get_current_scale(im_target_size, im_size)
        self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]

    def _get_current_scale(self, im_target_size, im_size):
        """Return the resize factor target_size / raw_size."""
        return im_target_size / im_size

    def _create_chips(self, h: int, w: int, scale: float):
        """
        Generate chips by chip_target_size and chip_target_stride.
        These two parameters are just like kernel_size and stride in a CNN.

        :param h: raw image height
        :param w: raw image width
        :param scale: target_size / raw_size for the current scale
        :return: chips, Cx4 float32, xy in raw size dimension
        """
        chip_size = self.chip_target_size  # omit target for simplicity
        stride = self.chip_target_stride
        width = int(scale * w)
        height = int(scale * h)
        min_chip_location_diff = 20  # in target size

        assert chip_size >= stride
        chip_overlap = chip_size - stride

        def _num_steps(length):
            span = length - chip_overlap
            if span % stride > min_chip_location_diff:
                # the part not divisible by stride is fairly large: keep it
                return max(1, int(math.ceil(span / stride)))
            # the part not divisible by stride is small: drop it
            return max(1, int(math.floor(span / stride)))

        w_steps = _num_steps(width)
        h_steps = _num_steps(height)

        chips = list()
        for j in range(h_steps):
            for i in range(w_steps):
                x1 = i * stride
                y1 = j * stride
                x2 = min(x1 + chip_size, width)
                y2 = min(y1 + chip_size, height)
                chips.append([x1, y1, x2, y2])

        # sanity check on chip size
        for item in chips:
            if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
                    1] > chip_size * 1.1:
                raise ValueError(item)
        chips = np.array(chips, dtype=np.float32)

        raw_size_chips = chips / scale
        return raw_size_chips

    def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
        """Return dict chipid->[gt box idx, ...] for the current scale."""
        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale
        # Nx4, N
        valid_boxes, valid_boxes_idx = self._validate_boxes(
            valid_ratio_range, im_size, gt_bbox, scale)
        # dict: chipid->[box_id, ...]
        pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
                                                  valid_boxes_idx)
        return pos_chip2boxes_idx

    def _validate_boxes(self,
                        valid_ratio_range: List[float],
                        im_size: int,
                        gt_boxes: np.ndarray,
                        scale: float):
        """
        Select boxes whose long side / im_size lies in [low, high) and whose
        short side, after scaling, is at least 2.

        :param valid_ratio_range: [low, high]; a non-positive bound means
            unbounded on that side
        :param im_size: long side of the raw image
        :param gt_boxes: Nx4 array of [x1, y1, x2, y2]
        :param scale: target_size / raw_size
        :return: valid_boxes: Mx4, valid_boxes_idx: M (indices into gt_boxes)
        """
        # side lengths are truncated to integers before the checks
        ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
        hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
        maxs = np.maximum(ws, hs)
        box_ratio = maxs / im_size
        mins = np.minimum(ws, hs)
        target_mins = mins * scale

        low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
        high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
            np.float32).max

        valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) &
                                     (target_mins >= 2))[0]
        valid_boxes = gt_boxes[valid_boxes_idx]
        return valid_boxes, valid_boxes_idx

    def _find_pos_chips(self,
                        chips: np.ndarray,
                        valid_boxes: np.ndarray,
                        valid_boxes_idx: np.ndarray):
        """
        :param chips: Cx4
        :param valid_boxes: Bx4
        :param valid_boxes_idx: B, indices of valid_boxes in the raw gt boxes
        :return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
        """
        iob = intersection_over_box(chips, valid_boxes)  # overlap, CxB

        # a chip is chosen only for boxes it contains completely ...
        iob_threshold_to_find_chips = 1.
        pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
            iob, iob_threshold_to_find_chips)
        pos_chip_ids = set(pos_chip_ids)

        # ... but a chosen chip also receives boxes it covers by >= 0.5
        iob_threshold_to_assign_box = 0.5
        pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
            iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
        return pos_chip2boxes_idx

    def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
        """Thin wrapper around ``find_chips_to_cover_overlaped_boxes``."""
        return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)

    def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
                                   valid_boxes_idx):
        """Map each chosen chip to the raw gt indices it sufficiently covers."""
        chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
        pos_chip2boxes_idx = defaultdict(list)
        for chip_id, box_id in zip(chip_ids, box_ids):
            if chip_id not in pos_chip_ids:
                continue
            raw_gt_box_idx = valid_boxes_idx[box_id]
            pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
        return pos_chip2boxes_idx

    def _get_neg_boxes_and_chips(self,
                                 chips: np.ndarray,
                                 pos_chip_ids: list,
                                 proposals):
        """
        :param chips: Cx4
        :param pos_chip_ids: ids of the positive chips of this scale
        :param proposals: Px4 or None
        :return: neg_chip2box_num, None or dict: chipid->neg_box_num
        """
        if not self.use_neg_chip:
            return None

        # train proposals may be None
        if proposals is None or len(proposals) < 1:
            return None

        valid_ratio_range = self._cur_valid_ratio_range
        im_size = self._cur_im_size
        scale = self._cur_scale

        valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
                                              proposals, scale)
        neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
        neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
        return neg_chip2box_num

    def _find_neg_boxes(self,
                        chips: np.ndarray,
                        pos_chip_ids: list,
                        valid_props: np.ndarray):
        """
        Keep proposals that no positive chip covers by at least 0.5.

        :return: neg_boxes: Nx4
        """
        if len(pos_chip_ids) == 0:
            return valid_props

        pos_chips = chips[pos_chip_ids]
        iob = intersection_over_box(pos_chips, valid_props)
        overlap_per_prop = np.max(iob, axis=0)
        non_overlap_props_idx = overlap_per_prop < 0.5
        neg_boxes = valid_props[non_overlap_props_idx]
        return neg_boxes

    def _find_neg_chips(self,
                        chips: np.ndarray,
                        pos_chip_ids: list,
                        neg_boxes: np.ndarray):
        """
        Choose, among the non-positive chips, chips covering the neg boxes.

        :return: neg_chip2box_num, dict: chipid->neg_box_num
        """
        neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
        neg_chips = chips[neg_chip_ids]

        iob = intersection_over_box(neg_chips, neg_boxes)
        iob_threshold_to_find_chips = 0.7
        chosen_neg_chip_ids, chip_id2overlap_box_num = \
            self._find_chips_to_cover_overlaped_boxes(
                iob, iob_threshold_to_find_chips)

        neg_chipid2box_num = {}
        for cid in chosen_neg_chip_ids:
            box_num = chip_id2overlap_box_num[cid]
            # cid indexes neg_chips; translate back to an index into chips
            raw_chip_id = neg_chip_ids[cid]
            neg_chipid2box_num[raw_chip_id] = box_num
        return neg_chipid2box_num

    def crop_infer_anno_records(self, records: List[dict]):
        """
        Transform image records to chip records (no ground-truth needed).

        :param records: list of dict with keys 'h', 'w', 'im_file', 'im_id'
        :return: new_records, list of dict like
            {
                'im_file': 'fake_image1.jpg',
                'im_id': 1,  # new _global_chip_id as im_id (plain int)
                'h': h,  # chip height
                'w': w,  # chip width
                'chip': [x1, y1, x2, y2]  # added
                'ori_im_h': ori_im_h  # added, origin image height
                'ori_im_w': ori_im_w  # added, origin image width
                'scale_i': 0  # added, index of the scale
            }
        """
        self.chip_records = []
        self._global_chip_id = 1  # im_id start from 1
        self._global_chip_id2img_id = {}

        for r in records:
            for scale_i in range(self.scale_num):
                self._get_current_scale_parameters(scale_i, r)
                # Cx4
                chips = self._create_chips(r['h'], r['w'], self._cur_scale)
                cur_img_chip_record = self._get_chips_records(r, chips,
                                                              scale_i)
                self.chip_records.extend(cur_img_chip_record)

        return self.chip_records

    def _get_chips_records(self, rec, chips, scale_i):
        """Create one inference record per chip and register its image id."""
        cur_img_chip_records = []
        ori_im_h = rec["h"]
        ori_im_w = rec["w"]
        im_file = rec["im_file"]
        # hoisted: the original image id is the same for every chip
        ori_im_id = self._as_int(rec["im_id"])
        for chip in chips:
            chip_rec = {}
            x1, y1, x2, y2 = chip
            chip_h = y2 - y1
            chip_w = x2 - x1
            chip_rec["im_file"] = im_file
            chip_rec["im_id"] = self._global_chip_id
            chip_rec["h"] = chip_h
            chip_rec["w"] = chip_w
            chip_rec["chip"] = chip
            chip_rec["ori_im_h"] = ori_im_h
            chip_rec["ori_im_w"] = ori_im_w
            chip_rec["scale_i"] = scale_i

            self._global_chip_id2img_id[self._global_chip_id] = ori_im_id
            self._global_chip_id += 1
            cur_img_chip_records.append(chip_rec)

        return cur_img_chip_records

    def aggregate_chips_detections(self, results, records=None):
        """
        # 1. transform chip dets to image dets
        # 2. nms boxes per image;
        # 3. format output results

        :param results: list of dict with keys 'bbox', 'bbox_num', 'im_id'
            (chip-level detections)
        :param records: chip records to look chips up in; when empty or
            None, the records produced by the last cropping call are used
        :return: list of dict with keys 'im_id', 'bbox', 'bbox_num', one
            entry per original image that has at least one chip detection
        """
        results = deepcopy(results)
        records = records if records else self.chip_records
        img_id2bbox = self._transform_chip2image_bboxes(results, records)
        nms_img_id2bbox = self._nms_dets(img_id2bbox)
        aggregate_results = self._reformat_results(nms_img_id2bbox)
        return aggregate_results

    def _transform_chip2image_bboxes(self, results, records):
        """Group chip detections by original image id, in image coords."""
        # 1. Transform chip dets to image dets;
        # 2. Filter valid range;
        # 3. Reformat and aggregate chip dets per original image
        img_id2bbox = defaultdict(list)
        for result in results:
            bbox_locs = result['bbox']
            bbox_nums = result['bbox_num']
            if len(bbox_locs) == 1 and bbox_locs[0][0] == -1:
                # current batch has no detections
                # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
                # MultiClassNMS output: if there are no detected boxes for
                # all images, lod is set to {1} and Out only contains one
                # value which is -1.
                continue
            im_ids = result['im_id']

            last_bbox_num = 0
            for idx, im_id in enumerate(im_ids):
                cur_bbox_len = bbox_nums[idx]
                bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
                last_bbox_num += cur_bbox_len
                # box: [num_id, score, xmin, ymin, xmax, ymax]
                if len(bboxes) == 0:  # current image has no detections
                    continue

                chip_id = self._as_int(im_id)
                chip_rec = records[chip_id - 1]  # im_id starts from 1
                image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])

                bboxes = transform_chip_boxes2image_boxes(
                    bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
                    chip_rec["ori_im_w"])

                scale_i = chip_rec["scale_i"]
                cur_scale = self._get_current_scale(
                    self.target_sizes[scale_i], image_size)
                _, valid_boxes_idx = self._validate_boxes(
                    self.valid_box_ratio_ranges[scale_i], image_size,
                    bboxes[:, 2:], cur_scale)
                ori_img_id = self._global_chip_id2img_id[chip_id]

                img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])

        return img_id2bbox

    def _nms_dets(self, img_id2bbox):
        """Run NMS per image and optionally cap the number of detections."""
        # 1. NMS on each image (a single nms call over all its boxes)
        # 2. Limit number of detections to max_per_img if requested
        max_per_img = self.max_per_img
        nms_thresh = self.nms_thresh

        for img_id in img_id2bbox:
            # list of np.array of shape [N, 6],
            # 6 is [label, score, x1, y1, x2, y2]
            box = img_id2bbox[img_id]
            box = np.concatenate(box, axis=0)
            nms_dets = nms(box, nms_thresh)
            if max_per_img > 0:
                if len(nms_dets) > max_per_img:
                    keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
                    nms_dets = nms_dets[keep]

            img_id2bbox[img_id] = nms_dets

        return img_id2bbox

    def _reformat_results(self, img_id2bbox):
        """Convert {img_id: Nx6 dets} to a list of result dicts."""
        im_ids = img_id2bbox.keys()
        results = []
        for img_id in im_ids:  # output follows the dict's iteration order
            if len(img_id2bbox[img_id]) == 0:
                bbox = np.array(
                    [[-1., 0., 0., 0., 0., 0.]])  # edge case: no detections
                bbox_num = np.array([0])
            else:
                # np.array of shape [N, 6],
                # 6 is [label, score, x1, y1, x2, y2]
                bbox = img_id2bbox[img_id]
                bbox_num = np.array([len(bbox)])
            res = dict(im_id=np.array([[img_id]]), bbox=bbox,
                       bbox_num=bbox_num)
            results.append(res)
        return results
|
||||
170
paddle_detection/ppdet/data/crop_utils/chip_box_utils.py
Normal file
170
paddle_detection/ppdet/data/crop_utils/chip_box_utils.py
Normal file
@@ -0,0 +1,170 @@
|
||||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def bbox_area(boxes):
    """Return the area (width * height) of every [x1, y1, x2, y2] row."""
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
|
||||
|
||||
|
||||
def intersection_over_box(chips, boxes):
    """
    Intersection area divided by box area, for every (chip, box) pair.

    :param chips: C rows of [x1, y1, x2, y2]
    :param boxes: B rows of [x1, y1, x2, y2]
    :return: iob, CxB
    """
    num_chips, num_boxes = chips.shape[0], boxes.shape[0]
    if num_chips == 0 or num_boxes == 0:
        return np.zeros([num_chips, num_boxes], dtype='float32')

    # add a middle axis so that chips broadcast against boxes: Cx1x4
    chips_b = chips[:, np.newaxis, :]
    lower_right = np.minimum(chips_b[:, :, 2:], boxes[:, 2:])  # CxBx2
    upper_left = np.maximum(chips_b[:, :, :2], boxes[:, :2])  # CxBx2
    # disjoint pairs give negative extents; floor them at zero
    overlap_wh = np.clip(lower_right - upper_left, a_min=0, a_max=None)
    overlap_area = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]  # CxB

    areas = bbox_area(boxes)  # B
    return overlap_area / areas[np.newaxis, :]
|
||||
|
||||
|
||||
def clip_boxes(boxes, im_shape):
    """
    Clip boxes to image boundaries (modifies ``boxes`` in place).

    :param boxes: [N, 4], columns are x1, y1, x2, y2
    :param im_shape: tuple of 2, [h, w]
    :return: [N, 4], the same array that was passed in
    """
    height, width = im_shape[0], im_shape[1]
    # (column, lower bound, upper bound):
    # x1 in [0, w-1], y1 in [0, h-1], x2 in [1, w], y2 in [1, h]
    limits = (
        (0, 0, width - 1),
        (1, 0, height - 1),
        (2, 1, width),
        (3, 1, height),
    )
    for col, lower, upper in limits:
        boxes[:, col] = np.clip(boxes[:, col], lower, upper)
    return boxes
|
||||
|
||||
|
||||
def transform_chip_box(gt_bbox: 'Gx4', boxes_idx: 'B', chip: '4'):
    """
    Express the selected gt boxes in the coordinate frame of ``chip``.

    The selected boxes are shifted by the chip's top-left corner, clipped to
    the chip extent, and boxes whose clipped width or height (truncated to
    an integer) is below 2 are dropped.

    :param gt_bbox: Gx4 array of [x1, y1, x2, y2]
    :param boxes_idx: indices into gt_bbox of the boxes assigned to the chip
    :param chip: [x1, y1, x2, y2] of the chip
    :return: (chip-relative boxes that survived, their indices into gt_bbox)
    """
    boxes_idx = np.array(boxes_idx)
    local_boxes = gt_bbox[boxes_idx].copy()  # Bx4, leaves gt_bbox untouched
    chip_x1, chip_y1, chip_x2, chip_y2 = chip

    # translate so that the chip's top-left corner becomes the origin
    for x_col in (0, 2):
        local_boxes[:, x_col] -= chip_x1
    for y_col in (1, 3):
        local_boxes[:, y_col] -= chip_y1

    chip_shape = (chip_y2 - chip_y1, chip_x2 - chip_x1)  # (h, w)
    local_boxes = clip_boxes(local_boxes, chip_shape)

    box_w = (local_boxes[:, 2] - local_boxes[:, 0]).astype(np.int32)
    box_h = (local_boxes[:, 3] - local_boxes[:, 1]).astype(np.int32)
    keep = (box_w >= 2) & (box_h >= 2)
    return local_boxes[keep], boxes_idx[keep]
|
||||
|
||||
|
||||
def find_chips_to_cover_overlaped_boxes(iob, overlap_threshold):
    """
    Greedily choose chips until every coverable box is covered.

    A chip "covers" a box when iob[chip, box] >= overlap_threshold. On each
    round the chip covering the most still-uncovered boxes is chosen and all
    boxes it covers are removed from further consideration.

    :param iob: CxB matrix of intersection-over-box values
    :param overlap_threshold: minimum iob for a chip to cover a box
    :return: (chosen chip ids in pick order,
              length-C array with the number of boxes each chip covers)
    """
    hit_chips, hit_boxes = np.nonzero(iob >= overlap_threshold)
    # minlength pads the counts with zeros up to one entry per chip
    boxes_per_chip = np.bincount(hit_chips, minlength=len(iob))

    selected = []
    while hit_boxes.size > 0:
        best_chip = np.argmax(np.bincount(hit_chips))
        assert best_chip not in selected
        selected.append(best_chip)

        covered_now = hit_boxes[hit_chips == best_chip]
        still_open = ~np.isin(hit_boxes, covered_now)
        hit_chips = hit_chips[still_open]
        hit_boxes = hit_boxes[still_open]
    return selected, boxes_per_chip
|
||||
|
||||
|
||||
def transform_chip_boxes2image_boxes(chip_boxes, chip, img_h, img_w):
    """
    Map chip-relative detections to original-image coordinates.

    :param chip_boxes: N rows of [label, score, x1, y1, x2, y2] in chip
        coordinates (N >= 1)
    :param chip: [x1, y1, x2, y2] of the chip in the original image
    :param img_h: original image height
    :param img_w: original image width
    :return: Nx6 array sorted by descending score, with the coordinate
        columns shifted by the chip origin and clipped to the image
    """
    chip_boxes = np.array(sorted(chip_boxes, key=lambda item: -item[1]))
    xmin, ymin, _, _ = chip
    # Transform to origin image loc
    chip_boxes[:, 2] += xmin
    chip_boxes[:, 4] += xmin
    chip_boxes[:, 3] += ymin
    chip_boxes[:, 5] += ymin

    # Clip ONLY the coordinate columns (2..5). Handing the whole Nx6 array
    # to a 4-column clipper would clamp label/score and leave x2/y2
    # unclipped. Bounds follow clip_boxes: x1 in [0, w-1], y1 in [0, h-1],
    # x2 in [1, w], y2 in [1, h].
    lower = np.array([0, 0, 1, 1])
    upper = np.array([img_w - 1, img_h - 1, img_w, img_h])
    chip_boxes[:, 2:6] = np.clip(chip_boxes[:, 2:6], lower, upper)
    return chip_boxes
|
||||
|
||||
|
||||
def nms(dets, thresh):
    """
    Apply classic DPM-style greedy NMS.

    :param dets: N rows of [label, score, x1, y1, x2, y2]; the label column
        is not consulted
    :param thresh: a lower-scoring box is suppressed when its IoU with a
        kept box is >= thresh
    :return: the kept rows of dets, in their original row order
    """
    num_dets = dets.shape[0]
    if num_dets == 0:
        return dets[[], :]

    scores = dets[:, 1]
    left, top = dets[:, 2], dets[:, 3]
    right, bottom = dets[:, 4], dets[:, 5]

    # widths/heights use the inclusive-pixel (+1) convention
    areas = (right - left + 1) * (bottom - top + 1)
    ranking = scores.argsort()[::-1]  # indices from highest to lowest score
    removed = np.zeros(num_dets, dtype=bool)

    for pos, cur in enumerate(ranking):
        if removed[cur]:
            continue
        # every box ranked below the current one, compared in one shot
        rest = ranking[pos + 1:]
        inter_w = np.maximum(
            0.0,
            np.minimum(right[cur], right[rest]) -
            np.maximum(left[cur], left[rest]) + 1)
        inter_h = np.maximum(
            0.0,
            np.minimum(bottom[cur], bottom[rest]) -
            np.maximum(top[cur], top[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[cur] + areas[rest] - inter)
        # re-marking an already removed box is harmless
        removed[rest[iou >= thresh]] = True

    survivors = np.where(~removed)[0]
    return dets[survivors, :]
|
||||
Reference in New Issue
Block a user