Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,25 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import matching
from . import tracker
from . import motion
from . import visualization
from . import utils
from .matching import *
from .tracker import *
from .motion import *
from .visualization import *
from .utils import *


@@ -0,0 +1,21 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_matching
from . import deepsort_matching
from . import ocsort_matching
from .jde_matching import *
from .deepsort_matching import *
from .ocsort_matching import *


@@ -0,0 +1,379 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/tree/master/deep_sort
"""
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..motion import kalman_filter
INFTY_COST = 1e+5
__all__ = [
'iou_1toN',
'iou_cost',
'_nn_euclidean_distance',
'_nn_cosine_distance',
'NearestNeighborDistanceMetric',
'min_cost_matching',
'matching_cascade',
'gate_cost_matrix',
]
def iou_1toN(bbox, candidates):
"""
Compute intersection over union (IoU) between one box and N candidates.
Args:
bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`.
candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the
same format as `bbox`.
Returns:
ious (ndarray): The intersection over union in [0, 1] between the `bbox`
and each candidate. A higher score means a larger fraction of the
`bbox` is occluded by the candidate.
"""
bbox_tl = bbox[:2]
bbox_br = bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
ious = area_intersection / (area_bbox + area_candidates - area_intersection)
return ious
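# A quick illustrative check of iou_1toN (the tlwh boxes below are made up:
# an identical box gives IoU 1.0, a half-offset box gives 25/175 = 1/7):
_example_bbox = np.array([0., 0., 10., 10.])
_example_candidates = np.array([[0., 0., 10., 10.], [5., 5., 10., 10.]])
assert np.allclose(iou_1toN(_example_bbox, _example_candidates), [1., 1. / 7.])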
def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
"""
IoU distance metric.
Args:
tracks (list[Track]): A list of tracks.
detections (list[Detection]): A list of detections.
track_indices (Optional[list[int]]): A list of indices to tracks that
should be matched. Defaults to all `tracks`.
detection_indices (Optional[list[int]]): A list of indices to detections
that should be matched. Defaults to all `detections`.
Returns:
cost_matrix (ndarray): A cost matrix of shape len(track_indices),
len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = 1e+5
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates)
return cost_matrix
def _nn_euclidean_distance(s, q):
"""
Compute pair-wise squared (Euclidean) distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest squared Euclidean distance to a sample in `s`.
"""
s, q = np.asarray(s), np.asarray(q)
if len(s) == 0 or len(q) == 0:
return np.zeros((len(s), len(q)))
s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1)
distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :]
distances = np.clip(distances, 0., float(np.inf))
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(s, q):
"""
Compute pair-wise cosine distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest cosine distance to a sample in `s`.
"""
s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True)
q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True)
distances = 1. - np.dot(s, q.T)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Args:
metric (str): Either "euclidean" or "cosine".
matching_threshold (float): The matching threshold. Samples with larger
distance are considered an invalid match.
budget (Optional[int]): If not None, fix samples per class to at most
this number. Removes the oldest samples when the budget is reached.
Attributes:
samples (Dict[int -> List[ndarray]]): A dictionary that maps from target
identities to the list of samples that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""
Update the distance metric with new data.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (ndarray): An integer array of associated target identities.
active_targets (List[int]): A list of targets that are currently
present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""
Compute distance between features and targets.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (list[int]): A list of targets to match the given `features` against.
Returns:
cost_matrix (ndarray): a cost matrix of shape len(targets), len(features),
where element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix
def min_cost_matching(distance_metric,
max_distance,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Solve linear assignment problem.
Args:
distance_metric :
Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(tracks, detections, track_indices,
detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
indices = linear_sum_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in indices[1]:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in indices[0]:
unmatched_tracks.append(track_idx)
for row, col in zip(indices[0], indices[1]):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(distance_metric,
max_distance,
cascade_depth,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Run matching cascade.
Args:
distance_metric :
Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
cascade_depth (int): The cascade depth, should be set to the maximum
track age.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(kf,
cost_matrix,
tracks,
detections,
track_indices,
detection_indices,
gated_cost=INFTY_COST,
only_position=False):
"""
Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Args:
kf (object): The Kalman filter.
cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the
number of track indices and M is the number of detection indices,
such that entry (i, j) is the association cost between
`tracks[track_indices[i]]` and `detections[detection_indices[j]]`.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (List[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
gated_cost (Optional[float]): Entries in the cost matrix corresponding
to infeasible associations are set to this value. Defaults to a very
large value.
only_position (Optional[bool]): If True, only the x, y position of the
state distribution is considered during gating. Default False.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(track.mean, track.covariance,
measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
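# A minimal usage sketch of NearestNeighborDistanceMetric; the feature size,
# threshold, and target ids below are illustrative assumptions:
if __name__ == '__main__':
    metric = NearestNeighborDistanceMetric('cosine', matching_threshold=0.2, budget=100)
    feats = np.random.rand(3, 128).astype(np.float32)  # 3 gallery features, 128-D
    metric.partial_fit(feats, targets=np.array([1, 1, 2]), active_targets=[1, 2])
    query = np.random.rand(2, 128).astype(np.float32)  # 2 query features
    cost = metric.distance(query, targets=[1, 2])
    print(cost.shape)  # (2, 2): len(targets) x len(features)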


@@ -0,0 +1,163 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
try:
import lap
except ImportError:
print(
'Warning: Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
)
import scipy
import numpy as np
from scipy.spatial.distance import cdist
from ..motion import kalman_filter
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'merge_matches',
'linear_assignment',
'bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
]
def merge_matches(m1, m2, shape):
O, P, Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix(
(np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix(
(np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1 * M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def linear_assignment(cost_matrix, thresh):
try:
import lap
except Exception as e:
raise RuntimeError(
'Unable to use JDE/FairMOT/ByteTrack, please install lap, for example: `pip install lap`, see https://github.com/gatagat/lap'
)
if cost_matrix.size == 0:
return np.empty(
(0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(
range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
def bbox_ious(atlbrs, btlbrs):
boxes = np.ascontiguousarray(atlbrs, dtype=np.float32)
query_boxes = np.ascontiguousarray(btlbrs, dtype=np.float32)
N = boxes.shape[0]
K = query_boxes.shape[0]
ious = np.zeros((N, K), dtype=boxes.dtype)
if N * K == 0:
return ious
for k in range(K):
box_area = ((query_boxes[k, 2] - query_boxes[k, 0] + 1) *
(query_boxes[k, 3] - query_boxes[k, 1] + 1))
for n in range(N):
iw = (min(boxes[n, 2], query_boxes[k, 2]) - max(
boxes[n, 0], query_boxes[k, 0]) + 1)
if iw > 0:
ih = (min(boxes[n, 3], query_boxes[k, 3]) - max(
boxes[n, 1], query_boxes[k, 1]) + 1)
if ih > 0:
ua = float((boxes[n, 2] - boxes[n, 0] + 1) * (boxes[
n, 3] - boxes[n, 1] + 1) + box_area - iw * ih)
ious[n, k] = iw * ih / ua
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU between two list[STrack].
"""
if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = bbox_ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='euclidean'):
"""
Compute cost based on features between two list[STrack].
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray(
[track.curr_feat for track in detections], dtype=np.float32)
track_features = np.asarray(
[track.smooth_feat for track in tracks], dtype=np.float32)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features,
metric))  # Normalized features
return cost_matrix
def fuse_motion(kf,
cost_matrix,
tracks,
detections,
only_position=False,
lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean,
track.covariance,
measurements,
only_position,
metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_
) * gating_distance
return cost_matrix
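# A minimal usage sketch of iou_distance + linear_assignment (requires the
# `lap` package; the tlbr boxes below are made up for illustration):
if __name__ == '__main__':
    atlbrs = [np.array([0., 0., 10., 10.])]  # stand-ins for track boxes
    btlbrs = [np.array([0., 0., 10., 10.]), np.array([20., 20., 30., 30.])]
    cost = iou_distance(atlbrs, btlbrs)  # 1 - IoU, shape (1, 2)
    matches, u_track, u_det = linear_assignment(cost, thresh=0.7)
    print(matches, u_track, u_det)  # [[0 0]] [] [1]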


@@ -0,0 +1,165 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/noahcao/OC_SORT/blob/master/trackers/ocsort_tracker/association.py
"""
import os
import numpy as np
def iou_batch(bboxes1, bboxes2):
bboxes2 = np.expand_dims(bboxes2, 0)
bboxes1 = np.expand_dims(bboxes1, 1)
xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
w = np.maximum(0., xx2 - xx1)
h = np.maximum(0., yy2 - yy1)
area = w * h
iou_matrix = area / ((bboxes1[..., 2] - bboxes1[..., 0]) *
(bboxes1[..., 3] - bboxes1[..., 1]) +
(bboxes2[..., 2] - bboxes2[..., 0]) *
(bboxes2[..., 3] - bboxes2[..., 1]) - area)
return iou_matrix
def speed_direction_batch(dets, tracks):
tracks = tracks[..., np.newaxis]
CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (
tracks[:, 1] + tracks[:, 3]) / 2.0
dx = CX1 - CX2
dy = CY1 - CY2
norm = np.sqrt(dx**2 + dy**2) + 1e-6
dx = dx / norm
dy = dy / norm
return dy, dx
def linear_assignment(cost_matrix):
try:
import lap
_, x, y = lap.lapjv(cost_matrix, extend_cost=True)
return np.array([[y[i], i] for i in x if i >= 0])
except ImportError:
from scipy.optimize import linear_sum_assignment
x, y = linear_sum_assignment(cost_matrix)
return np.array(list(zip(x, y)))
def associate(detections, trackers, iou_threshold, velocities, previous_obs,
vdc_weight):
if (len(trackers) == 0):
return np.empty(
(0, 2), dtype=int), np.arange(len(detections)), np.empty(
(0, 5), dtype=int)
Y, X = speed_direction_batch(detections, previous_obs)
inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
diff_angle_cos = inertia_X * X + inertia_Y * Y
diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
diff_angle = np.arccos(diff_angle_cos)
diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi
valid_mask = np.ones(previous_obs.shape[0])
valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
iou_matrix = iou_batch(detections, trackers)
scores = np.repeat(
detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
# iou_matrix = iou_matrix * scores  # a trick that sometimes works; we don't encourage it
valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)
angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
angle_diff_cost = angle_diff_cost.T
angle_diff_cost = angle_diff_cost * scores
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() == 1:
matched_indices = np.stack(np.where(a), axis=1)
else:
matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost))
else:
matched_indices = np.empty(shape=(0, 2))
unmatched_detections = []
for d, det in enumerate(detections):
if (d not in matched_indices[:, 0]):
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if (t not in matched_indices[:, 1]):
unmatched_trackers.append(t)
# filter out matched with low IOU
matches = []
for m in matched_indices:
if (iou_matrix[m[0], m[1]] < iou_threshold):
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1, 2))
if (len(matches) == 0):
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
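# A quick illustrative call of associate: one detection vs. one tracker, with
# a made-up velocity prior (velocities holds per-tracker (vy, vx) inertia):
_dets = np.array([[0., 0., 10., 10., 0.9]])  # (x1, y1, x2, y2, score)
_trks = np.array([[1., 1., 11., 11., 0.]])
_vels = np.array([[0., 1.]])
_prev = np.array([[0., 0., 10., 10., 0.8]])  # last valid observation per tracker
_m, _ud, _ut = associate(_dets, _trks, 0.3, _vels, _prev, vdc_weight=0.2)
assert _m.tolist() == [[0, 0]]  # the overlapping pair is matched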
def associate_only_iou(detections, trackers, iou_threshold):
if (len(trackers) == 0):
return np.empty(
(0, 2), dtype=int), np.arange(len(detections)), np.empty(
(0, 5), dtype=int)
iou_matrix = iou_batch(detections, trackers)
if min(iou_matrix.shape) > 0:
a = (iou_matrix > iou_threshold).astype(np.int32)
if a.sum(1).max() == 1 and a.sum(0).max() == 1:
matched_indices = np.stack(np.where(a), axis=1)
else:
matched_indices = linear_assignment(-iou_matrix)
else:
matched_indices = np.empty(shape=(0, 2))
unmatched_detections = []
for d, det in enumerate(detections):
if (d not in matched_indices[:, 0]):
unmatched_detections.append(d)
unmatched_trackers = []
for t, trk in enumerate(trackers):
if (t not in matched_indices[:, 1]):
unmatched_trackers.append(t)
# filter out matched with low IOU
matches = []
for m in matched_indices:
if (iou_matrix[m[0], m[1]] < iou_threshold):
unmatched_detections.append(m[0])
unmatched_trackers.append(m[1])
else:
matches.append(m.reshape(1, 2))
if (len(matches) == 0):
matches = np.empty((0, 2), dtype=int)
else:
matches = np.concatenate(matches, axis=0)
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
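# A minimal usage sketch of associate_only_iou (boxes are illustrative; the
# detection score column is ignored by pure IoU matching):
if __name__ == '__main__':
    dets = np.array([[0., 0., 10., 10., 0.9]])
    trks = np.array([[1., 1., 11., 11., 0.]])
    m, u_dets, u_trks = associate_only_iou(dets, trks, iou_threshold=0.3)
    print(m, u_dets, u_trks)  # [[0 0]] [] []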


@@ -0,0 +1,18 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kalman_filter
from . import gmc
from .kalman_filter import *
from .gmc import *


@@ -0,0 +1,368 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/WWangYuHsiang/SMILEtrack/blob/main/BoT-SORT/tracker/gmc.py
"""
import cv2
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
from ppdet.core.workspace import register, serializable
@register
@serializable
class GMC:
def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
super(GMC, self).__init__()
self.method = method
self.downscale = max(1, int(downscale))
if self.method == 'orb':
self.detector = cv2.FastFeatureDetector_create(20)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
elif self.method == 'sift':
self.detector = cv2.SIFT_create(
nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.extractor = cv2.SIFT_create(
nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
self.matcher = cv2.BFMatcher(cv2.NORM_L2)
elif self.method == 'ecc':
number_of_iterations = 5000
termination_eps = 1e-6
self.warp_mode = cv2.MOTION_EUCLIDEAN
self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
number_of_iterations, termination_eps)
elif self.method == 'sparseOptFlow':
self.feature_params = dict(
maxCorners=1000,
qualityLevel=0.01,
minDistance=1,
blockSize=3,
useHarrisDetector=False,
k=0.04)
# self.gmc_file = open('GMC_results.txt', 'w')
elif self.method == 'file' or self.method == 'files':
seqName = verbose[0]
ablation = verbose[1]
if ablation:
filePath = r'tracker/GMC_files/MOT17_ablation'
else:
filePath = r'tracker/GMC_files/MOTChallenge'
if '-FRCNN' in seqName:
seqName = seqName[:-6]
elif '-DPM' in seqName:
seqName = seqName[:-4]
elif '-SDP' in seqName:
seqName = seqName[:-4]
self.gmcFile = open(filePath + "/GMC-" + seqName + ".txt", 'r')
if self.gmcFile is None:
raise ValueError("Error: Unable to open GMC file in directory:"
+ filePath)
elif self.method == 'none' or self.method == 'None':
self.method = 'none'
else:
raise ValueError("Error: Unknown CMC method:" + method)
self.prevFrame = None
self.prevKeyPoints = None
self.prevDescriptors = None
self.initializedFirstFrame = False
def apply(self, raw_frame, detections=None):
if self.method == 'orb' or self.method == 'sift':
return self.applyFeatures(raw_frame, detections)
elif self.method == 'ecc':
return self.applyEcc(raw_frame, detections)
elif self.method == 'sparseOptFlow':
return self.applySparseOptFlow(raw_frame, detections)
elif self.method == 'file':
return self.applyFile(raw_frame, detections)
elif self.method == 'none':
return np.eye(2, 3)
else:
return np.eye(2, 3)
def applyEcc(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3, dtype=np.float32)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale,
height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
# Initialization done
self.initializedFirstFrame = True
return H
# Run the ECC algorithm. The results are stored in warp_matrix.
# (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
try:
(cc,
H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode,
self.criteria, None, 1)
except cv2.error:
print('Warning: find transform failed. Set warp as identity')
return H
def applyFeatures(self, raw_frame, detections=None):
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image (TODO: consider using pyramids)
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale,
height // self.downscale))
width = width // self.downscale
height = height // self.downscale
# find the keypoints
mask = np.zeros_like(frame)
# mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255
mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(
0.98 * width)] = 255
if detections is not None:
for det in detections:
tlbr = (det[:4] / self.downscale).astype(np.int_)
mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0
keypoints = self.detector.detect(frame, mask)
# compute the descriptors
keypoints, descriptors = self.extractor.compute(frame, keypoints)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
# Initialization done
self.initializedFirstFrame = True
return H
# Match descriptors.
knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
# Filtered matches based on smallest spatial distance
matches = []
spatialDistances = []
maxSpatialDistance = 0.25 * np.array([width, height])
# Handle empty matches case
if len(knnMatches) == 0:
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
for m, n in knnMatches:
if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatialDistance = (
prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
(np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
spatialDistances.append(spatialDistance)
matches.append(m)
meanSpatialDistances = np.mean(spatialDistances, 0)
stdSpatialDistances = np.std(spatialDistances, 0)
inliers = (spatialDistances - meanSpatialDistances
) < 2.5 * stdSpatialDistances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Draw the keypoint matches on the output image
if 0:
matches_img = np.hstack((self.prevFrame, frame))
matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
W = np.size(self.prevFrame, 1)
for m in goodMatches:
prev_pt = np.array(
self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
curr_pt[0] += W
color = np.random.randint(0, 255, (3, ))
color = (int(color[0]), int(color[1]), int(color[2]))
matches_img = cv2.line(matches_img, prev_pt, curr_pt,
tuple(color), 1, cv2.LINE_AA)
matches_img = cv2.circle(matches_img, prev_pt, 2,
tuple(color), -1)
matches_img = cv2.circle(matches_img, curr_pt, 2,
tuple(color), -1)
plt.figure()
plt.imshow(matches_img)
plt.show()
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (
np.size(prevPoints, 0) == np.size(currPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints,
cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
print('Warning: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
self.prevDescriptors = copy.copy(descriptors)
return H
def applySparseOptFlow(self, raw_frame, detections=None):
t0 = time.time()
# Initialize
height, width, _ = raw_frame.shape
frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
H = np.eye(2, 3)
# Downscale image
if self.downscale > 1.0:
# frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
frame = cv2.resize(frame, (width // self.downscale,
height // self.downscale))
# find the keypoints
keypoints = cv2.goodFeaturesToTrack(
frame, mask=None, **self.feature_params)
# Handle first frame
if not self.initializedFirstFrame:
# Initialize data
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
# Initialization done
self.initializedFirstFrame = True
return H
if self.prevFrame.shape != frame.shape:
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
return H
# find correspondences
matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(
self.prevFrame, frame, self.prevKeyPoints, None)
# leave good correspondences only
prevPoints = []
currPoints = []
for i in range(len(status)):
if status[i]:
prevPoints.append(self.prevKeyPoints[i])
currPoints.append(matchedKeypoints[i])
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# Find rigid matrix
if (np.size(prevPoints, 0) > 4) and (
np.size(prevPoints, 0) == np.size(currPoints, 0)):
H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints,
cv2.RANSAC)
# Handle downscale
if self.downscale > 1.0:
H[0, 2] *= self.downscale
H[1, 2] *= self.downscale
else:
print('Warning: not enough matching points')
# Store to next iteration
self.prevFrame = frame.copy()
self.prevKeyPoints = copy.copy(keypoints)
t1 = time.time()
# gmc_line = str(1000 * (t1 - t0)) + "\t" + str(H[0, 0]) + "\t" + str(H[0, 1]) + "\t" + str(
# H[0, 2]) + "\t" + str(H[1, 0]) + "\t" + str(H[1, 1]) + "\t" + str(H[1, 2]) + "\n"
# self.gmc_file.write(gmc_line)
return H
def applyFile(self, raw_frame, detections=None):
line = self.gmcFile.readline()
tokens = line.split("\t")
H = np.eye(2, 3, dtype=np.float_)
H[0, 0] = float(tokens[1])
H[0, 1] = float(tokens[2])
H[0, 2] = float(tokens[3])
H[1, 0] = float(tokens[4])
H[1, 1] = float(tokens[5])
H[1, 2] = float(tokens[6])
return H
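# A minimal sketch of GMC on two synthetic frames (real inputs are consecutive
# BGR video frames; the drawn rectangles just give the detector corners):
if __name__ == '__main__':
    frame0 = np.zeros((480, 640, 3), dtype=np.uint8)
    cv2.rectangle(frame0, (100, 100), (200, 200), (255, 255, 255), -1)
    cv2.rectangle(frame0, (400, 300), (500, 400), (255, 255, 255), -1)
    frame1 = np.roll(frame0, shift=(4, 6), axis=(0, 1))  # simulate a small pan
    gmc = GMC(method='sparseOptFlow', downscale=2)
    H0 = gmc.apply(frame0)  # first frame returns the identity 2x3 warp
    H1 = gmc.apply(frame1)  # afterwards: estimated partial-affine warp
    print(H1[:, 2])  # recovered translation, roughly (6, 4)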


@@ -0,0 +1,316 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py
"""
import numpy as np
import scipy.linalg
use_numba = True
try:
import numba as nb
@nb.njit(fastmath=True, cache=True)
def nb_project(mean, covariance, std, _update_mat):
innovation_cov = np.diag(np.square(std))
mean = np.dot(_update_mat, mean)
covariance = np.dot(np.dot(_update_mat, covariance), _update_mat.T)
return mean, covariance + innovation_cov
@nb.njit(fastmath=True, cache=True)
def nb_multi_predict(mean, covariance, motion_cov, motion_mat):
mean = np.dot(mean, motion_mat.T)
left = np.dot(motion_mat, covariance)
covariance = np.dot(left, motion_mat.T) + motion_cov
return mean, covariance
@nb.njit(fastmath=True, cache=True)
def nb_update(mean, covariance, proj_mean, proj_cov, measurement, meas_mat):
kalman_gain = np.linalg.solve(proj_cov, (covariance @ meas_mat.T).T).T
innovation = measurement - proj_mean
mean = mean + innovation @ kalman_gain.T
covariance = covariance - kalman_gain @ proj_cov @ kalman_gain.T
return mean, covariance
except Exception:
use_numba = False
print(
'Warning: Unable to use numba in PP-Tracking, please install numba, for example (Python 3.7): `pip install numba==0.56.4`'
)
__all__ = ['KalmanFilter']
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919
}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim, dtype=np.float32)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim, dtype=np.float32)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""
Create track from unassociated measurement.
Args:
measurement (ndarray): Bounding box coordinates (x, y, a, h) with
center position (x, y), aspect ratio a, and height h.
Returns:
The mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are
initialized to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3], 1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3], 1e-5,
10 * self._std_weight_velocity * measurement[3]
]
covariance = np.diag(np.square(std))
return mean, np.float32(covariance)
def predict(self, mean, covariance):
"""
Run Kalman filter prediction step.
Args:
mean (ndarray): The 8 dimensional mean vector of the object state
at the previous time step.
covariance (ndarray): The 8x8 dimensional covariance matrix of the
object state at the previous time step.
Returns:
The mean vector and covariance matrix of the predicted state.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-2, self._std_weight_position * mean[3]
]
std_vel = [
self._std_weight_velocity * mean[3], self._std_weight_velocity *
mean[3], 1e-5, self._std_weight_velocity * mean[3]
]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
#mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot(
(self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""
Project state distribution to measurement space.
Args:
mean (ndarray): The state's mean vector (8 dimensional array).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
Returns:
The projected mean and covariance matrix of the given state estimate.
"""
std = np.array(
[
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-1, self._std_weight_position * mean[3]
],
dtype=np.float32)
if use_numba:
return nb_project(mean, covariance, std, self._update_mat)
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((self._update_mat, covariance,
self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
Args:
mean (ndarray): The Nx8 dimensional mean matrix of the object states
at the previous time step.
covariance (ndarray): The Nx8x8 dimensional covariance matrices of the
object states at the previous time step.
Returns:
The mean vector and covariance matrix of the predicted state.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = np.array([
self._std_weight_position * mean[:, 3], self._std_weight_position *
mean[:, 3], 1e-2 * np.ones_like(mean[:, 3]),
self._std_weight_position * mean[:, 3]
])
std_vel = np.array([
self._std_weight_velocity * mean[:, 3], self._std_weight_velocity *
mean[:, 3], 1e-5 * np.ones_like(mean[:, 3]),
self._std_weight_velocity * mean[:, 3]
])
sqr = np.square(np.r_[std_pos, std_vel]).T
if use_numba:
means = []
covariances = []
for i in range(len(mean)):
a, b = nb_multi_predict(mean[i], covariance[i],
np.diag(sqr[i]), self._motion_mat)
means.append(a)
covariances.append(b)
return np.asarray(means), np.asarray(covariances)
motion_cov = []
for i in range(len(mean)):
motion_cov.append(np.diag(sqr[i]))
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""
Run Kalman filter correction step.
Args:
mean (ndarray): The predicted state's mean vector (8 dimensional).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
measurement (ndarray): The 4 dimensional measurement vector
(x, y, a, h), where (x, y) is the center position, a the aspect
ratio, and h the height of the bounding box.
Returns:
The measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
if use_numba:
return nb_update(mean, covariance, projected_mean, projected_cov,
measurement, self._update_mat)
kalman_gain = np.linalg.solve(projected_cov,
(covariance @ self._update_mat.T).T).T
innovation = measurement - projected_mean
mean = mean + innovation @ kalman_gain.T
covariance = covariance - kalman_gain @ projected_cov @ kalman_gain.T
return mean, covariance
def gating_distance(self,
mean,
covariance,
measurements,
only_position=False,
metric='maha'):
"""
Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Args:
mean (ndarray): Mean vector over the state distribution (8
dimensional).
covariance (ndarray): Covariance of the state distribution (8x8
dimensional).
measurements (ndarray): An Nx4 dimensional matrix of N measurements,
each in format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position (Optional[bool]): If True, distance computation is
done with respect to the bounding box center position only.
metric (str): Metric type, 'gaussian' or 'maha'.
Returns:
An array of length N, where the i-th element contains the squared
Mahalanobis distance between (mean, covariance) and `measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
d = measurements - mean
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(
cholesky_factor,
d.T,
lower=True,
check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
else:
raise ValueError('invalid distance metric')
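# A minimal predict/update cycle; the (x, y, a, h) measurements below are
# made up for illustration:
if __name__ == '__main__':
    kf = KalmanFilter()
    mean, cov = kf.initiate(np.array([50., 60., 0.5, 80.], dtype=np.float32))
    mean, cov = kf.predict(mean, cov)  # constant-velocity prediction
    z = np.array([52., 61., 0.5, 81.], dtype=np.float32)
    mean, cov = kf.update(mean, cov, z)  # measurement correction
    d = kf.gating_distance(mean, cov, z[None, :])  # squared Mahalanobis, shape (1,)
    print(mean.shape, cov.shape, d)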


@@ -0,0 +1,93 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/danbochman/SORT/blob/danny_opencv/kalman_filter.py
"""
import numpy as np
from numpy import dot, zeros, eye
from numpy.linalg import inv
use_numba = True
try:
import numba as nb
@nb.njit(fastmath=True, cache=True)
def nb_predict(x, F, P, Q):
x = dot(F, x)
P = dot(dot(F, P), F.T) + Q
return x, P
@nb.njit(fastmath=True, cache=True)
def nb_update(x, z, H, P, R, _I):
y = z - np.dot(H, x)
PHT = dot(P, H.T)
S = dot(H, PHT) + R
K = dot(PHT, inv(S))
x = x + dot(K, y)
I_KH = _I - dot(K, H)
P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)
return x, P
except Exception:
use_numba = False
print(
'Warning: Unable to use numba in PP-Tracking, please install numba, for example (Python 3.7): `pip install numba==0.56.4`'
)
class OCSORTKalmanFilter:
def __init__(self, dim_x, dim_z):
self.dim_x = dim_x
self.dim_z = dim_z
self.x = zeros((dim_x, 1))
self.P = eye(dim_x)
self.Q = eye(dim_x)
self.F = eye(dim_x)
self.H = zeros((dim_z, dim_x))
self.R = eye(dim_z)
self.M = zeros((dim_z, dim_z))
self._I = eye(dim_x)
def predict(self):
if use_numba:
self.x, self.P = nb_predict(self.x, self.F, self.P, self.Q)
else:
self.x = dot(self.F, self.x)
self.P = dot(dot(self.F, self.P), self.F.T) + self.Q
def update(self, z):
if z is None:
return
if use_numba:
self.x, self.P = nb_update(self.x, z, self.H, self.P, self.R,
self._I)
else:
y = z - np.dot(self.H, self.x)
PHT = dot(self.P, self.H.T)
S = dot(self.H, PHT) + self.R
K = dot(PHT, inv(S))
self.x = self.x + dot(K, y)
I_KH = self._I - dot(K, self.H)
self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(K, self.R), K.T)
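# A minimal sketch: a 2-D constant-velocity filter (state x, y, vx, vy;
# measurement x, y), analogous to how OC-SORT wires up its box filter:
if __name__ == '__main__':
    kf = OCSORTKalmanFilter(dim_x=4, dim_z=2)
    kf.F = np.array([[1., 0., 1., 0.], [0., 1., 0., 1.],
                     [0., 0., 1., 0.], [0., 0., 0., 1.]])  # transition matrix
    kf.H = np.array([[1., 0., 0., 0.], [0., 1., 0., 0.]])  # observe position only
    kf.predict()
    kf.update(np.array([[1.], [2.]]))  # one (x, y) measurement
    print(kf.x[:2].ravel())  # state pulled toward the measurement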


@@ -0,0 +1,30 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import base_jde_tracker
from . import base_sde_tracker
from .base_jde_tracker import *
from .base_sde_tracker import *
from . import jde_tracker
from . import deepsort_tracker
from . import ocsort_tracker
from . import botsort_tracker
from . import center_tracker
from .jde_tracker import *
from .deepsort_tracker import *
from .ocsort_tracker import *
from .botsort_tracker import *
from .center_tracker import *


@@ -0,0 +1,311 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from collections import deque, OrderedDict
from ..matching import jde_matching as matching
from ppdet.core.workspace import register, serializable
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'TrackState',
'BaseTrack',
'STrack',
'joint_stracks',
'sub_stracks',
'remove_duplicate_stracks',
]
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
@register
@serializable
class BaseTrack(object):
_count_dict = defaultdict(int)  # supports both single-class and multi-class counting
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feat = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id(cls_id):
BaseTrack._count_dict[cls_id] += 1
return BaseTrack._count_dict[cls_id]
# @even: reset track id
@staticmethod
def init_count(num_classes):
"""
Initiate _count for all object classes
:param num_classes:
"""
for cls_id in range(num_classes):
BaseTrack._count_dict[cls_id] = 0
@staticmethod
def reset_track_count(cls_id):
BaseTrack._count_dict[cls_id] = 0
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
@register
@serializable
class STrack(BaseTrack):
def __init__(self, tlwh, score, cls_id, buff_size=30, temp_feat=None):
# wait activate
self._tlwh = np.asarray(tlwh, dtype=np.float32)
self.score = score
self.cls_id = cls_id
self.track_len = 0
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.use_reid = temp_feat is not None
if self.use_reid:
self.smooth_feat = None
self.update_features(temp_feat)
self.features = deque([], maxlen=buff_size)
self.alpha = 0.9
def update_features(self, feat):
# L2 normalization; this function is not used by BYTETracker
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1.0 - self.alpha
) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state,
self.covariance)
@staticmethod
def multi_predict(tracks, kalman_filter):
if len(tracks) > 0:
multi_mean = np.asarray([track.mean.copy() for track in tracks])
multi_covariance = np.asarray(
[track.covariance for track in tracks])
for i, st in enumerate(tracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = kalman_filter.multi_predict(
multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
tracks[i].mean = mean
tracks[i].covariance = cov
@staticmethod
def multi_gmc(stracks, H=np.eye(2, 3)):
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
R = H[:2, :2]
R8x8 = np.kron(np.eye(4, dtype=float), R)
t = H[:2, 2]
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
mean = R8x8.dot(mean)
mean[:2] += t
cov = R8x8.dot(cov).dot(R8x8.transpose())
stracks[i].mean = mean
stracks[i].covariance = cov
def reset_track_id(self):
self.reset_track_count(self.cls_id)
def activate(self, kalman_filter, frame_id):
"""Start a new track"""
self.kalman_filter = kalman_filter
# update track id for the object class
self.track_id = self.next_id(self.cls_id)
self.mean, self.covariance = self.kalman_filter.initiate(
self.tlwh_to_xyah(self._tlwh))
self.track_len = 0
self.state = TrackState.Tracked # set flag 'tracked'
if frame_id == 1: # to record the first frame's detection result
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
if self.use_reid:
self.update_features(new_track.curr_feat)
self.track_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id: # update track id for the object class
self.track_id = self.next_id(self.cls_id)
def update(self, new_track, frame_id, update_feature=True):
self.frame_id = frame_id
self.track_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
self.state = TrackState.Tracked # set flag 'tracked'
self.is_activated = True # set flag 'activated'
self.score = new_track.score
if update_feature and self.use_reid:
self.update_features(new_track.curr_feat)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_({}-{})_({}-{})'.format(self.cls_id, self.track_id,
self.start_frame, self.end_frame)
def joint_stracks(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_stracks(tlista, tlistb):
stracks = {}
for t in tlista:
stracks[t.track_id] = t
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
def remove_duplicate_stracks(stracksa, stracksb):
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
resa = [t for i, t in enumerate(stracksa) if i not in dupa]
resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb
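# A minimal sketch of the STrack box conversions (the tlwh values are made up;
# inside the trackers, STrack instances come from real detections):
if __name__ == '__main__':
    t = STrack(tlwh=[10., 20., 30., 40.], score=0.9, cls_id=0)
    print(t.tlbr)  # [10. 20. 40. 60.]
    print(STrack.tlwh_to_xyah(t.tlwh))  # [25. 40. 0.75 40.]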


@@ -0,0 +1,156 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py
"""
import datetime
from ppdet.core.workspace import register, serializable
__all__ = ['TrackState', 'Track']
class TrackState(object):
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
@register
@serializable
class Track(object):
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Args:
mean (ndarray): Mean vector of the initial state distribution.
covariance (ndarray): Covariance matrix of the initial state distribution.
track_id (int): A unique track identifier.
n_init (int): Number of consecutive detections before the track is confirmed.
The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age (int): The maximum number of consecutive misses before the track
state is set to `Deleted`.
cls_id (int): The category id of the tracked box.
score (float): The confidence score of the tracked box.
feature (Optional[ndarray]): Feature vector of the detection this track
originates from. If not None, this feature is added to the `features` cache.
Attributes:
hits (int): Total number of measurement updates.
        age (int): Total number of frames since first occurrence.
time_since_update (int): Total number of frames since last measurement
update.
state (TrackState): The current track state.
features (List[ndarray]): A cache of features. On each measurement update,
the associated feature vector is added to this list.
"""
def __init__(self,
mean,
covariance,
track_id,
n_init,
max_age,
cls_id,
score,
feature=None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.cls_id = cls_id
self.score = score
self.start_time = datetime.datetime.now()
self.state = TrackState.Tentative
self.features = []
self.feat = feature
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get position in format `(top left x, top left y, width, height)`."""
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get position in bounding box format `(min x, miny, max x, max y)`."""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kalman_filter):
"""
Propagate the state distribution to the current time step using a Kalman
filter prediction step.
"""
self.mean, self.covariance = kalman_filter.predict(self.mean,
self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kalman_filter, detection):
"""
Perform Kalman filter measurement update step and update the associated
detection feature cache.
"""
self.mean, self.covariance = kalman_filter.update(self.mean,
self.covariance,
detection.to_xyah())
self.features.append(detection.feature)
self.feat = detection.feature
self.cls_id = detection.cls_id
self.score = detection.score
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed)."""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted
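# Editor's addition: a hedged sketch of the Track state machine above,
# using hypothetical stand-in objects (only the attribute and method names
# Track actually touches are stubbed; assumes ppdet is importable and the
# class can be constructed directly). A track starts Tentative, becomes
# Confirmed after n_init consecutive hits, and is Deleted once
# time_since_update exceeds max_age.
if __name__ == '__main__':
    class _StubKF(object):
        def predict(self, mean, covariance):
            return mean, covariance

        def update(self, mean, covariance, measurement):
            return mean, covariance

    class _StubDet(object):
        feature, cls_id, score = None, 0, 0.9

        def to_xyah(self):
            return [0., 0., 0.5, 1.]

    kf, det = _StubKF(), _StubDet()
    track = Track(mean=None, covariance=None, track_id=1, n_init=3,
                  max_age=2, cls_id=0, score=0.9)
    assert track.is_tentative()
    for _ in range(2):  # hits reaches n_init=3 after two more updates
        track.predict(kf)
        track.update(kf, det)
    assert track.is_confirmed()
    for _ in range(3):  # the third consecutive miss exceeds max_age=2
        track.predict(kf)
        track.mark_missed()
    assert track.is_deleted()
    print('Track lifecycle: Tentative -> Confirmed -> Deleted OK')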

View File

@@ -0,0 +1,242 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/WWangYuHsiang/SMILEtrack/blob/main/BoT-SORT/tracker/bot_sort.py
"""
import numpy as np
from ..matching import jde_matching as matching
from ..motion import GMC
from .base_jde_tracker import TrackState, STrack
from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
from ..motion import KalmanFilter
from ppdet.core.workspace import register, serializable
@register
@serializable
class BOTSORTTracker(object):
"""
BOTSORT tracker, support single class
Args:
track_high_thresh (float): threshold of detection high score
track_low_thresh (float): threshold of remove detection score
new_track_thresh (float): threshold of new track score
match_thresh (float): iou threshold for associate
track_buffer (int): tracking reserved frames,default 30
min_box_area (float): reserved min box
camera_motion (bool): Whether use camera motion, default False
cmc_method (str): camera motion method,defalut sparseOptFlow
frame_rate (int): fps buffer_size=int(frame_rate / 30.0 * track_buffer)
"""
def __init__(self,
track_high_thresh=0.3,
track_low_thresh=0.2,
new_track_thresh=0.4,
match_thresh=0.7,
track_buffer=30,
min_box_area=0,
camera_motion=False,
cmc_method='sparseOptFlow',
frame_rate=30):
self.tracked_stracks = [] # type: list[STrack]
self.lost_stracks = [] # type: list[STrack]
self.removed_stracks = [] # type: list[STrack]
self.frame_id = 0
self.track_high_thresh = track_high_thresh
self.track_low_thresh = track_low_thresh
self.new_track_thresh = new_track_thresh
self.match_thresh = match_thresh
self.buffer_size = int(frame_rate / 30.0 * track_buffer)
self.max_time_lost = self.buffer_size
self.kalman_filter = KalmanFilter()
self.min_box_area = min_box_area
self.camera_motion = camera_motion
self.gmc = GMC(method=cmc_method)
def update(self, output_results, img=None):
self.frame_id += 1
activated_starcks = []
refind_stracks = []
lost_stracks = []
removed_stracks = []
if len(output_results):
bboxes = output_results[:, 2:6]
scores = output_results[:, 1]
classes = output_results[:, 0]
# Remove bad detections
lowest_inds = scores > self.track_low_thresh
bboxes = bboxes[lowest_inds]
scores = scores[lowest_inds]
classes = classes[lowest_inds]
# Find high threshold detections
remain_inds = scores > self.track_high_thresh
dets = bboxes[remain_inds]
scores_keep = scores[remain_inds]
classes_keep = classes[remain_inds]
else:
bboxes = []
scores = []
classes = []
dets = []
scores_keep = []
classes_keep = []
if len(dets) > 0:
'''Detections'''
detections = [
STrack(STrack.tlbr_to_tlwh(tlbr), s, c)
for (tlbr, s, c) in zip(dets, scores_keep, classes_keep)
]
else:
detections = []
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
for track in self.tracked_stracks:
if not track.is_activated:
unconfirmed.append(track)
else:
tracked_stracks.append(track)
''' Step 2: First association, with high score detection boxes'''
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
STrack.multi_predict(strack_pool, self.kalman_filter)
# Fix camera motion
if self.camera_motion:
warp = self.gmc.apply(img[0], dets)
STrack.multi_gmc(strack_pool, warp)
STrack.multi_gmc(unconfirmed, warp)
# Associate with high score detection boxes
ious_dists = matching.iou_distance(strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(
ious_dists, thresh=self.match_thresh)
for itracked, idet in matches:
track = strack_pool[itracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(detections[idet], self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
''' Step 3: Second association, with low score detection boxes'''
if len(scores):
inds_high = scores < self.track_high_thresh
inds_low = scores > self.track_low_thresh
inds_second = np.logical_and(inds_low, inds_high)
dets_second = bboxes[inds_second]
scores_second = scores[inds_second]
classes_second = classes[inds_second]
else:
dets_second = []
scores_second = []
classes_second = []
        # associate the unmatched tracks to the low score detections
if len(dets_second) > 0:
'''Detections'''
detections_second = [
STrack(STrack.tlbr_to_tlwh(tlbr), s, c) for (tlbr, s, c) in
zip(dets_second, scores_second, classes_second)
]
else:
detections_second = []
r_tracked_stracks = [
strack_pool[i] for i in u_track
if strack_pool[i].state == TrackState.Tracked
]
dists = matching.iou_distance(r_tracked_stracks, detections_second)
matches, u_track, u_detection_second = matching.linear_assignment(
dists, thresh=0.5)
for itracked, idet in matches:
track = r_tracked_stracks[itracked]
det = detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_starcks.append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refind_stracks.append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_stracks.append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed, detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(
dists, thresh=0.7)
for itracked, idet in matches:
unconfirmed[itracked].update(detections[idet], self.frame_id)
activated_starcks.append(unconfirmed[itracked])
for it in u_unconfirmed:
track = unconfirmed[it]
track.mark_removed()
removed_stracks.append(track)
""" Step 4: Init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.new_track_thresh:
continue
track.activate(self.kalman_filter, self.frame_id)
activated_starcks.append(track)
""" Step 5: Update state"""
for track in self.lost_stracks:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
""" Merge """
self.tracked_stracks = [
t for t in self.tracked_stracks if t.state == TrackState.Tracked
]
self.tracked_stracks = joint_stracks(self.tracked_stracks,
activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks,
refind_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
self.removed_stracks.extend(removed_stracks)
self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
self.tracked_stracks, self.lost_stracks)
# output_stracks = [track for track in self.tracked_stracks if track.is_activated]
output_stracks = [track for track in self.tracked_stracks]
return output_stracks
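# Editor's addition: a hedged usage sketch for BOTSORTTracker (hypothetical
# detection values; assumes this module's imports resolve and the class can
# be constructed directly outside a ppdet config). `output_results` rows
# follow the 'cls_id, score, x0, y0, x1, y1' layout used by the other
# trackers in this package.
if __name__ == '__main__':
    tracker = BOTSORTTracker()
    dets = np.array([[0., 0.9, 10., 20., 50., 100.],
                     [0., 0.8, 200., 40., 260., 160.]])
    for frame in range(3):  # feed the same detections for a few frames
        online = tracker.update(dets)
        print('frame', frame, [(t.track_id, round(t.score, 2)) for t in online])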

View File

@@ -0,0 +1,149 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py
"""
import copy
import numpy as np
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['CenterTracker']
@register
@serializable
class CenterTracker(object):
__shared__ = ['num_classes']
def __init__(self,
num_classes=1,
min_box_area=0,
vertical_ratio=-1,
track_thresh=0.4,
pre_thresh=0.5,
new_thresh=0.4,
out_thresh=0.4,
hungarian=False):
self.num_classes = num_classes
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.track_thresh = track_thresh
self.pre_thresh = max(track_thresh, pre_thresh)
self.new_thresh = max(track_thresh, new_thresh)
self.out_thresh = max(track_thresh, out_thresh)
self.hungarian = hungarian
self.reset()
def init_track(self, results):
        logger.info('Initialize tracking!')
for item in results:
if item['score'] > self.new_thresh:
self.id_count += 1
item['tracking_id'] = self.id_count
                if 'ct' not in item:
bbox = item['bbox']
item['ct'] = [(bbox[0] + bbox[2]) / 2,
(bbox[1] + bbox[3]) / 2]
self.tracks.append(item)
def reset(self):
self.id_count = 0
self.tracks = []
def update(self, results, public_det=None):
N = len(results)
M = len(self.tracks)
dets = np.array([det['ct'] + det['tracking'] for det in results],
np.float32) # N x 2
track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \
(track['bbox'][3] - track['bbox'][1])) \
for track in self.tracks], np.float32) # M
track_cat = np.array([track['class'] for track in self.tracks],
np.int32) # M
item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \
(item['bbox'][3] - item['bbox'][1])) \
for item in results], np.float32) # N
item_cat = np.array([item['class'] for item in results], np.int32) # N
tracks = np.array([pre_det['ct'] for pre_det in self.tracks],
np.float32) # M x 2
dist = (((tracks.reshape(1, -1, 2) - \
dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M
invalid = ((dist > track_size.reshape(1, M)) + \
(dist > item_size.reshape(N, 1)) + \
(item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0
dist = dist + invalid * 1e18
if self.hungarian:
item_score = np.array([item['score'] for item in results],
np.float32)
dist[dist > 1e18] = 1e18
            # linear_assignment_ was removed in scikit-learn 0.23, so this
            # branch requires scikit-learn < 0.23
            from sklearn.utils.linear_assignment_ import linear_assignment
matched_indices = linear_assignment(dist)
else:
matched_indices = greedy_assignment(copy.deepcopy(dist))
unmatched_dets = [d for d in range(dets.shape[0]) \
if not (d in matched_indices[:, 0])]
unmatched_tracks = [d for d in range(tracks.shape[0]) \
if not (d in matched_indices[:, 1])]
if self.hungarian:
matches = []
for m in matched_indices:
if dist[m[0], m[1]] > 1e16:
unmatched_dets.append(m[0])
unmatched_tracks.append(m[1])
else:
matches.append(m)
matches = np.array(matches).reshape(-1, 2)
else:
matches = matched_indices
ret = []
for m in matches:
track = results[m[0]]
track['tracking_id'] = self.tracks[m[1]]['tracking_id']
ret.append(track)
# Private detection: create tracks for all un-matched detections
for i in unmatched_dets:
track = results[i]
if track['score'] > self.new_thresh:
self.id_count += 1
track['tracking_id'] = self.id_count
ret.append(track)
self.tracks = ret
return ret
def greedy_assignment(dist):
matched_indices = []
if dist.shape[1] == 0:
return np.array(matched_indices, np.int32).reshape(-1, 2)
for i in range(dist.shape[0]):
j = dist[i].argmin()
if dist[i][j] < 1e16:
dist[:, j] = 1e18
matched_indices.append([i, j])
return np.array(matched_indices, np.int32).reshape(-1, 2)
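# Editor's addition: a worked example for greedy_assignment (pure numpy,
# hypothetical cost values). Each row is matched to its cheapest
# still-free column; entries of 1e18 mark forbidden pairs, and any match
# with cost >= 1e16 is rejected.
if __name__ == '__main__':
    cost = np.array([[0.2, 0.9],
                     [0.1, 0.4],
                     [1e18, 1e18]])  # row 2 has no valid match
    print(greedy_assignment(copy.deepcopy(cost)))
    # -> [[0 0]
    #     [1 1]]: row 0 claims column 0 first (greedy by row order),
    # so row 1 falls back to column 1; row 2 stays unmatched.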

View File

@@ -0,0 +1,189 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
"""
import numpy as np
from ..motion import KalmanFilter
from ..matching.deepsort_matching import NearestNeighborDistanceMetric
from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from .base_sde_tracker import Track
from ..utils import Detection
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['DeepSORTTracker']
@register
@serializable
class DeepSORTTracker(object):
"""
DeepSORT tracker
Args:
input_size (list): input feature map size to reid model, [h, w] format,
[64, 192] as default.
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h ratio threshold used to filter out bad
            boxes, 1.6 is typical for pedestrian tracking. A value <= 0
            disables the filtering.
budget (int): If not None, fix samples per class to at most this number.
Removes the oldest samples when the budget is reached.
        max_age (int): maximum number of consecutive misses before a track is deleted
        n_init (int): Number of frames that a track remains in initialization
phase. Number of consecutive detections before the track is confirmed.
The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
matching_threshold (float): samples with larger distance are
considered an invalid match.
max_iou_distance (float): max iou distance threshold
motion (object): KalmanFilter instance
"""
def __init__(self,
input_size=[64, 192],
min_box_area=0,
vertical_ratio=-1,
budget=100,
max_age=70,
n_init=3,
metric_type='cosine',
matching_threshold=0.2,
max_iou_distance=0.9,
motion='KalmanFilter'):
self.input_size = input_size
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.max_age = max_age
self.n_init = n_init
self.metric = NearestNeighborDistanceMetric(metric_type,
matching_threshold, budget)
self.max_iou_distance = max_iou_distance
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""
Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.motion)
def update(self, pred_dets, pred_embs):
"""
Perform measurement update and track management.
Args:
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'cls_id, score, x0, y0, x1, y1'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128], usually pred_embs.shape[1] is a multiple of 128.
"""
pred_cls_ids = pred_dets[:, 0:1]
pred_scores = pred_dets[:, 1:2]
pred_xyxys = pred_dets[:, 2:6]
pred_tlwhs = np.concatenate((pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), axis=1)
detections = [
Detection(tlwh, score, feat, cls_id)
for tlwh, score, feat, cls_id in zip(pred_tlwhs, pred_scores,
pred_embs, pred_cls_ids)
]
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(self.motion,
detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
output_stracks = self.tracks
return output_stracks
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
dets, track_indices,
detection_indices)
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()
]
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()
]
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update == 1
]
unmatched_tracks_a = [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update != 1
]
matches_b, unmatched_tracks_b, unmatched_detections = \
min_cost_matching(
iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.motion.initiate(detection.to_xyah())
self.tracks.append(
Track(mean, covariance, self._next_id, self.n_init, self.max_age,
detection.cls_id, detection.score, detection.feature))
self._next_id += 1
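# Editor's addition: a hedged usage sketch for DeepSORTTracker
# (hypothetical values; assumes ppdet and this module's imports resolve,
# and that the class can be constructed directly). Unlike the JDE-style
# trackers, predict() must be called once per frame before update().
if __name__ == '__main__':
    tracker = DeepSORTTracker()
    pred_dets = np.array([[0., 0.9, 10., 20., 50., 100.]])  # cls_id, score, x0, y0, x1, y1
    pred_embs = np.random.rand(1, 128).astype('float32')
    for frame in range(3):
        tracker.predict()
        tracker.update(pred_dets, pred_embs)
        # state 1 = Tentative; it flips to 2 (Confirmed) after n_init hits
        print('frame', frame, [(t.track_id, t.state) for t in tracker.tracks])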

View File

@@ -0,0 +1,353 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from ..matching import jde_matching as matching
from ..motion import KalmanFilter
from .base_jde_tracker import TrackState, STrack
from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = ['JDETracker']
@register
@serializable
class JDETracker(object):
    """
    JDE tracker, supports single class and multi classes
Args:
use_byte (bool): Whether use ByteTracker, default False
num_classes (int): the number of classes
det_thresh (float): threshold of detection score
track_buffer (int): buffer for tracker
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h ratio threshold used to filter out bad
            boxes. A value <= 0 disables the filtering; 1.6 is typical
            for pedestrian tracking.
tracked_thresh (float): linear assignment threshold of tracked
stracks and detections
r_tracked_thresh (float): linear assignment threshold of
tracked stracks and unmatched detections
unconfirmed_thresh (float): linear assignment threshold of
unconfirmed stracks and unmatched detections
conf_thres (float): confidence threshold for tracking, also used in
ByteTracker as higher confidence threshold
match_thres (float): linear assignment threshold of tracked
stracks and detections in ByteTracker
low_conf_thres (float): lower confidence threshold for tracking in
ByteTracker
input_size (list): input feature map size to reid model, [h, w] format,
[64, 192] as default.
motion (str): motion model, KalmanFilter as default
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
"""
    __shared__ = ['num_classes']
    def __init__(self,
use_byte=False,
num_classes=1,
det_thresh=0.3,
track_buffer=30,
min_box_area=0,
vertical_ratio=0,
tracked_thresh=0.7,
r_tracked_thresh=0.5,
unconfirmed_thresh=0.7,
conf_thres=0,
match_thres=0.8,
low_conf_thres=0.2,
input_size=[64, 192],
motion='KalmanFilter',
metric_type='euclidean'):
self.use_byte = use_byte
self.num_classes = num_classes
self.det_thresh = det_thresh if not use_byte else conf_thres + 0.1
self.track_buffer = track_buffer
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.tracked_thresh = tracked_thresh
self.r_tracked_thresh = r_tracked_thresh
self.unconfirmed_thresh = unconfirmed_thresh
self.conf_thres = conf_thres
self.match_thres = match_thres
self.low_conf_thres = low_conf_thres
self.input_size = input_size
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.metric_type = metric_type
self.frame_id = 0
self.tracked_tracks_dict = defaultdict(list) # dict(list[STrack])
self.lost_tracks_dict = defaultdict(list) # dict(list[STrack])
self.removed_tracks_dict = defaultdict(list) # dict(list[STrack])
self.max_time_lost = 0
# max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
def update(self, pred_dets, pred_embs=None):
"""
        Processes the image frame and finds bounding boxes (detections).
Associates the detection with corresponding tracklets and also handles
lost, removed, refound and active tracklets.
Args:
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'cls_id, score, x0, y0, x1, y1'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128] or [N, 512].
Return:
output_stracks_dict (dict(list)): The list contains information
regarding the online_tracklets for the received image tensor.
"""
self.frame_id += 1
if self.frame_id == 1:
STrack.init_count(self.num_classes)
activated_tracks_dict = defaultdict(list)
refined_tracks_dict = defaultdict(list)
lost_tracks_dict = defaultdict(list)
removed_tracks_dict = defaultdict(list)
output_tracks_dict = defaultdict(list)
pred_dets_dict = defaultdict(list)
pred_embs_dict = defaultdict(list)
# unify single and multi classes detection and embedding results
for cls_id in range(self.num_classes):
cls_idx = (pred_dets[:, 0:1] == cls_id).squeeze(-1)
pred_dets_dict[cls_id] = pred_dets[cls_idx]
if pred_embs is not None:
pred_embs_dict[cls_id] = pred_embs[cls_idx]
else:
pred_embs_dict[cls_id] = None
for cls_id in range(self.num_classes):
""" Step 1: Get detections by class"""
pred_dets_cls = pred_dets_dict[cls_id]
pred_embs_cls = pred_embs_dict[cls_id]
remain_inds = (pred_dets_cls[:, 1:2] > self.conf_thres).squeeze(-1)
if remain_inds.sum() > 0:
pred_dets_cls = pred_dets_cls[remain_inds]
if pred_embs_cls is None:
# in original ByteTrack
detections = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[2:6]),
tlbrs[1],
cls_id,
30,
temp_feat=None) for tlbrs in pred_dets_cls
]
else:
pred_embs_cls = pred_embs_cls[remain_inds]
detections = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id,
30, temp_feat) for (tlbrs, temp_feat) in
zip(pred_dets_cls, pred_embs_cls)
]
else:
detections = []
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed_dict = defaultdict(list)
tracked_tracks_dict = defaultdict(list)
for track in self.tracked_tracks_dict[cls_id]:
if not track.is_activated:
                    # previous tracks which are not active in the current frame are added to the unconfirmed list
unconfirmed_dict[cls_id].append(track)
else:
# Active tracks are added to the local list 'tracked_stracks'
tracked_tracks_dict[cls_id].append(track)
""" Step 2: First association, with embedding"""
# building tracking pool for the current frame
track_pool_dict = defaultdict(list)
track_pool_dict[cls_id] = joint_stracks(
tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])
# Predict the current location with KalmanFilter
STrack.multi_predict(track_pool_dict[cls_id], self.motion)
if pred_embs_cls is None:
# in original ByteTrack
dists = matching.iou_distance(track_pool_dict[cls_id],
detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.match_thres) # not self.tracked_thresh
else:
dists = matching.embedding_distance(
track_pool_dict[cls_id],
detections,
metric=self.metric_type)
dists = matching.fuse_motion(
self.motion, dists, track_pool_dict[cls_id], detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.tracked_thresh)
for i_tracked, idet in matches:
                # i_tracked is the index of the track and idet is the index of the detection
track = track_pool_dict[cls_id][i_tracked]
det = detections[idet]
if track.state == TrackState.Tracked:
# If the track is active, add the detection to the track
track.update(detections[idet], self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
# We have obtained a detection from a track which is not active,
# hence put the track in refind_stracks list
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
# None of the steps below happen if there are no undetected tracks.
""" Step 3: Second association, with IOU"""
if self.use_byte:
inds_low = pred_dets_dict[cls_id][:, 1:2] > self.low_conf_thres
inds_high = pred_dets_dict[cls_id][:, 1:2] < self.conf_thres
inds_second = np.logical_and(inds_low, inds_high).squeeze(-1)
pred_dets_cls_second = pred_dets_dict[cls_id][inds_second]
                # associate the unmatched tracks to the low score detections
if len(pred_dets_cls_second) > 0:
if pred_embs_dict[cls_id] is None:
# in original ByteTrack
detections_second = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[2:6]),
tlbrs[1],
cls_id,
30,
temp_feat=None)
for tlbrs in pred_dets_cls_second
]
else:
pred_embs_cls_second = pred_embs_dict[cls_id][
inds_second]
detections_second = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1],
cls_id, 30, temp_feat) for (tlbrs, temp_feat) in
zip(pred_dets_cls_second, pred_embs_cls_second)
]
else:
detections_second = []
r_tracked_stracks = [
track_pool_dict[cls_id][i] for i in u_track
if track_pool_dict[cls_id][i].state == TrackState.Tracked
]
dists = matching.iou_distance(r_tracked_stracks,
detections_second)
matches, u_track, u_detection_second = matching.linear_assignment(
dists, thresh=0.4) # not r_tracked_thresh
else:
detections = [detections[i] for i in u_detection]
r_tracked_stracks = []
for i in u_track:
if track_pool_dict[cls_id][i].state == TrackState.Tracked:
r_tracked_stracks.append(track_pool_dict[cls_id][i])
dists = matching.iou_distance(r_tracked_stracks, detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.r_tracked_thresh)
for i_tracked, idet in matches:
track = r_tracked_stracks[i_tracked]
det = detections[
idet] if not self.use_byte else detections_second[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracks_dict[cls_id].append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed_dict[cls_id], detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(
dists, thresh=self.unconfirmed_thresh)
for i_tracked, idet in matches:
unconfirmed_dict[cls_id][i_tracked].update(detections[idet],
self.frame_id)
activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][
i_tracked])
for it in u_unconfirmed:
track = unconfirmed_dict[cls_id][it]
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
""" Step 4: Init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.motion, self.frame_id)
activated_tracks_dict[cls_id].append(track)
""" Step 5: Update state"""
for track in self.lost_tracks_dict[cls_id]:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
self.tracked_tracks_dict[cls_id] = [
t for t in self.tracked_tracks_dict[cls_id]
if t.state == TrackState.Tracked
]
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id])
self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[
cls_id] = remove_duplicate_stracks(
self.tracked_tracks_dict[cls_id],
self.lost_tracks_dict[cls_id])
# get scores of lost tracks
output_tracks_dict[cls_id] = [
track for track in self.tracked_tracks_dict[cls_id]
if track.is_activated
]
logger.debug('===========Frame {}=========='.format(self.frame_id))
logger.debug('Activated: {}'.format(
[track.track_id for track in activated_tracks_dict[cls_id]]))
logger.debug('Refind: {}'.format(
[track.track_id for track in refined_tracks_dict[cls_id]]))
logger.debug('Lost: {}'.format(
[track.track_id for track in lost_tracks_dict[cls_id]]))
logger.debug('Removed: {}'.format(
[track.track_id for track in removed_tracks_dict[cls_id]]))
return output_tracks_dict
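# Editor's addition: a hedged usage sketch for JDETracker in its
# ByteTrack-style mode (pred_embs=None; hypothetical values, assumes ppdet
# and this module's imports resolve). The tracker returns a dict of
# activated tracks keyed by class id.
if __name__ == '__main__':
    tracker = JDETracker(use_byte=True, conf_thres=0.6, low_conf_thres=0.2)
    pred_dets = np.array([[0., 0.9, 10., 20., 50., 100.],
                          [0., 0.3, 60., 20., 100., 100.]])  # one high, one low score
    for frame in range(3):
        online_dict = tracker.update(pred_dets, pred_embs=None)
        print('frame', frame, [t.track_id for t in online_dict[0]])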

View File

@@ -0,0 +1,371 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/noahcao/OC_SORT/blob/master/trackers/ocsort_tracker/ocsort.py
"""
import numpy as np
from ..matching.ocsort_matching import associate, linear_assignment, iou_batch, associate_only_iou
from ..motion.ocsort_kalman_filter import OCSORTKalmanFilter
from ppdet.core.workspace import register, serializable
def k_previous_obs(observations, cur_age, k):
if len(observations) == 0:
return [-1, -1, -1, -1, -1]
for i in range(k):
dt = k - i
if cur_age - dt in observations:
return observations[cur_age - dt]
max_age = max(observations.keys())
return observations[max_age]
def convert_bbox_to_z(bbox):
"""
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
the aspect ratio
"""
w = bbox[2] - bbox[0]
h = bbox[3] - bbox[1]
x = bbox[0] + w / 2.
y = bbox[1] + h / 2.
s = w * h # scale is just area
r = w / float(h + 1e-6)
return np.array([x, y, s, r]).reshape((4, 1))
def convert_x_to_bbox(x, score=None):
"""
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
"""
w = np.sqrt(x[2] * x[3])
h = x[2] / w
    if score is None:
return np.array(
[x[0] - w / 2., x[1] - h / 2., x[0] + w / 2.,
x[1] + h / 2.]).reshape((1, 4))
else:
score = np.array([score])
return np.array([
x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score
]).reshape((1, 5))
def speed_direction(bbox1, bbox2):
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
speed = np.array([cy2 - cy1, cx2 - cx1])
norm = np.sqrt((cy2 - cy1)**2 + (cx2 - cx1)**2) + 1e-6
return speed / norm
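# Editor's addition: a worked example for the conversion helpers above
# (hypothetical values; pure numpy). Calling _demo_box_conversion() shows
# the [x1,y1,x2,y2] <-> [x,y,s,r] round-trip and a speed_direction unit
# vector.
def _demo_box_conversion():
    bbox = [10., 20., 50., 100.]         # w = 40, h = 80
    z = convert_bbox_to_z(bbox)          # -> [[30.], [60.], [3200.], [~0.5]]
    print(z.ravel())
    print(convert_x_to_bbox(z.ravel()))  # -> [[~10. ~20. ~50. ~100.]]
    # unit vector of the center displacement, in (dy, dx) order
    print(speed_direction([0, 0, 2, 2], [3, 4, 5, 6]))  # -> [0.8 0.6]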
class KalmanBoxTracker(object):
"""
This class represents the internal state of individual tracked objects observed as bbox.
Args:
bbox (np.array): bbox in [x1,y1,x2,y2,score] format.
delta_t (int): delta_t of previous observation
"""
count = 0
def __init__(self, bbox, delta_t=3):
self.kf = OCSORTKalmanFilter(dim_x=7, dim_z=4)
self.kf.F = np.array([[1., 0, 0, 0, 1., 0, 0], [0, 1., 0, 0, 0, 1., 0],
[0, 0, 1., 0, 0, 0, 1], [0, 0, 0, 1., 0, 0, 0],
[0, 0, 0, 0, 1., 0, 0], [0, 0, 0, 0, 0, 1., 0],
[0, 0, 0, 0, 0, 0, 1.]])
self.kf.H = np.array([[1., 0, 0, 0, 0, 0, 0], [0, 1., 0, 0, 0, 0, 0],
[0, 0, 1., 0, 0, 0, 0], [0, 0, 0, 1., 0, 0, 0]])
self.kf.R[2:, 2:] *= 10.
self.kf.P[4:, 4:] *= 1000.
# give high uncertainty to the unobservable initial velocities
self.kf.P *= 10.
self.kf.Q[-1, -1] *= 0.01
self.kf.Q[4:, 4:] *= 0.01
self.score = bbox[4]
self.kf.x[:4] = convert_bbox_to_z(bbox)
self.time_since_update = 0
self.id = KalmanBoxTracker.count
KalmanBoxTracker.count += 1
self.history = []
self.hits = 0
self.hit_streak = 0
self.age = 0
"""
NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of
        function k_previous_obs. It is ugly and I do not like it. But to support generating the observation array in a
        fast and unified way, as in k_observations = np.array([k_previous_obs(...)]) below, let's bear it for now.
"""
self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder
self.observations = dict()
self.history_observations = []
self.velocity = None
self.delta_t = delta_t
def update(self, bbox, angle_cost=False):
"""
Updates the state vector with observed bbox.
"""
if bbox is not None:
            if angle_cost and self.last_observation.sum(
            ) >= 0:  # a previous observation exists
previous_box = None
for i in range(self.delta_t):
dt = self.delta_t - i
if self.age - dt in self.observations:
previous_box = self.observations[self.age - dt]
break
if previous_box is None:
previous_box = self.last_observation
"""
Estimate the track speed direction with observations \Delta t steps away
"""
self.velocity = speed_direction(previous_box, bbox)
"""
            Insert new observations. This is an ugly way to maintain both self.observations
            and self.history_observations. Bear it for the moment.
"""
self.last_observation = bbox
self.observations[self.age] = bbox
self.history_observations.append(bbox)
self.time_since_update = 0
self.history = []
self.hits += 1
self.hit_streak += 1
self.kf.update(convert_bbox_to_z(bbox))
else:
self.kf.update(bbox)
def predict(self):
"""
Advances the state vector and returns the predicted bounding box estimate.
"""
if ((self.kf.x[6] + self.kf.x[2]) <= 0):
self.kf.x[6] *= 0.0
self.kf.predict()
self.age += 1
if (self.time_since_update > 0):
self.hit_streak = 0
self.time_since_update += 1
self.history.append(convert_x_to_bbox(self.kf.x, score=self.score))
return self.history[-1]
def get_state(self):
return convert_x_to_bbox(self.kf.x, score=self.score)
@register
@serializable
class OCSORTTracker(object):
"""
OCSORT tracker, support single class
Args:
det_thresh (float): threshold of detection score
max_age (int): maximum number of missed misses before a track is deleted
min_hits (int): minimum hits for associate
iou_threshold (float): iou threshold for associate
delta_t (int): delta_t of previous observation
inertia (float): vdc_weight of angle_diff_cost for associate
vertical_ratio (float): w/h, the vertical ratio of the bbox to filter
bad results. If set <= 0 means no need to filter bboxesusually set
1.6 for pedestrian tracking.
min_box_area (int): min box area to filter out low quality boxes
use_byte (bool): Whether use ByteTracker, default False
"""
def __init__(self,
det_thresh=0.6,
max_age=30,
min_hits=3,
iou_threshold=0.3,
delta_t=3,
inertia=0.2,
vertical_ratio=-1,
min_box_area=0,
use_byte=False,
use_angle_cost=False):
self.det_thresh = det_thresh
self.max_age = max_age
self.min_hits = min_hits
self.iou_threshold = iou_threshold
self.delta_t = delta_t
self.inertia = inertia
self.vertical_ratio = vertical_ratio
self.min_box_area = min_box_area
self.use_byte = use_byte
self.use_angle_cost = use_angle_cost
self.trackers = []
self.frame_count = 0
KalmanBoxTracker.count = 0
def update(self, pred_dets, pred_embs=None):
"""
Args:
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'cls_id, score, x0, y0, x1, y1'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128] or [N, 512], default as None.
Return:
tracking boxes (np.array): [M, 6], means 'x0, y0, x1, y1, score, id'.
"""
if pred_dets is None:
return np.empty((0, 6))
self.frame_count += 1
bboxes = pred_dets[:, 2:]
scores = pred_dets[:, 1:2]
dets = np.concatenate((bboxes, scores), axis=1)
scores = scores.squeeze(-1)
inds_low = scores > 0.1
inds_high = scores < self.det_thresh
inds_second = np.logical_and(inds_low, inds_high)
# self.det_thresh > score > 0.1, for second matching
dets_second = dets[inds_second] # detections for second matching
remain_inds = scores > self.det_thresh
dets = dets[remain_inds]
# get predicted locations from existing trackers.
trks = np.zeros((len(self.trackers), 5))
to_del = []
ret = []
for t, trk in enumerate(trks):
pos = self.trackers[t].predict()[0]
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
if np.any(np.isnan(pos)):
to_del.append(t)
trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
for t in reversed(to_del):
self.trackers.pop(t)
if self.use_angle_cost:
velocities = np.array([
trk.velocity if trk.velocity is not None else np.array((0, 0))
for trk in self.trackers
])
k_observations = np.array([
k_previous_obs(trk.observations, trk.age, self.delta_t)
for trk in self.trackers
])
last_boxes = np.array([trk.last_observation for trk in self.trackers])
"""
First round of association
"""
if self.use_angle_cost:
matched, unmatched_dets, unmatched_trks = associate(
dets, trks, self.iou_threshold, velocities, k_observations,
self.inertia)
else:
matched, unmatched_dets, unmatched_trks = associate_only_iou(
dets, trks, self.iou_threshold)
for m in matched:
self.trackers[m[1]].update(
dets[m[0], :], angle_cost=self.use_angle_cost)
"""
        Second round of association by OCR
"""
# BYTE association
if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[
0] > 0:
u_trks = trks[unmatched_trks]
iou_left = iou_batch(
dets_second,
u_trks) # iou between low score detections and unmatched tracks
iou_left = np.array(iou_left)
if iou_left.max() > self.iou_threshold:
"""
NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may
get a higher performance especially on MOT17/MOT20 datasets. But we keep it
uniform here for simplicity
"""
matched_indices = linear_assignment(-iou_left)
to_remove_trk_indices = []
for m in matched_indices:
det_ind, trk_ind = m[0], unmatched_trks[m[1]]
if iou_left[m[0], m[1]] < self.iou_threshold:
continue
self.trackers[trk_ind].update(
dets_second[det_ind, :], angle_cost=self.use_angle_cost)
to_remove_trk_indices.append(trk_ind)
unmatched_trks = np.setdiff1d(unmatched_trks,
np.array(to_remove_trk_indices))
if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
left_dets = dets[unmatched_dets]
left_trks = last_boxes[unmatched_trks]
iou_left = iou_batch(left_dets, left_trks)
iou_left = np.array(iou_left)
if iou_left.max() > self.iou_threshold:
"""
NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may
get a higher performance especially on MOT17/MOT20 datasets. But we keep it
uniform here for simplicity
"""
rematched_indices = linear_assignment(-iou_left)
to_remove_det_indices = []
to_remove_trk_indices = []
for m in rematched_indices:
det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[
1]]
if iou_left[m[0], m[1]] < self.iou_threshold:
continue
self.trackers[trk_ind].update(
dets[det_ind, :], angle_cost=self.use_angle_cost)
to_remove_det_indices.append(det_ind)
to_remove_trk_indices.append(trk_ind)
unmatched_dets = np.setdiff1d(unmatched_dets,
np.array(to_remove_det_indices))
unmatched_trks = np.setdiff1d(unmatched_trks,
np.array(to_remove_trk_indices))
for m in unmatched_trks:
self.trackers[m].update(None)
# create and initialise new trackers for unmatched detections
for i in unmatched_dets:
trk = KalmanBoxTracker(dets[i, :], delta_t=self.delta_t)
self.trackers.append(trk)
i = len(self.trackers)
for trk in reversed(self.trackers):
if trk.last_observation.sum() < 0:
d = trk.get_state()[0]
else:
d = trk.last_observation # tlbr + score
if (trk.time_since_update < 1) and (
trk.hit_streak >= self.min_hits or
self.frame_count <= self.min_hits):
# +1 as MOT benchmark requires positive
ret.append(np.concatenate((d, [trk.id + 1])).reshape(1, -1))
i -= 1
# remove dead tracklet
if (trk.time_since_update > self.max_age):
self.trackers.pop(i)
if (len(ret) > 0):
return np.concatenate(ret)
return np.empty((0, 6))
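# Editor's addition: a hedged usage sketch for OCSORTTracker (hypothetical
# values; assumes this module's imports resolve and the class can be
# constructed directly). Output rows are 'x0, y0, x1, y1, score, id'; a
# track is reported once its hit streak reaches min_hits, or
# unconditionally while frame_count <= min_hits.
if __name__ == '__main__':
    tracker = OCSORTTracker(det_thresh=0.6, min_hits=1)
    pred_dets = np.array([[0., 0.9, 10., 20., 50., 100.]])
    for frame in range(3):
        print('frame', frame, tracker.update(pred_dets))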

View File

@@ -0,0 +1,265 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import time
import numpy as np
from .visualization import plot_tracking_dict, plot_tracking
__all__ = [
'MOTTimer',
'Detection',
'write_mot_results',
'save_vis_results',
'load_det_results',
'preprocess_reid',
'get_crops',
'clip_box',
'scale_coords',
]
class MOTTimer(object):
"""
    This class is used to compute the current FPS during evaluation.
"""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
def tic(self):
        # use time.time instead of time.clock because time.clock
        # does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
self.duration = self.average_time
else:
self.duration = self.diff
return self.duration
def clear(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
self.average_time = 0.
self.duration = 0.
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Args:
tlwh (Tensor): Bounding box in format `(top left x, top left y,
width, height)`.
score (Tensor): Bounding box confidence score.
feature (Tensor): A feature vector that describes the object
contained in this image.
cls_id (Tensor): Bounding box category id.
"""
def __init__(self, tlwh, score, feature, cls_id):
self.tlwh = np.asarray(tlwh, dtype=np.float32)
self.score = float(score)
self.feature = np.asarray(feature, dtype=np.float32)
self.cls_id = int(cls_id)
def to_tlbr(self):
"""
Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""
Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
def write_mot_results(filename, results, data_type='mot', num_classes=1):
# support single and multi classes
if data_type in ['mot', 'mcmot']:
save_format = '{frame},{id},{x1},{y1},{w},{h},{score},{cls_id},-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} car 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
f = open(filename, 'w')
for cls_id in range(num_classes):
for frame_id, tlwhs, tscores, track_ids in results[cls_id]:
if data_type == 'kitti':
frame_id -= 1
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
if data_type == 'mot':
cls_id = -1
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(
frame=frame_id,
id=track_id,
x1=x1,
y1=y1,
x2=x2,
y2=y2,
w=w,
h=h,
score=score,
cls_id=cls_id)
f.write(line)
    f.close()
    print('MOT results saved in {}'.format(filename))
def save_vis_results(data,
frame_id,
online_ids,
online_tlwhs,
online_scores,
average_time,
show_image,
save_dir,
num_classes=1,
ids2names=[]):
if show_image or save_dir is not None:
assert 'ori_image' in data
img0 = data['ori_image'].numpy()[0]
if online_ids is None:
online_im = img0
else:
if isinstance(online_tlwhs, dict):
online_im = plot_tracking_dict(
img0,
num_classes,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=1. / average_time,
ids2names=ids2names)
else:
online_im = plot_tracking(
img0,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=1. / average_time,
ids2names=ids2names)
if show_image:
cv2.imshow('online_im', online_im)
if save_dir is not None:
cv2.imwrite(
os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
def load_det_results(det_file, num_frames):
assert os.path.exists(det_file) and os.path.isfile(det_file), \
        '{} does not exist or is not a file.'.format(det_file)
labels = np.loadtxt(det_file, dtype='float32', delimiter=',')
assert labels.shape[1] == 7, \
"Each line of {} should have 7 items: '[frame_id],[x0],[y0],[w],[h],[score],[class_id]'.".format(det_file)
results_list = []
for frame_i in range(num_frames):
results = {'bbox': [], 'score': [], 'cls_id': []}
        labels_with_frame = labels[labels[:, 0] == frame_i + 1]
        # each line of labels_with_frame:
        # [frame_id],[x0],[y0],[w],[h],[score],[class_id]
        for l in labels_with_frame:
results['bbox'].append(l[1:5])
results['score'].append(l[5:6])
results['cls_id'].append(l[6:7])
results_list.append(results)
return results_list
def scale_coords(coords, input_shape, im_shape, scale_factor):
    # Note: ratio has only one value, scale_factor[0] == scale_factor[1]
    #
    # This function is only used for JDE YOLOv3 or other detectors with
    # LetterBoxResize and JDEBBoxPostProcess, whose output coords have not
    # been scaled back to the original image.
ratio = scale_factor[0]
pad_w = (input_shape[1] - int(im_shape[1])) / 2
pad_h = (input_shape[0] - int(im_shape[0])) / 2
coords[:, 0::2] -= pad_w
coords[:, 1::2] -= pad_h
coords[:, 0:4] /= ratio
coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max())
return coords.round()
def clip_box(xyxy, ori_image_shape):
H, W = ori_image_shape
xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W)
xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H)
w = xyxy[:, 2:3] - xyxy[:, 0:1]
h = xyxy[:, 3:4] - xyxy[:, 1:2]
mask = np.logical_and(h > 0, w > 0)
keep_idx = np.nonzero(mask)
return xyxy[keep_idx[0]], keep_idx
def get_crops(xyxy, ori_img, w, h):
crops = []
xyxy = xyxy.astype(np.int64)
ori_img = ori_img.numpy()
ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) # [h,w,3]->[w,h,3]
for i, bbox in enumerate(xyxy):
crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
crops.append(crop)
crops = preprocess_reid(crops, w, h)
return crops
def preprocess_reid(imgs,
w=64,
h=192,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
im_batch = []
for img in imgs:
img = cv2.resize(img, (w, h))
img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
img = np.expand_dims(img, axis=0)
im_batch.append(img)
im_batch = np.concatenate(im_batch, 0)
return im_batch
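# Editor's addition: a consolidated sketch for the helpers above
# (hypothetical values; pure numpy). It shows the Detection box formats
# and how clip_box drops degenerate boxes.
if __name__ == '__main__':
    det = Detection(tlwh=[10., 20., 40., 80.], score=0.9,
                    feature=np.zeros(128), cls_id=0)
    print(det.to_tlbr())  # -> [ 10.  20.  50. 100.]
    print(det.to_xyah())  # -> [30.  60.   0.5 80. ]
    boxes = np.array([[-10., -5., 50., 60.],  # clipped into the image
                      [70., 80., 60., 90.]])  # x2 < x1, dropped as degenerate
    kept, keep_idx = clip_box(boxes, ori_image_shape=[100, 100])
    print(kept)  # -> [[ 0.  0. 50. 60.]]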

View File

@@ -0,0 +1,146 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def plot_tracking(image,
tlwhs,
obj_ids,
scores=None,
frame_id=0,
fps=0.,
ids2names=[]):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
radius = max(5, int(im_w / 140.))
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
            assert len(
                ids2names) == 1, "plot_tracking only supports a single class."
id_text = '{}_'.format(ids2names[0]) + id_text
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
return im
def plot_tracking_dict(image,
num_classes,
tlwhs_dict,
obj_ids_dict,
scores_dict,
frame_id=0,
fps=0.,
ids2names=[]):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
radius = max(5, int(im_w / 140.))
for cls_id in range(num_classes):
tlwhs = tlwhs_dict[cls_id]
obj_ids = obj_ids_dict[cls_id]
scores = scores_dict[cls_id]
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
id_text = '{}_{}'.format(ids2names[cls_id], id_text)
else:
id_text = 'class{}_{}'.format(cls_id, id_text)
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im,
intbox[0:2],
intbox[2:4],
color=color,
thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
return im
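# Editor's addition: a hedged usage sketch for plot_tracking (hypothetical
# values; needs only numpy and cv2). It draws two tracked boxes on a blank
# frame and writes the result to disk.
if __name__ == '__main__':
    frame = np.full((480, 640, 3), 255, dtype=np.uint8)
    tlwhs = [[50., 60., 120., 200.], [300., 80., 100., 180.]]
    obj_ids = [1, 2]
    scores = [0.92, 0.87]
    vis = plot_tracking(frame, tlwhs, obj_ids, scores, frame_id=0, fps=25.)
    cv2.imwrite('tracking_vis_demo.jpg', vis)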