Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,24 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import postprocess
from .utils import *
from .postprocess import *
# The following codes are strongly related to zone and camera parameters
from . import camera_utils
from . import zone
from .camera_utils import *
from .zone import *
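# Note (illustrative addition): the wildcard imports above re-export the MTMCT
# helpers at the package level, so a caller can write, for example:
#   from <this_package> import trajectory_fusion, sub_cluster, gen_res
# The exact package path is not shown in this diff.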

View File

@@ -0,0 +1,288 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
Note: The following code is strongly tied to the camera parameters of the AIC21 test-set S06,
so it can only be used on S06 and cannot be used for other MTMCT datasets.
"""
import numpy as np
try:
    from sklearn.cluster import AgglomerativeClustering
except ImportError:
    print(
        'Warning: Unable to use MTMCT in PP-Tracking, please install scikit-learn, for example: `pip install scikit-learn`'
    )
from .utils import get_dire, get_match, get_cid_tid, combin_feature, combin_cluster
from .utils import normalize, intracam_ignore, visual_rerank
__all__ = [
'st_filter',
'get_labels_with_camera',
]
CAM_DIST = [[0, 40, 55, 100, 120, 145], [40, 0, 15, 60, 80, 105],
[55, 15, 0, 40, 65, 90], [100, 60, 40, 0, 20, 45],
[120, 80, 65, 20, 0, 25], [145, 105, 90, 45, 25, 0]]
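# Illustrative note (added): CAM_DIST is the symmetric matrix of pairwise
# spatio-temporal offsets between the six S06 cameras c041-c046, in the same
# unit as the io_time fields used below (an assumption); camera id `cid` maps
# to row/column `cid - 41`, e.g. CAM_DIST[41 - 41][46 - 41] == 145, and the
# diagonal is zero.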
def st_filter(st_mask, cid_tids, cid_tid_dict):
count = len(cid_tids)
for i in range(count):
i_tracklet = cid_tid_dict[cid_tids[i]]
i_cid = i_tracklet['cam']
i_dire = get_dire(i_tracklet['zone_list'], i_cid)
i_iot = i_tracklet['io_time']
for j in range(count):
j_tracklet = cid_tid_dict[cid_tids[j]]
j_cid = j_tracklet['cam']
j_dire = get_dire(j_tracklet['zone_list'], j_cid)
j_iot = j_tracklet['io_time']
match_dire = True
cam_dist = CAM_DIST[i_cid - 41][j_cid - 41]
            # if the time intervals overlap
if i_iot[0] - cam_dist < j_iot[0] and j_iot[0] < i_iot[
1] + cam_dist:
match_dire = False
if i_iot[0] - cam_dist < j_iot[1] and j_iot[1] < i_iot[
1] + cam_dist:
match_dire = False
            # no match after the track leaves
if i_dire[1] in [1, 2]: # i out
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [1, 2]:
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_cid in [41] and i_dire[1] in [4]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_iot[1] > 199:
match_dire = False
if i_cid in [46] and i_dire[1] in [3]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
            # match after the track enters
if i_dire[0] in [1, 2]:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [1, 2]:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
is_ignore = False
if ((i_dire[0] == i_dire[1] and i_dire[0] in [3, 4]) or
(j_dire[0] == j_dire[1] and j_dire[0] in [3, 4])):
is_ignore = True
if not is_ignore:
# direction conflict
if (i_dire[0] in [3] and j_dire[0] in [4]) or (
i_dire[1] in [3] and j_dire[1] in [4]):
match_dire = False
# filter before going next scene
if i_dire[1] in [3] and i_cid < j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid > j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
## 3-30
## 4-1
if i_dire[0] in [3] and i_cid > j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
# filter before going next scene
## 4-7
if i_dire[1] in [3] and i_cid > j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
else:
if i_iot[1] > 199:
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_iot[0] < 1:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
if not match_dire:
st_mask[i, j] = 0.0
st_mask[j, i] = 0.0
return st_mask
def subcam_list(cid_tid_dict, cid_tids):
sub_3_4 = dict()
sub_4_3 = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
tracklet = cid_tid_dict[cid_tid]
zs, ze = get_dire(tracklet['zone_list'], cid)
if zs in [3] and cid not in [46]: # 4 to 3
if not cid + 1 in sub_4_3:
sub_4_3[cid + 1] = []
sub_4_3[cid + 1].append(cid_tid)
if ze in [4] and cid not in [41]: # 4 to 3
if not cid in sub_4_3:
sub_4_3[cid] = []
sub_4_3[cid].append(cid_tid)
if zs in [4] and cid not in [41]: # 3 to 4
if not cid - 1 in sub_3_4:
sub_3_4[cid - 1] = []
sub_3_4[cid - 1].append(cid_tid)
if ze in [3] and cid not in [46]: # 3 to 4
if not cid in sub_3_4:
sub_3_4[cid] = []
sub_3_4[cid].append(cid_tid)
sub_cid_tids = dict()
for i in sub_3_4:
sub_cid_tids[(i, i + 1)] = sub_3_4[i]
for i in sub_4_3:
sub_cid_tids[(i, i - 1)] = sub_4_3[i]
return sub_cid_tids
def subcam_list2(cid_tid_dict, cid_tids):
sub_dict = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
if cid not in [41]:
if not cid in sub_dict:
sub_dict[cid] = []
sub_dict[cid].append(cid_tid)
if cid not in [46]:
if not cid + 1 in sub_dict:
sub_dict[cid + 1] = []
sub_dict[cid + 1].append(cid_tid)
return sub_dict
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-related get_sim_matrix function,
    # which is different from the one in utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
# different from utils.py
if use_st_filter:
st_mask = st_filter(st_mask, cid_tids, cid_tid_dict)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels_with_camera(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
# 1st cluster
sub_cid_tids = subcam_list(cid_tid_dict, cid_tids)
sub_labels = dict()
dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5]
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - dis_thrs[i],
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sub_cid_tids = subcam_list2(cid_tid_dict_new, cid_tids)
sub_labels = dict()
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - 0.1,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
return labels
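# Illustrative sketch (added, not part of the original file): how subcam_list
# groups tracklets into adjacent-camera candidate pools. The zone semantics
# (3/4 marking exits toward neighboring cameras) are inferred from the
# comments above.
def _subcam_list_example():
    cid_tid_dict = {
        (42, 1): {'cam': 42, 'zone_list': [3, 1]},  # enters c042 via zone 3
        (43, 7): {'cam': 43, 'zone_list': [1, 4]},  # leaves c043 via zone 4
    }
    sub = subcam_list(cid_tid_dict, [(42, 1), (43, 7)])
    # both tracklets become match candidates for the camera pair (43, 42)
    assert sub == {(43, 42): [(42, 1), (43, 7)]}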

View File

@@ -0,0 +1,383 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import re
import cv2
from tqdm import tqdm
import numpy as np
try:
    import motmetrics as mm
except ImportError:
    print(
        'Warning: Unable to use motmetrics in MTMCT in PP-Tracking, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
    )
from functools import reduce
from .utils import parse_pt_gt, parse_pt, compare_dataframes_mtmc
from .utils import get_labels, getData, gen_new_mot
from .camera_utils import get_labels_with_camera
from .zone import Zone
from ..visualize import plot_tracking
__all__ = [
'trajectory_fusion',
'sub_cluster',
'gen_res',
'print_mtmct_result',
'get_mtmct_matching_results',
'save_mtmct_crops',
'save_mtmct_vis_results',
]
def trajectory_fusion(mot_feature, cid, cid_bias, use_zone=False, zone_path=''):
cur_bias = cid_bias[cid]
mot_list_break = {}
if use_zone:
zones = Zone(zone_path=zone_path)
zones.set_cam(cid)
mot_list = parse_pt(mot_feature, zones)
else:
mot_list = parse_pt(mot_feature)
if use_zone:
mot_list = zones.break_mot(mot_list, cid)
mot_list = zones.filter_mot(mot_list, cid) # filter by zone
mot_list = zones.filter_bbox(mot_list, cid) # filter bbox
mot_list_break = gen_new_mot(mot_list) # save break feature for gen result
tid_data = dict()
for tid in mot_list:
tracklet = mot_list[tid]
if len(tracklet) <= 1:
continue
frame_list = list(tracklet.keys())
frame_list.sort()
# filter area too large
zone_list = [tracklet[f]['zone'] for f in frame_list]
feature_list = [
tracklet[f]['feat'] for f in frame_list
if (tracklet[f]['bbox'][3] - tracklet[f]['bbox'][1]) *
(tracklet[f]['bbox'][2] - tracklet[f]['bbox'][0]) > 2000
]
if len(feature_list) < 2:
feature_list = [tracklet[f]['feat'] for f in frame_list]
io_time = [
cur_bias + frame_list[0] / 10., cur_bias + frame_list[-1] / 10.
]
all_feat = np.array([feat for feat in feature_list])
mean_feat = np.mean(all_feat, axis=0)
tid_data[tid] = {
'cam': cid,
'tid': tid,
'mean_feat': mean_feat,
'zone_list': zone_list,
'frame_list': frame_list,
'tracklet': tracklet,
'io_time': io_time
}
return tid_data, mot_list_break
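# Illustrative note (added): each entry of the returned tid_data summarizes one
# tracklet, e.g.
#   tid_data[tid] = {
#       'cam': cid, 'tid': tid,
#       'mean_feat': <mean ReID embedding over boxes with area > 2000 px>,
#       'zone_list': [zone per frame], 'frame_list': [sorted frame ids],
#       'tracklet': {frame_id: detection record},
#       'io_time': [first_frame / 10. + bias, last_frame / 10. + bias],
#   }
# mot_list_break is only populated when use_zone is True.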
def sub_cluster(cid_tid_dict,
scene_cluster,
use_ff=True,
use_rerank=True,
use_camera=False,
use_st_filter=False):
'''
cid_tid_dict: all camera_id and track_id
scene_cluster: like [41, 42, 43, 44, 45, 46] in AIC21 MTMCT S06 test videos
'''
assert (len(scene_cluster) != 0), "Error: scene_cluster length equals 0"
cid_tids = sorted(
[key for key in cid_tid_dict.keys() if key[0] in scene_cluster])
if use_camera:
clu = get_labels_with_camera(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
else:
clu = get_labels(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
new_clu = list()
for c_list in clu:
if len(c_list) <= 1: continue
cam_list = [cid_tids[c][0] for c in c_list]
if len(cam_list) != len(set(cam_list)): continue
new_clu.append([cid_tids[c] for c in c_list])
all_clu = new_clu
cid_tid_label = dict()
for i, c_list in enumerate(all_clu):
for c in c_list:
cid_tid_label[c] = i + 1
return cid_tid_label
def gen_res(output_dir_filename,
scene_cluster,
map_tid,
mot_list_breaks,
use_roi=False,
roi_dir=''):
f_w = open(output_dir_filename, 'w')
for idx, mot_feature in enumerate(mot_list_breaks):
cid = scene_cluster[idx]
img_rects = parse_pt_gt(mot_feature)
if use_roi:
            assert (roi_dir != ''), "Error: roi_dir must not be empty!"
roi = cv2.imread(os.path.join(roi_dir, f'c{cid:03d}/roi.jpg'), 0)
height, width = roi.shape
for fid in img_rects:
tid_rects = img_rects[fid]
fid = int(fid) + 1
for tid_rect in tid_rects:
tid = tid_rect[0]
rect = tid_rect[1:]
cx = 0.5 * rect[0] + 0.5 * rect[2]
cy = 0.5 * rect[1] + 0.5 * rect[3]
w = rect[2] - rect[0]
w = min(w * 1.2, w + 40)
h = rect[3] - rect[1]
h = min(h * 1.2, h + 40)
rect[2] -= rect[0]
rect[3] -= rect[1]
rect[0] = max(0, rect[0])
rect[1] = max(0, rect[1])
x1, y1 = max(0, cx - 0.5 * w), max(0, cy - 0.5 * h)
if use_roi:
x2, y2 = min(width, cx + 0.5 * w), min(height, cy + 0.5 * h)
else:
x2, y2 = cx + 0.5 * w, cy + 0.5 * h
w, h = x2 - x1, y2 - y1
new_rect = list(map(int, [x1, y1, w, h]))
rect = list(map(int, rect))
if (cid, tid) in map_tid:
new_tid = map_tid[(cid, tid)]
f_w.write(
str(cid) + ' ' + str(new_tid) + ' ' + str(fid) + ' ' +
' '.join(map(str, new_rect)) + ' -1 -1'
'\n')
    print('gen_res: wrote results to {}'.format(output_dir_filename))
f_w.close()
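# Illustrative note (added): each line written above follows the AIC21 MTMCT
# submission layout
#   <camera_id> <global_track_id> <frame_id> <x> <y> <w> <h> -1 -1
# in pixel coordinates, e.g. "41 3 120 90 90 120 120 -1 -1"; frame ids are
# shifted to start from 1.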
def print_mtmct_result(gt_file, pred_file):
names = [
'CameraId', 'Id', 'FrameId', 'X', 'Y', 'Width', 'Height', 'Xworld',
'Yworld'
]
gt = getData(gt_file, names=names)
pred = getData(pred_file, names=names)
summary = compare_dataframes_mtmc(gt, pred)
print('MTMCT summary: ', summary.columns.tolist())
formatters = {
'idf1': '{:2.2f}'.format,
'idp': '{:2.2f}'.format,
'idr': '{:2.2f}'.format,
'mota': '{:2.2f}'.format
}
summary = summary[['idf1', 'idp', 'idr', 'mota']]
summary.loc[:, 'idp'] *= 100
summary.loc[:, 'idr'] *= 100
summary.loc[:, 'idf1'] *= 100
summary.loc[:, 'mota'] *= 100
    try:
        import motmetrics as mm
    except ImportError:
raise RuntimeError(
'Unable to use motmetrics in MTMCT in PP-Tracking, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
)
print(
mm.io.render_summary(
summary,
formatters=formatters,
namemap=mm.io.motchallenge_metric_names))
def get_mtmct_matching_results(pred_mtmct_file, secs_interval=0.5,
video_fps=20):
res = np.loadtxt(pred_mtmct_file) # 'cid, tid, fid, x1, y1, w, h, -1, -1'
camera_ids = list(map(int, np.unique(res[:, 0])))
res = res[:, :7]
# each line in res: 'cid, tid, fid, x1, y1, w, h'
camera_tids = []
camera_results = dict()
for c_id in camera_ids:
camera_results[c_id] = res[res[:, 0] == c_id]
tids = np.unique(camera_results[c_id][:, 1])
tids = list(map(int, tids))
camera_tids.append(tids)
# select common tids throughout each video
common_tids = reduce(np.intersect1d, camera_tids)
if len(common_tids) == 0:
print(
'No common tracked ids in these videos, please check your MOT result or select new videos.'
)
return None, None
# get mtmct matching results by cid_tid_fid_results[c_id][t_id][f_id]
cid_tid_fid_results = dict()
cid_tid_to_fids = dict()
interval = int(secs_interval * video_fps) # preferably less than 10
for c_id in camera_ids:
cid_tid_fid_results[c_id] = dict()
cid_tid_to_fids[c_id] = dict()
for t_id in common_tids:
tid_mask = camera_results[c_id][:, 1] == t_id
cid_tid_fid_results[c_id][t_id] = dict()
camera_trackid_results = camera_results[c_id][tid_mask]
fids = np.unique(camera_trackid_results[:, 2])
fids = fids[fids % interval == 0]
fids = list(map(int, fids))
cid_tid_to_fids[c_id][t_id] = fids
for f_id in fids:
st_frame = f_id
ed_frame = f_id + interval
st_mask = camera_trackid_results[:, 2] >= st_frame
ed_mask = camera_trackid_results[:, 2] < ed_frame
frame_mask = np.logical_and(st_mask, ed_mask)
cid_tid_fid_results[c_id][t_id][f_id] = camera_trackid_results[
frame_mask]
return camera_results, cid_tid_fid_results
def save_mtmct_crops(cid_tid_fid_res,
images_dir,
crops_dir,
width=300,
height=200):
camera_ids = cid_tid_fid_res.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(camera_ids)
seqs.sort()
if not os.path.exists(crops_dir):
os.makedirs(crops_dir)
common_tids = list(cid_tid_fid_res[list(camera_ids)[0]].keys())
    # get crops named 'tid_cid_fid.jpg'
for t_id in common_tids:
for i, c_id in enumerate(camera_ids):
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id in cid_tid_fid_res[c_id][t_id].keys():
frame_idx = f_id - 1 if f_id > 0 else 0
im_path = os.path.join(infer_dir, all_images[frame_idx])
im = cv2.imread(im_path) # (H, W, 3)
# only select one track
track = cid_tid_fid_res[c_id][t_id][f_id][0]
cid, tid, fid, x1, y1, w, h = [int(v) for v in track]
clip = im[y1:(y1 + h), x1:(x1 + w)]
clip = cv2.resize(clip, (width, height))
cv2.imwrite(
os.path.join(crops_dir,
'tid{:06d}_cid{:06d}_fid{:06d}.jpg'.format(
tid, cid, fid)), clip)
print("Finish cropping image of tracked_id {} in camera: {}".format(
t_id, c_id))
def save_mtmct_vis_results(camera_results,
images_dir,
save_dir,
save_videos=False):
# camera_results: 'cid, tid, fid, x1, y1, w, h'
camera_ids = camera_results.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(camera_ids)
seqs.sort()
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for i, c_id in enumerate(camera_ids):
print("Start visualization for camera {} of sequence {}.".format(
c_id, seqs[i]))
cid_save_dir = os.path.join(save_dir, '{}'.format(seqs[i]))
if not os.path.exists(cid_save_dir):
os.makedirs(cid_save_dir)
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id, im_path in enumerate(all_images):
img = cv2.imread(os.path.join(infer_dir, im_path))
tracks = camera_results[c_id][camera_results[c_id][:, 2] == f_id]
if tracks.shape[0] > 0:
tracked_ids = tracks[:, 1]
xywhs = tracks[:, 3:]
online_im = plot_tracking(
img, xywhs, tracked_ids, scores=None, frame_id=f_id)
else:
online_im = img
print('Frame {} of seq {} has no tracking results'.format(
f_id, seqs[i]))
cv2.imwrite(
os.path.join(cid_save_dir, '{:05d}.jpg'.format(f_id)),
online_im)
if f_id % 40 == 0:
print('Processing frame {}'.format(f_id))
if save_videos:
output_video_path = os.path.join(cid_save_dir, '..',
'{}_mtmct_vis.mp4'.format(seqs[i]))
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
cid_save_dir, output_video_path)
os.system(cmd_str)
            print('Saved video of sequence {} to {}.'.format(
                seqs[i], output_video_path))
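# Illustrative end-to-end sketch (added, not part of the original file):
# a minimal synthetic run of the pipeline above on two cameras. The record
# layout is inferred from parse_pt()/parse_pt_gt() in utils.py; the 128-dim
# features, ids, and output file name are assumptions.
def _demo_mtmct_pipeline(out_file='mtmct_demo_result.txt'):
    feat = np.random.RandomState(0).rand(128).astype(np.float32)
    feat /= np.linalg.norm(feat)

    def fake_mot_feature(cid, tid, frames):
        # one detection record per frame, shaped like the parsers expect
        return {
            'c%03d_f%04d' % (cid, f): {
                'frame': 'f%04d' % f,
                'id': tid,
                'bbox': [100, 100, 200, 200],  # area > 2000 px, so kept
                'feat': feat,  # identical features -> easy cross-camera match
                'imgname': 'c%03d_f%04d.jpg' % (cid, f),
            }
            for f in frames
        }

    mot_features = [fake_mot_feature(41, 1, [1, 2, 3]),
                    fake_mot_feature(42, 1, [5, 6, 7])]
    cid_bias = {41: 0., 42: 0.}
    cid_tid_dict = {}
    for cid, mot_feature in zip([41, 42], mot_features):
        tid_data, _ = trajectory_fusion(mot_feature, cid, cid_bias)
        for tid, data in tid_data.items():
            cid_tid_dict[(cid, tid)] = data
    # use_ff/use_rerank assume larger galleries, so keep them off here
    map_tid = sub_cluster(cid_tid_dict, [41, 42],
                          use_ff=False, use_rerank=False)
    gen_res(out_file, [41, 42], map_tid, mot_features)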

View File

@@ -0,0 +1,604 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import re
import cv2
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'parse_pt', 'parse_bias', 'get_dire', 'parse_pt_gt',
'compare_dataframes_mtmc', 'get_sim_matrix', 'get_labels', 'getData',
'gen_new_mot'
]
def parse_pt(mot_feature, zones=None):
mot_list = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
bbox = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if tid not in mot_list:
mot_list[tid] = dict()
out_dict = mot_feature[line]
if zones is not None:
out_dict['zone'] = zones.get_zone(bbox)
else:
out_dict['zone'] = None
mot_list[tid][fid] = out_dict
return mot_list
def gen_new_mot(mot_list):
out_dict = dict()
for tracklet in mot_list:
tracklet = mot_list[tracklet]
for f in tracklet:
out_dict[tracklet[f]['imgname']] = tracklet[f]
return out_dict
def mergesetfeat1_notrk(P, neg_vector, in_feats, in_labels):
out_feats = []
for i in range(in_feats.shape[0]):
camera_id = in_labels[i, 1]
feat = in_feats[i] - neg_vector[camera_id]
feat = P[camera_id].dot(feat)
feat = feat / np.linalg.norm(feat, ord=2)
out_feats.append(feat)
out_feats = np.vstack(out_feats)
return out_feats
def compute_P2(prb_feats, gal_feats, gal_labels, la=3.0):
X = gal_feats
neg_vector = {}
u_labels = np.unique(gal_labels[:, 1])
P = {}
for label in u_labels:
curX = gal_feats[gal_labels[:, 1] == label, :]
neg_vector[label] = np.mean(curX, axis=0)
P[label] = np.linalg.inv(
curX.T.dot(curX) + curX.shape[0] * la * np.eye(X.shape[1]))
return P, neg_vector
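# Note (added): P[label] is the inverse of the ridge-regularized second-moment
# matrix of one camera's gallery features and neg_vector[label] their mean;
# mergesetfeat1_notrk() above subtracts the per-camera mean and applies P to
# compensate camera-specific bias, i.e. the "fic" step used in run_fic() below.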
def parse_bias(cameras_bias):
cid_bias = dict()
for cameras in cameras_bias.keys():
cameras_id = re.sub('[a-z,A-Z]', "", cameras)
cameras_id = int(cameras_id)
bias = cameras_bias[cameras]
cid_bias[cameras_id] = float(bias)
return cid_bias
def get_dire(zone_list, cid):
zs, ze = zone_list[0], zone_list[-1]
return (zs, ze)
def intracam_ignore(st_mask, cid_tids):
count = len(cid_tids)
for i in range(count):
for j in range(count):
if cid_tids[i][0] == cid_tids[j][0]:
st_mask[i, j] = 0.
return st_mask
def mergesetfeat(in_feats, in_labels, in_tracks):
trackset = list(set(list(in_tracks)))
out_feats = []
out_labels = []
for track in trackset:
feat = np.mean(in_feats[in_tracks == track], axis=0)
feat = feat / np.linalg.norm(feat, ord=2)
label = in_labels[in_tracks == track][0]
out_feats.append(feat)
out_labels.append(label)
out_feats = np.vstack(out_feats)
out_labels = np.vstack(out_labels)
return out_feats, out_labels
def mergesetfeat3(X, labels, gX, glabels, beta=0.08, knn=20, lr=0.5):
for i in range(0, X.shape[0]):
if i % 1000 == 0:
print('feat3:%d/%d' % (i, X.shape[0]))
knnX = gX[glabels[:, 1] != labels[i, 1], :]
sim = knnX.dot(X[i, :])
knnX = knnX[sim > 0, :]
sim = sim[sim > 0]
if len(sim) > 0:
idx = np.argsort(-sim)
if len(sim) > 2 * knn:
sim = sim[idx[:2 * knn]]
knnX = knnX[idx[:2 * knn], :]
else:
sim = sim[idx]
knnX = knnX[idx, :]
knn = min(knn, len(sim))
knn_pos_weight = np.exp((sim[:knn] - 1) / beta)
knn_neg_weight = np.ones(len(sim) - knn)
knn_pos_prob = knn_pos_weight / np.sum(knn_pos_weight)
knn_neg_prob = knn_neg_weight / np.sum(knn_neg_weight)
X[i, :] += lr * (knn_pos_prob.dot(knnX[:knn, :]) -
knn_neg_prob.dot(knnX[knn:, :]))
X[i, :] /= np.linalg.norm(X[i, :])
return X
def run_fic(prb_feats, gal_feats, prb_labels, gal_labels, la=3.0):
P, neg_vector = compute_P2(prb_feats, gal_feats, gal_labels, la)
prb_feats_new = mergesetfeat1_notrk(P, neg_vector, prb_feats, prb_labels)
gal_feats_new = mergesetfeat1_notrk(P, neg_vector, gal_feats, gal_labels)
return prb_feats_new, gal_feats_new
def run_fac(prb_feats,
gal_feats,
prb_labels,
gal_labels,
beta=0.08,
knn=20,
lr=0.5,
prb_epoch=2,
gal_epoch=3):
gal_feats_new = gal_feats.copy()
for i in range(prb_epoch):
gal_feats_new = mergesetfeat3(gal_feats_new, gal_labels, gal_feats,
gal_labels, beta, knn, lr)
prb_feats_new = prb_feats.copy()
for i in range(gal_epoch):
prb_feats_new = mergesetfeat3(prb_feats_new, prb_labels, gal_feats_new,
gal_labels, beta, knn, lr)
return prb_feats_new, gal_feats_new
def euclidean_distance(qf, gf):
m = qf.shape[0]
n = gf.shape[0]
dist_mat = 2 - 2 * np.matmul(qf, gf.T)
return dist_mat
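# Note (added): for L2-normalized rows this is the squared Euclidean distance,
# since ||q - g||^2 = ||q||^2 + ||g||^2 - 2 q.g = 2 - 2 q.g; callers normalize
# features first (see normalize() below), so values lie in [0, 4].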
def find_topk(a, k, axis=-1, largest=True, sorted=True):
if axis is None:
axis_size = a.size
else:
axis_size = a.shape[axis]
assert 1 <= k <= axis_size
a = np.asanyarray(a)
if largest:
index_array = np.argpartition(a, axis_size - k, axis=axis)
topk_indices = np.take(index_array, -np.arange(k) - 1, axis=axis)
else:
index_array = np.argpartition(a, k - 1, axis=axis)
topk_indices = np.take(index_array, np.arange(k), axis=axis)
topk_values = np.take_along_axis(a, topk_indices, axis=axis)
if sorted:
sorted_indices_in_topk = np.argsort(topk_values, axis=axis)
if largest:
sorted_indices_in_topk = np.flip(sorted_indices_in_topk, axis=axis)
sorted_topk_values = np.take_along_axis(
topk_values, sorted_indices_in_topk, axis=axis)
sorted_topk_indices = np.take_along_axis(
topk_indices, sorted_indices_in_topk, axis=axis)
return sorted_topk_values, sorted_topk_indices
return topk_values, topk_indices
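# Example (added): smallest two entries per row, as batch_numpy_topk() uses it:
#   find_topk(np.array([[3., 1., 2.]]), k=2, axis=1, largest=False)
#   -> (array([[1., 2.]]), array([[1, 2]]))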
def batch_numpy_topk(qf, gf, k1, N=6000):
m = qf.shape[0]
n = gf.shape[0]
initial_rank = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = np.concatenate(temp_qd, axis=0)
temp_qd = temp_qd / (np.max(temp_qd, axis=0)[0])
temp_qd = temp_qd.T
initial_rank.append(
find_topk(
temp_qd, k=k1, axis=1, largest=False, sorted=True)[1])
del temp_qd
del temp_gf
del temp_qf
del temp_d
initial_rank = np.concatenate(initial_rank, axis=0)
return initial_rank
def batch_euclidean_distance(qf, gf, N=6000):
m = qf.shape[0]
n = gf.shape[0]
dist_mat = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = np.concatenate(temp_qd, axis=0)
temp_qd = temp_qd / (np.max(temp_qd, axis=0)[0])
dist_mat.append(temp_qd.T)
del temp_qd
del temp_gf
del temp_qf
del temp_d
dist_mat = np.concatenate(dist_mat, axis=0)
return dist_mat
def batch_v(feat, R, all_num):
V = np.zeros((all_num, all_num), dtype=np.float32)
m = feat.shape[0]
for i in tqdm(range(m)):
temp_gf = feat[i].reshape(1, -1)
temp_qd = euclidean_distance(temp_gf, feat)
temp_qd = temp_qd / (np.max(temp_qd))
temp_qd = temp_qd.reshape(-1)
temp_qd = temp_qd[R[i].tolist()]
weight = np.exp(-temp_qd)
weight = weight / np.sum(weight)
V[i, R[i]] = weight.astype(np.float32)
return V
def k_reciprocal_neigh(initial_rank, i, k1):
forward_k_neigh_index = initial_rank[i, :k1 + 1]
backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
fi = np.where(backward_k_neigh_index == i)[0]
return forward_k_neigh_index[fi]
def ReRank2(probFea, galFea, k1=20, k2=6, lambda_value=0.3):
query_num = probFea.shape[0]
all_num = query_num + galFea.shape[0]
feat = np.concatenate((probFea, galFea), axis=0)
initial_rank = batch_numpy_topk(feat, feat, k1 + 1, N=6000)
del probFea
del galFea
    gc.collect()  # free memory
R = []
for i in tqdm(range(all_num)):
# k-reciprocal neighbors
k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1)
k_reciprocal_expansion_index = k_reciprocal_index
for j in range(len(k_reciprocal_index)):
candidate = k_reciprocal_index[j]
candidate_k_reciprocal_index = k_reciprocal_neigh(
initial_rank, candidate, int(np.around(k1 / 2)))
if len(
np.intersect1d(candidate_k_reciprocal_index,
k_reciprocal_index)) > 2. / 3 * len(
candidate_k_reciprocal_index):
k_reciprocal_expansion_index = np.append(
k_reciprocal_expansion_index, candidate_k_reciprocal_index)
k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
R.append(k_reciprocal_expansion_index)
    gc.collect()  # free memory
V = batch_v(feat, R, all_num)
del R
    gc.collect()  # free memory
initial_rank = initial_rank[:, :k2]
# Faster version
if k2 != 1:
V_qe = np.zeros_like(V, dtype=np.float16)
for i in range(all_num):
V_qe[i, :] = np.mean(V[initial_rank[i], :], axis=0)
V = V_qe
del V_qe
del initial_rank
    gc.collect()  # free memory
invIndex = []
for i in range(all_num):
invIndex.append(np.where(V[:, i] != 0)[0])
jaccard_dist = np.zeros((query_num, all_num), dtype=np.float32)
for i in tqdm(range(query_num)):
temp_min = np.zeros(shape=[1, all_num], dtype=np.float32)
indNonZero = np.where(V[i, :] != 0)[0]
indImages = [invIndex[ind] for ind in indNonZero]
for j in range(len(indNonZero)):
temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
jaccard_dist[i] = 1 - temp_min / (2. - temp_min)
del V
    gc.collect()  # free memory
original_dist = batch_euclidean_distance(feat, feat[:query_num, :])
final_dist = jaccard_dist * (1 - lambda_value
) + original_dist * lambda_value
del original_dist
del jaccard_dist
final_dist = final_dist[:query_num, query_num:]
return final_dist
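# Note (added): ReRank2 is a memory-batched NumPy variant of k-reciprocal
# re-ranking (Zhong et al., "Re-ranking Person Re-identification with
# k-reciprocal Encoding", CVPR 2017): a Jaccard distance over k-reciprocal
# neighbor sets is blended with the original distance via lambda_value.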
def visual_rerank(prb_feats,
gal_feats,
cid_tids,
use_ff=False,
use_rerank=False):
"""Rerank by visual cures."""
gal_labels = np.array([[0, item[0]] for item in cid_tids])
prb_labels = gal_labels.copy()
    if use_ff:
        print('currently using ff finetuned parameters...')
        # Step 1-1: fic. finetuned parameters: [la]
        prb_feats, gal_feats = run_fic(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 3.0)
        # Step 1-2: fac. finetuned parameters: [beta, knn, lr, prb_epoch, gal_epoch]
        prb_feats, gal_feats = run_fac(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 0.08, 20, 0.5, 1, 1)
    if use_rerank:
        print('currently using rerank finetuned parameters...')
        # Step 2: k-reciprocal. finetuned parameters: [k1, k2, lambda_value]
sims = ReRank2(prb_feats, gal_feats, 20, 3, 0.3)
else:
sims = 1.0 - np.dot(prb_feats, gal_feats.T)
    # NOTE: sims here is actually a distance: the smaller, the more similar
return 1.0 - sims
def normalize(nparray, axis=0):
    try:
        from sklearn import preprocessing
    except ImportError:
        raise RuntimeError(
            'Unable to use sklearn in MTMCT in PP-Tracking, please install scikit-learn, for example: `pip install scikit-learn`'
        )
nparray = preprocessing.normalize(nparray, norm='l2', axis=axis)
return nparray
def get_match(cluster_labels):
cluster_dict = dict()
cluster = list()
for i, l in enumerate(cluster_labels):
if l in list(cluster_dict.keys()):
cluster_dict[l].append(i)
else:
cluster_dict[l] = [i]
for idx in cluster_dict:
cluster.append(cluster_dict[idx])
return cluster
def get_cid_tid(cluster_labels, cid_tids):
cluster = list()
for labels in cluster_labels:
cid_tid_list = list()
for label in labels:
cid_tid_list.append(cid_tids[label])
cluster.append(cid_tid_list)
return cluster
def combin_feature(cid_tid_dict, sub_cluster):
for sub_ct in sub_cluster:
if len(sub_ct) < 2: continue
mean_feat = np.array([cid_tid_dict[i]['mean_feat'] for i in sub_ct])
for i in sub_ct:
cid_tid_dict[i]['mean_feat'] = mean_feat.mean(axis=0)
return cid_tid_dict
def combin_cluster(sub_labels, cid_tids):
cluster = list()
for sub_c_to_c in sub_labels:
if len(cluster) < 1:
cluster = sub_labels[sub_c_to_c]
continue
for c_ts in sub_labels[sub_c_to_c]:
is_add = False
for i_c, c_set in enumerate(cluster):
if len(set(c_ts) & set(c_set)) > 0:
new_list = list(set(c_ts) | set(c_set))
cluster[i_c] = new_list
is_add = True
break
if not is_add:
cluster.append(c_ts)
labels = list()
num_tr = 0
for c_ts in cluster:
label_list = list()
for c_t in c_ts:
label_list.append(cid_tids.index(c_t))
num_tr += 1
label_list.sort()
labels.append(label_list)
return labels, cluster
def parse_pt_gt(mot_feature):
img_rects = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
rect = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if fid not in img_rects:
img_rects[fid] = list()
rect.insert(0, tid)
img_rects[fid].append(rect)
return img_rects
# eval result
def compare_dataframes_mtmc(gts, ts):
    """Compute ID-based evaluation metrics for MTMCT.
    Return:
        df (pandas.DataFrame): Results of the evaluations in a df with only the 'idf1', 'idp', and 'idr' columns.
    """
    try:
        import motmetrics as mm
    except ImportError:
        raise RuntimeError(
            'Unable to use motmetrics in MTMCT in PP-Tracking, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
        )
gtds = []
tsds = []
gtcams = gts['CameraId'].drop_duplicates().tolist()
tscams = ts['CameraId'].drop_duplicates().tolist()
maxFrameId = 0
for k in sorted(gtcams):
gtd = gts.query('CameraId == %d' % k)
gtd = gtd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId in gtd only
mfid = gtd['FrameId'].max()
gtd['FrameId'] += maxFrameId
gtd = gtd.set_index(['FrameId', 'Id'])
gtds.append(gtd)
if k in tscams:
tsd = ts.query('CameraId == %d' % k)
tsd = tsd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId among both gtd and tsd
mfid = max(mfid, tsd['FrameId'].max())
tsd['FrameId'] += maxFrameId
tsd = tsd.set_index(['FrameId', 'Id'])
tsds.append(tsd)
maxFrameId += mfid
# compute multi-camera tracking evaluation stats
multiCamAcc = mm.utils.compare_to_groundtruth(
pd.concat(gtds), pd.concat(tsds), 'iou')
metrics = list(mm.metrics.motchallenge_metrics)
metrics.extend(['num_frames', 'idfp', 'idfn', 'idtp'])
mh = mm.metrics.create()
summary = mh.compute(multiCamAcc, metrics=metrics, name='MultiCam')
return summary
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-independent get_sim_matrix function,
    # which is different from the one in camera_utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    try:
        from sklearn.cluster import AgglomerativeClustering
    except ImportError:
        raise RuntimeError(
            'Unable to use sklearn in MTMCT in PP-Tracking, please install scikit-learn, for example: `pip install scikit-learn`'
        )
# 1st cluster
sim_matrix = get_sim_matrix(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.5,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.9,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
return labels
def getData(fpath, names=None, sep=r'\s+|\t+|,'):
    """ Get the necessary track data from a file.
Args:
fpath (str) : Original path of file reading from.
names (list[str]): List of column names for the data.
sep (str): Allowed separators regular expression string.
Return:
df (pandas.DataFrame): Data frame containing the data loaded from the
stream with optionally assigned column names. No index is set on the data.
"""
try:
df = pd.read_csv(
fpath,
sep=sep,
index_col=None,
skipinitialspace=True,
header=None,
names=names,
engine='python')
return df
except Exception as e:
raise ValueError("Could not read input from %s. Error: %s" %
(fpath, repr(e)))
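# Illustrative sketch (added, not part of the original file): getData() parses
# whitespace- or comma-separated MOT records, matching the column names that
# print_mtmct_result() passes in postprocess.py. The file name is hypothetical.
def _demo_getData(path='gt_demo.txt'):
    with open(path, 'w') as f:
        f.write('41 3 120 512 300 80 60 -1 -1\n'
                '41 3 121 514 301 80 60 -1 -1\n')
    names = ['CameraId', 'Id', 'FrameId', 'X', 'Y', 'Width', 'Height',
             'Xworld', 'Yworld']
    df = getData(path, names=names)
    assert df.shape == (2, 9) and df['CameraId'].tolist() == [41, 41]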

View File

@@ -0,0 +1,412 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
Note: The following code is strongly tied to the zones of the AIC21 test-set S06,
so it can only be used on S06 and cannot be used for other MTMCT datasets.
"""
import os
import cv2
import numpy as np
try:
    from sklearn.cluster import AgglomerativeClustering
except ImportError:
    print(
        'Warning: Unable to use MTMCT in PP-Tracking, please install scikit-learn, for example: `pip install scikit-learn`'
    )
BBOX_B = 10 / 15
class Zone(object):
def __init__(self, zone_path='datasets/zone'):
        # BGR channels: 0: blue, 1: green, 2: red; all high: white
        # white and red zones: not high speed
        # blue and green zones: high speed
        assert zone_path != '', "Error: zone_path must not be empty!"
zones = {}
for img_name in os.listdir(zone_path):
camnum = int(img_name.split('.')[0][-3:])
zone_img = cv2.imread(os.path.join(zone_path, img_name))
zones[camnum] = zone_img
self.zones = zones
self.current_cam = 0
def set_cam(self, cam):
self.current_cam = cam
def get_zone(self, bbox):
cx = int((bbox[0] + bbox[2]) / 2)
cy = int((bbox[1] + bbox[3]) / 2)
pix = self.zones[self.current_cam][max(cy - 1, 0), max(cx - 1, 0), :]
zone_num = 0
if pix[0] > 50 and pix[1] > 50 and pix[2] > 50: # w
zone_num = 1
if pix[0] < 50 and pix[1] < 50 and pix[2] > 50: # r
zone_num = 2
if pix[0] < 50 and pix[1] > 50 and pix[2] < 50: # g
zone_num = 3
if pix[0] > 50 and pix[1] < 50 and pix[2] < 50: # b
zone_num = 4
return zone_num
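    # Example (added): with a 2x2 zone image that is pure red in BGR,
    #   z = Zone.__new__(Zone)  # skip disk loading for the sketch
    #   z.zones = {41: np.full((2, 2, 3), (0, 0, 255), np.uint8)}
    #   z.current_cam = 41
    #   z.get_zone([0, 0, 2, 2])  # -> 2 (red)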
def is_ignore(self, zone_list, frame_list, cid):
        # 0: not in any crossroad; 1: white, 2: red, 3: green, 4: blue
zs, ze = zone_list[0], zone_list[-1]
fs, fe = frame_list[0], frame_list[-1]
if zs == ze:
            # always in the same zone; exclude white/red cases
if ze in [1, 2]:
return 2
if zs != 0 and 0 in zone_list:
return 0
if fe - fs > 1500:
return 2
if fs < 2:
if cid in [45]:
if ze in [3, 4]:
return 1
else:
return 2
if fe > 1999:
if cid in [41]:
if ze not in [3]:
return 2
else:
return 0
if fs < 2 or fe > 1999:
if ze in [3, 4]:
return 0
if ze in [3, 4]:
return 1
return 2
else:
            # if the zone changes within the camera
if cid in [41, 42, 43, 44, 45, 46]:
                # comes from the road extension, exclude it
if zs == 1 and ze == 2:
return 2
if zs == 2 and ze == 1:
return 2
if cid in [41]:
                # on camera 41, no vehicle comes into camera 42
if (zs in [1, 2]) and ze == 4:
return 2
if zs == 4 and (ze in [1, 2]):
return 2
if cid in [46]:
                # on camera 46, no vehicle comes into camera 45
if (zs in [1, 2]) and ze == 3:
return 2
if zs == 3 and (ze in [1, 2]):
return 2
return 0
def filter_mot(self, mot_list, cid):
new_mot_list = dict()
sub_mot_list = dict()
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
for f in frame_list:
zone_list.append(tracklet_dict[f]['zone'])
if self.is_ignore(zone_list, frame_list, cid) == 0:
new_mot_list[tracklet] = tracklet_dict
if self.is_ignore(zone_list, frame_list, cid) == 1:
sub_mot_list[tracklet] = tracklet_dict
return new_mot_list
def filter_bbox(self, mot_list, cid):
new_mot_list = dict()
yh = self.zones[cid].shape[0]
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
bbox_list = []
for f in frame_list:
bbox_list.append(tracklet_dict[f]['bbox'])
bbox_x = [b[0] for b in bbox_list]
bbox_y = [b[1] for b in bbox_list]
bbox_w = [b[2] - b[0] for b in bbox_list]
bbox_h = [b[3] - b[1] for b in bbox_list]
new_frame_list = list()
if 0 in bbox_x or 0 in bbox_y:
b0 = [
i for i, f in enumerate(frame_list)
if bbox_x[i] < 5 or bbox_y[i] + bbox_h[i] > yh - 5
]
if len(b0) == len(frame_list):
if cid in [41, 42, 44, 45, 46]:
continue
max_w = max(bbox_w)
max_h = max(bbox_h)
for i, f in enumerate(frame_list):
if bbox_w[i] > max_w * BBOX_B and bbox_h[
i] > max_h * BBOX_B:
new_frame_list.append(f)
else:
l_i, r_i = 0, len(frame_list) - 1
if len(b0) == 0:
continue
if b0[0] == 0:
for i in range(len(b0) - 1):
if b0[i] + 1 == b0[i + 1]:
l_i = b0[i + 1]
else:
break
if b0[-1] == len(frame_list) - 1:
for i in range(len(b0) - 1):
i = len(b0) - 1 - i
if b0[i] - 1 == b0[i - 1]:
r_i = b0[i - 1]
else:
break
max_lw, max_lh = bbox_w[l_i], bbox_h[l_i]
max_rw, max_rh = bbox_w[r_i], bbox_h[r_i]
for i, f in enumerate(frame_list):
if i < l_i:
if bbox_w[i] > max_lw * BBOX_B and bbox_h[
i] > max_lh * BBOX_B:
new_frame_list.append(f)
elif i > r_i:
if bbox_w[i] > max_rw * BBOX_B and bbox_h[
i] > max_rh * BBOX_B:
new_frame_list.append(f)
else:
new_frame_list.append(f)
new_tracklet_dict = dict()
for f in new_frame_list:
new_tracklet_dict[f] = tracklet_dict[f]
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
def break_mot(self, mot_list, cid):
new_mot_list = dict()
new_num_tracklets = max(mot_list) + 1
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
back_tracklet = False
new_zone_f = 0
pre_frame = frame_list[0]
time_break = False
for f in frame_list:
if f - pre_frame > 100:
if cid in [44, 45]:
time_break = True
break
if not cid in [41, 44, 45, 46]:
break
pre_frame = f
new_zone = tracklet_dict[f]['zone']
if len(zone_list) > 0 and zone_list[-1] == new_zone:
continue
if new_zone_f > 1:
if len(zone_list) > 1 and new_zone in zone_list:
back_tracklet = True
zone_list.append(new_zone)
new_zone_f = 0
else:
new_zone_f += 1
if back_tracklet:
new_tracklet_dict = dict()
pre_bbox = -1
pre_arrow = 0
have_break = False
for f in frame_list:
now_bbox = tracklet_dict[f]['bbox']
if type(pre_bbox) == int:
if pre_bbox == -1:
pre_bbox = now_bbox
now_arrow = now_bbox[0] - pre_bbox[0]
if pre_arrow * now_arrow < 0 and len(
new_tracklet_dict) > 15 and not have_break:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
if have_break:
tracklet_dict[f]['id'] = new_num_tracklets
new_tracklet_dict[f] = tracklet_dict[f]
pre_bbox, pre_arrow = now_bbox, now_arrow
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
elif time_break:
new_tracklet_dict = dict()
have_break = False
pre_frame = frame_list[0]
for f in frame_list:
if f - pre_frame > 100:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
new_tracklet_dict[f] = tracklet_dict[f]
pre_frame = f
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
def intra_matching(self, mot_list, sub_mot_list):
sub_zone_dict = dict()
new_mot_list = dict()
new_mot_list, new_sub_mot_list = self.do_intra_matching2(mot_list,
sub_mot_list)
return new_mot_list
def do_intra_matching2(self, mot_list, sub_list):
new_zone_dict = dict()
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
for t1id in sub_list:
tracklet1 = sub_list[t1id]
if tracklet1 == -1:
continue
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
sim_dict = dict()
for t2id in mot_list:
tracklet2 = mot_list[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_ze == t2_zs:
if abs(t2_fs - t1_fe) < 5 and abs(t2_boxe[0] - t1_boxs[
0]) < 50 and abs(t2_boxe[1] - t1_boxs[1]) < 50:
t1_feat = tracklet1[t1_fe]['feat']
t2_feat = tracklet2[t2_fs]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if t1_zs == t2_ze:
if abs(t2_fe - t1_fs) < 5 and abs(t2_boxs[0] - t1_boxe[
0]) < 50 and abs(t2_boxs[1] - t1_boxe[1]) < 50:
t1_feat = tracklet1[t1_fs]['feat']
t2_feat = tracklet2[t2_fe]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if len(sim_dict) > 0:
max_sim = 0
max_id = 0
for t2id in sim_dict:
if sim_dict[t2id] > max_sim:
                        max_sim = sim_dict[t2id]
max_id = t2id
if max_sim > 0.5:
t2 = mot_list[max_id]
for t1f in tracklet1:
if t1f not in t2:
tracklet1[t1f]['id'] = max_id
t2[t1f] = tracklet1[t1f]
mot_list[max_id] = t2
sub_list[t1id] = -1
return mot_list, sub_list
def do_intra_matching(self, sub_zone_dict, sub_zone):
new_zone_dict = dict()
id_list = list(sub_zone_dict)
id2index = dict()
for index, id in enumerate(id_list):
id2index[id] = index
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
sim_matrix = np.zeros([len(id_list), len(id_list)])
for t1id in sub_zone_dict:
tracklet1 = sub_zone_dict[t1id]
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
t1_feat = tracklet1[t1_fe]['feat']
for t2id in sub_zone_dict:
if t1id == t2id:
continue
tracklet2 = sub_zone_dict[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_zs != t1_ze and t2_ze != t2_zs or t1_fe > t2_fs:
continue
if abs(t1_boxe[0] - t2_boxs[0]) > 50 or abs(t1_boxe[1] -
t2_boxs[1]) > 50:
continue
if t2_fs - t1_fe > 5:
continue
t2_feat = tracklet2[t2_fs]['feat']
sim_matrix[id2index[t1id], id2index[t2id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix[id2index[t2id], id2index[t1id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix = 1 - sim_matrix
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.7,
affinity='precomputed',
linkage='complete').fit_predict(sim_matrix)
new_zone_dict = dict()
label2id = dict()
for index, label in enumerate(cluster_labels):
tracklet = sub_zone_dict[id_list[index]]
if label not in label2id:
new_id = tracklet[list(tracklet)[0]]
new_tracklet = dict()
else:
new_id = label2id[label]
new_tracklet = new_zone_dict[label2id[label]]
for tf in tracklet:
tracklet[tf]['id'] = new_id
new_tracklet[tf] = tracklet[tf]
new_zone_dict[label] = new_tracklet
return new_zone_dict
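# Illustrative usage sketch (added, not part of the original file): how Zone is
# driven from trajectory_fusion() in postprocess.py. The zone_path layout (one
# color mask per camera, named like c041.png) is inferred from __init__ above.
#   zones = Zone(zone_path='datasets/zone')
#   zones.set_cam(41)
#   mot_list = parse_pt(mot_feature, zones)     # attach a zone id per box
#   mot_list = zones.break_mot(mot_list, 41)    # split direction/time breaks
#   mot_list = zones.filter_mot(mot_list, 41)   # drop ignorable tracklets
#   mot_list = zones.filter_bbox(mot_list, 41)  # drop truncated boxes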