更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/ppdet/metrics/__init__.py
+++ b/paddle_detection/ppdet/metrics/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import metrics
+from . import keypoint_metrics
+
+from .metrics import *
+from .keypoint_metrics import *
+from .pose3d_metrics import *
+
+# Public API of the package. The list is extended (never reassigned) as each
+# optional metric module is imported; reassigning it would silently drop the
+# names contributed by the modules imported earlier.
+__all__ = metrics.__all__ + keypoint_metrics.__all__
+
+from . import mot_metrics
+from .mot_metrics import *
+__all__ += mot_metrics.__all__
+
+from . import mcmot_metrics
+from .mcmot_metrics import *
+__all__ += mcmot_metrics.__all__
+
+from . import culane_metrics
+from .culane_metrics import *
+__all__ += culane_metrics.__all__
--- a/paddle_detection/ppdet/metrics/coco_utils.py
+++ b/paddle_detection/ppdet/metrics/coco_utils.py
@@ -0,0 +1,188 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import numpy as np
+import itertools
+
+from ppdet.metrics.json_results import get_det_res, get_det_poly_res, get_seg_res, get_solov2_segm_res, get_keypoint_res, get_pose3d_res
+from ppdet.metrics.map_utils import draw_pr_curve
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+
+def get_infer_results(outs, catid, bias=0):
+    """
+    Convert raw inference outputs into per-task result lists.
+
+    Args:
+        outs (dict): inference outputs; must contain 'im_id' and may contain
+            any of 'bbox' (with 'bbox_num'), 'mask', 'segm', 'keypoint',
+            'pose3d'.
+        catid: mapping from predicted label index to dataset category id,
+            forwarded to the result builders.
+        bias (int): value added to box width/height, forwarded to the bbox
+            result builders.
+
+    Returns:
+        dict: one entry per task key found in `outs`.
+
+    Raises:
+        ValueError: if `outs` is None or empty.
+
+    Note:
+        For 'keypoint' and 'pose3d' outputs, `outs['bbox_num']` is overwritten
+        in place with the number of produced results.
+    """
+    if outs is None or len(outs) == 0:
+        raise ValueError(
+            'The number of valid detection result if zero. Please use reasonable model and check input data.'
+        )
+
+    image_ids = outs['im_id']
+    results = {}
+
+    if 'bbox' in outs:
+        boxes = outs['bbox']
+        # Rows with more than 6 values are handled as polygon (rotated) boxes.
+        is_poly = len(boxes) > 0 and len(boxes[0]) > 6
+        build_bbox_res = get_det_poly_res if is_poly else get_det_res
+        results['bbox'] = build_bbox_res(
+            boxes, outs['bbox_num'], image_ids, catid, bias=bias)
+
+    if 'mask' in outs:
+        # mask post process
+        results['mask'] = get_seg_res(outs['mask'], outs['bbox'],
+                                      outs['bbox_num'], image_ids, catid)
+
+    if 'segm' in outs:
+        results['segm'] = get_solov2_segm_res(outs, image_ids, catid)
+
+    if 'keypoint' in outs:
+        keypoint_res = get_keypoint_res(outs, image_ids)
+        results['keypoint'] = keypoint_res
+        outs['bbox_num'] = [len(keypoint_res)]
+
+    if 'pose3d' in outs:
+        pose3d_res = get_pose3d_res(outs, image_ids)
+        results['pose3d'] = pose3d_res
+        outs['bbox_num'] = [len(pose3d_res)]
+
+    return results
+
+
+def cocoapi_eval(jsonfile,
+                 style,
+                 coco_gt=None,
+                 anno_file=None,
+                 max_dets=(100, 300, 1000),
+                 classwise=False,
+                 sigmas=None,
+                 use_area=True):
+    """
+    Run COCO-style evaluation on a result json file and return its stats.
+
+    Args:
+        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
+        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`, `keypoints` and `keypoints_crowd`.
+        coco_gt (COCO): Already loaded ground-truth COCO object. When None,
+                 it is built from `anno_file`, i.e. coco_gt = COCO(anno_file).
+        anno_file (str): COCO annotations file, used only if `coco_gt` is None.
+        max_dets (tuple): COCO evaluation maxDets (applied for `proposal` style only).
+        classwise (bool): Whether per-category AP and draw P-R Curve or not.
+        sigmas (nparray): keypoint labelling sigmas (forwarded for
+                 `keypoints_crowd` style only).
+        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
+                         do not have 'area', please set use_area=False.
+
+    Returns:
+        The `stats` attribute of the COCOeval object after `summarize()`.
+
+    Raises:
+        AssertionError: if both `coco_gt` and `anno_file` are None.
+    """
+    # Identity comparison: `!= None` would go through the object's own
+    # comparison operators instead of simply checking for a missing argument.
+    assert coco_gt is not None or anno_file is not None
+    if style == 'keypoints_crowd':
+        #please install xtcocotools==1.6
+        from xtcocotools.coco import COCO
+        from xtcocotools.cocoeval import COCOeval
+    else:
+        from pycocotools.coco import COCO
+        from pycocotools.cocoeval import COCOeval
+
+    if coco_gt is None:
+        coco_gt = COCO(anno_file)
+    logger.info("Start evaluate...")
+    coco_dt = coco_gt.loadRes(jsonfile)
+    if style == 'proposal':
+        # Proposals are evaluated as category-agnostic boxes.
+        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
+        coco_eval.params.useCats = 0
+        coco_eval.params.maxDets = list(max_dets)
+    elif style == 'keypoints_crowd':
+        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
+    else:
+        coco_eval = COCOeval(coco_gt, coco_dt, style)
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    if classwise:
+        # Compute per-category AP and PR curve
+        try:
+            from terminaltables import AsciiTable
+        except Exception as e:
+            logger.error(
+                'terminaltables not found, plaese install terminaltables. '
+                'for example: `pip install terminaltables`.')
+            raise e
+        precisions = coco_eval.eval['precision']
+        cat_ids = coco_gt.getCatIds()
+        # precision: (iou, recall, cls, area range, max dets)
+        assert len(cat_ids) == precisions.shape[2]
+        results_per_category = []
+        for idx, catId in enumerate(cat_ids):
+            # area range index 0: all area ranges
+            # max dets index -1: typically 100 per image
+            nm = coco_gt.loadCats(catId)[0]
+            precision = precisions[:, :, idx, 0, -1]
+            # Entries equal to -1 are excluded from the average.
+            precision = precision[precision > -1]
+            if precision.size:
+                ap = np.mean(precision)
+            else:
+                ap = float('nan')
+            results_per_category.append(
+                (str(nm["name"]), '{:0.3f}'.format(float(ap))))
+            # PR curve uses IoU index 0, area index 0 and max-dets index 2.
+            pr_array = precisions[0, :, idx, 0, 2]
+            recall_array = np.arange(0.0, 1.01, 0.01)
+            draw_pr_curve(
+                pr_array,
+                recall_array,
+                out_dir=style + '_pr_curve',
+                file_name='{}_precision_recall_curve.jpg'.format(nm["name"]))
+
+        # Lay the (category, AP) pairs out in a table of at most 3 pairs per row.
+        num_columns = min(6, len(results_per_category) * 2)
+        results_flatten = list(itertools.chain(*results_per_category))
+        headers = ['category', 'AP'] * (num_columns // 2)
+        results_2d = itertools.zip_longest(
+            * [results_flatten[i::num_columns] for i in range(num_columns)])
+        table_data = [headers]
+        table_data += [result for result in results_2d]
+        table = AsciiTable(table_data)
+        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
+        logger.info("per-category PR curve has output to {} folder.".format(
+            style + '_pr_curve'))
+    # flush coco evaluation result
+    sys.stdout.flush()
+    return coco_eval.stats
+
+
+def json_eval_results(metric, json_directory, dataset):
+    """
+    Evaluate already existing proposal.json, bbox.json and mask.json files
+    with the COCO API.
+
+    Args:
+        metric (str): must be 'COCO'.
+        json_directory (str): directory holding the json files; when falsy the
+            bare file names are looked up as given.
+        dataset: object providing `get_anno()`, which yields the annotation
+            file passed to the evaluation.
+    """
+    assert metric == 'COCO'
+    anno_file = dataset.get_anno()
+    json_paths = ['proposal.json', 'bbox.json', 'mask.json']
+    if json_directory:
+        assert os.path.exists(
+            json_directory), "The json directory:{} does not exist".format(
+                json_directory)
+        json_paths = [
+            os.path.join(str(json_directory), name) for name in json_paths
+        ]
+
+    # Each json file is evaluated with the style at the same position.
+    eval_styles = ['proposal', 'bbox', 'segm']
+    for json_path, eval_style in zip(json_paths, eval_styles):
+        if not os.path.exists(json_path):
+            logger.info("{} not exists!".format(json_path))
+            continue
+        cocoapi_eval(json_path, eval_style, anno_file=anno_file)
--- a/paddle_detection/ppdet/metrics/culane_metrics.py
+++ b/paddle_detection/ppdet/metrics/culane_metrics.py
@@ -0,0 +1,327 @@
+import os
+import cv2
+import numpy as np
+import os.path as osp
+from functools import partial
+from .metrics import Metric
+from scipy.interpolate import splprep, splev
+from scipy.optimize import linear_sum_assignment
+from shapely.geometry import LineString, Polygon
+from ppdet.utils.logger import setup_logger
+
+logger = setup_logger(__name__)
+
+# Names exported by `from .culane_metrics import *`.
+__all__ = [
+    'draw_lane', 'discrete_cross_iou', 'continuous_cross_iou', 'interp',
+    'culane_metric', 'load_culane_img_data', 'load_culane_data',
+    'eval_predictions', "CULaneMetric"
+]
+
+# Split name -> image list file, relative to the dataset directory
+# (joined with `dataset_dir` in CULaneMetric.__init__).
+LIST_FILE = {
+    'train': 'list/train_gt.txt',
+    'val': 'list/val.txt',
+    'test': 'list/test.txt',
+}
+
+# Category name -> image list file of that category, relative to the dataset
+# directory; CULaneMetric.accumulate evaluates each of these lists in turn.
+CATEGORYS = {
+    'normal': 'list/test_split/test0_normal.txt',
+    'crowd': 'list/test_split/test1_crowd.txt',
+    'hlight': 'list/test_split/test2_hlight.txt',
+    'shadow': 'list/test_split/test3_shadow.txt',
+    'noline': 'list/test_split/test4_noline.txt',
+    'arrow': 'list/test_split/test5_arrow.txt',
+    'curve': 'list/test_split/test6_curve.txt',
+    'cross': 'list/test_split/test7_cross.txt',
+    'night': 'list/test_split/test8_night.txt',
+}
+
+
+def draw_lane(lane, img=None, img_shape=None, width=30):
+    """
+    Draw a lane as a white polyline and return the image.
+
+    Args:
+        lane (np.ndarray): sequence of points; converted to int32 before
+            drawing.
+        img (np.ndarray): image to draw on (modified in place). When None a
+            zero-filled uint8 image of shape `img_shape` is created.
+        img_shape (tuple): shape of the image created when `img` is None.
+        width (int): line thickness passed to cv2.line.
+
+    Returns:
+        np.ndarray: the image the lane was drawn on.
+    """
+    canvas = np.zeros(img_shape, dtype=np.uint8) if img is None else img
+    points = lane.astype(np.int32)
+    # Connect every pair of consecutive points with one line segment.
+    for idx in range(len(points) - 1):
+        cv2.line(
+            canvas,
+            tuple(points[idx]),
+            tuple(points[idx + 1]),
+            color=(255, 255, 255),
+            thickness=width)
+    return canvas
+
+
+def discrete_cross_iou(xs, ys, width=30, img_shape=(590, 1640, 3)):
+    """
+    Pairwise IoU between two lane sets, computed on rasterized lane masks.
+
+    Every lane is drawn with `draw_lane` on a blank image of shape
+    `img_shape` and turned into a boolean mask.
+
+    Returns:
+        np.ndarray: array of shape (len(xs), len(ys)) where entry (i, j) is
+        the IoU of xs[i] and ys[j].
+    """
+
+    def _rasterize(lanes):
+        return [
+            draw_lane(
+                lane, img_shape=img_shape, width=width) > 0 for lane in lanes
+        ]
+
+    masks_x = _rasterize(xs)
+    masks_y = _rasterize(ys)
+
+    ious = np.zeros((len(masks_x), len(masks_y)))
+    for row, mask_x in enumerate(masks_x):
+        for col, mask_y in enumerate(masks_y):
+            overlap = (mask_x & mask_y).sum()
+            union = (mask_x | mask_y).sum()
+            ious[row, col] = overlap / union
+    return ious
+
+
+def continuous_cross_iou(xs, ys, width=30, img_shape=(590, 1640, 3)):
+    """
+    Pairwise IoU between two lane sets, computed on buffered geometries.
+
+    Every lane is turned into a LineString, buffered by half of `width` and
+    clipped to the image rectangle derived from `img_shape`.
+
+    Returns:
+        np.ndarray: array of shape (len(xs), len(ys)) where entry (i, j) is
+        the area of the intersection of xs[i] and ys[j] over the area of
+        their union.
+    """
+    height, img_width, _ = img_shape
+    frame = Polygon([(0, 0), (0, height - 1), (img_width - 1, height - 1),
+                     (img_width - 1, 0)])
+
+    def _to_regions(lanes):
+        regions = []
+        for lane in lanes:
+            band = LineString(lane).buffer(
+                distance=width / 2., cap_style=1, join_style=2)
+            regions.append(band.intersection(frame))
+        return regions
+
+    regions_x = _to_regions(xs)
+    regions_y = _to_regions(ys)
+
+    ious = np.zeros((len(regions_x), len(regions_y)))
+    for row, region_x in enumerate(regions_x):
+        for col, region_y in enumerate(regions_y):
+            overlap = region_x.intersection(region_y).area
+            ious[row, col] = overlap / region_x.union(region_y).area
+
+    return ious
+
+
+def interp(points, n=50):
+    """
+    Densify a polyline with a parametric spline through its points.
+
+    Args:
+        points: sequence of (x, y) pairs.
+        n (int): number of samples generated per original segment.
+
+    Returns:
+        np.ndarray: array of shape ((len(points) - 1) * n + 1, 2) holding the
+        sampled (x, y) coordinates.
+    """
+    coords_x, coords_y = [], []
+    for px, py in points:
+        coords_x.append(px)
+        coords_y.append(py)
+
+    # The spline degree is capped so that short polylines can still be fitted.
+    degree = min(3, len(points) - 1)
+    tck, params = splprep([coords_x, coords_y], s=0, t=n, k=degree)
+
+    num_samples = (len(params) - 1) * n + 1
+    samples = np.linspace(0., 1., num=num_samples)
+    return np.array(splev(samples, tck)).T
+
+
+def culane_metric(pred,
+                  anno,
+                  width=30,
+                  iou_thresholds=[0.5],
+                  official=True,
+                  img_shape=(590, 1640, 3)):
+    """
+    Count true positives, false positives and false negatives for one image.
+
+    Predicted and annotated lanes are densified with `interp`, a pairwise IoU
+    matrix is computed, and lanes are matched one-to-one by solving the
+    assignment problem on `1 - iou`.
+
+    Args:
+        pred: predicted lanes, each a sequence of (x, y) points.
+        anno: annotated lanes, each a sequence of (x, y) points.
+        width (int): lane width used when computing the IoU.
+        iou_thresholds: IoU thresholds to report; only iterated, never
+            modified.
+        official (bool): use `discrete_cross_iou` when True, otherwise
+            `continuous_cross_iou`.
+        img_shape (tuple): image shape forwarded to the IoU function.
+
+    Returns:
+        dict: threshold -> [tp, fp, fn].
+    """
+    # With no lanes on one side nothing can be matched: every prediction is a
+    # false positive and every annotation a false negative. Returning here
+    # gives the same counts as the general path without running the
+    # interpolation, rasterization and assignment on empty inputs.
+    if len(pred) == 0 or len(anno) == 0:
+        return {thr: [0, len(pred), len(anno)] for thr in iou_thresholds}
+
+    interp_pred = np.array(
+        [interp(
+            pred_lane, n=5) for pred_lane in pred], dtype=object)  # (4, 50, 2)
+    interp_anno = np.array(
+        [interp(
+            anno_lane, n=5) for anno_lane in anno], dtype=object)  # (4, 50, 2)
+
+    if official:
+        ious = discrete_cross_iou(
+            interp_pred, interp_anno, width=width, img_shape=img_shape)
+    else:
+        ious = continuous_cross_iou(
+            interp_pred, interp_anno, width=width, img_shape=img_shape)
+
+    # Minimizing (1 - iou) maximizes the total IoU of the matched pairs.
+    row_ind, col_ind = linear_sum_assignment(1 - ious)
+    matched_ious = ious[row_ind, col_ind]
+
+    _metric = {}
+    for thr in iou_thresholds:
+        tp = int((matched_ious > thr).sum())
+        fp = len(pred) - tp
+        fn = len(anno) - tp
+        _metric[thr] = [tp, fp, fn]
+    return _metric
+
+
+def load_culane_img_data(path):
+    """
+    Read the lanes stored in one lines file.
+
+    Each line of the file is a whitespace separated list of numbers read as
+    alternating x and y values of one lane. Lanes with fewer than two points
+    are dropped.
+
+    Returns:
+        list: one list of (x, y) float tuples per kept lane.
+    """
+    with open(path, 'r') as data_file:
+        raw_lines = data_file.readlines()
+
+    # Convert every line to floats before pairing any of them.
+    numeric_rows = [[float(token) for token in raw.split()]
+                    for raw in raw_lines]
+
+    lanes = []
+    for row in numeric_rows:
+        points = [(row[i], row[i + 1]) for i in range(0, len(row), 2)]
+        if len(points) >= 2:
+            lanes.append(points)
+
+    return lanes
+
+
+def load_culane_data(data_dir, file_list_path):
+    """
+    Load the lanes of every image named in a list file.
+
+    For each entry of `file_list_path` a leading '/' is removed, '.jpg' is
+    replaced by '.lines.txt' and the result is joined with `data_dir`; the
+    resulting file is read with `load_culane_img_data`.
+
+    Returns:
+        list: one lane list per entry, in list-file order.
+    """
+    with open(file_list_path, 'r') as file_list:
+        entries = file_list.readlines()
+
+    lane_files = []
+    for entry in entries:
+        # Drop the leading slash so the entry stays relative to data_dir.
+        relative = entry[1:] if entry[0] == '/' else entry
+        relative = relative.rstrip().replace('.jpg', '.lines.txt')
+        lane_files.append(os.path.join(data_dir, relative))
+
+    return [load_culane_img_data(lane_file) for lane_file in lane_files]
+
+
+def eval_predictions(pred_dir,
+                     anno_dir,
+                     list_path,
+                     iou_thresholds=[0.5],
+                     width=30,
+                     official=True,
+                     sequential=False):
+    """
+    Evaluate the predictions of every image named in `list_path`.
+
+    Args:
+        pred_dir (str): directory holding the predicted lines files.
+        anno_dir (str): directory holding the annotated lines files.
+        list_path (str): list file naming the images to evaluate.
+        iou_thresholds: IoU thresholds to report; only iterated, never
+            modified.
+        width (int): lane width forwarded to `culane_metric`.
+        official (bool): forwarded to `culane_metric`.
+        sequential (bool): evaluate in this process when True, otherwise in a
+            multiprocessing pool with one worker per CPU.
+
+    Returns:
+        dict: threshold -> dict with 'TP', 'FP', 'FN', 'Precision', 'Recall'
+        and 'F1'. When more than two thresholds are given, an extra 'mean'
+        entry holds the summed counts and the threshold-averaged scores.
+    """
+    logger.info('Calculating metric for List: {}'.format(list_path))
+    predictions = load_culane_data(pred_dir, list_path)
+    annotations = load_culane_data(anno_dir, list_path)
+    img_shape = (590, 1640, 3)
+    if sequential:
+        # Materialize the results: `map` returns a one-shot iterator, which
+        # the first sum() below would exhaust, leaving fp/fn and every later
+        # threshold at zero.
+        results = list(
+            map(partial(
+                culane_metric,
+                width=width,
+                official=official,
+                iou_thresholds=iou_thresholds,
+                img_shape=img_shape),
+                predictions,
+                annotations))
+    else:
+        from multiprocessing import Pool, cpu_count
+        from itertools import repeat
+        with Pool(cpu_count()) as p:
+            results = p.starmap(culane_metric,
+                                zip(predictions, annotations,
+                                    repeat(width),
+                                    repeat(iou_thresholds),
+                                    repeat(official), repeat(img_shape)))
+
+    mean_f1, mean_prec, mean_recall, total_tp, total_fp, total_fn = 0, 0, 0, 0, 0, 0
+    ret = {}
+    for thr in iou_thresholds:
+        tp = sum(m[thr][0] for m in results)
+        fp = sum(m[thr][1] for m in results)
+        fn = sum(m[thr][2] for m in results)
+        # With tp == 0 all three scores are defined as 0, which also avoids
+        # dividing by zero.
+        precision = float(tp) / (tp + fp) if tp != 0 else 0
+        recall = float(tp) / (tp + fn) if tp != 0 else 0
+        f1 = 2 * precision * recall / (precision + recall) if tp != 0 else 0
+        logger.info('iou thr: {:.2f}, tp: {}, fp: {}, fn: {},'
+                    'precision: {}, recall: {}, f1: {}'.format(
+                        thr, tp, fp, fn, precision, recall, f1))
+        mean_f1 += f1 / len(iou_thresholds)
+        mean_prec += precision / len(iou_thresholds)
+        mean_recall += recall / len(iou_thresholds)
+        total_tp += tp
+        total_fp += fp
+        total_fn += fn
+        ret[thr] = {
+            'TP': tp,
+            'FP': fp,
+            'FN': fn,
+            'Precision': precision,
+            'Recall': recall,
+            'F1': f1
+        }
+    if len(iou_thresholds) > 2:
+        logger.info(
+            'mean result, total_tp: {}, total_fp: {}, total_fn: {},'
+            'precision: {}, recall: {}, f1: {}'.format(
+                total_tp, total_fp, total_fn, mean_prec, mean_recall, mean_f1))
+        ret['mean'] = {
+            'TP': total_tp,
+            'FP': total_fp,
+            'FN': total_fn,
+            'Precision': mean_prec,
+            'Recall': mean_recall,
+            'F1': mean_f1
+        }
+    return ret
+
+
+class CULaneMetric(Metric):
+    """
+    Metric that writes lane predictions to lines files and evaluates them
+    with `eval_predictions`.
+
+    Args:
+        cfg: config object; `ori_img_h` and `ori_img_w` are read from it.
+        output_eval (str): directory the prediction files are written to;
+            "evaluation" when None.
+        split (str): key of LIST_FILE selecting the image list to evaluate.
+        dataset_dir (str): root directory of the dataset.
+    """
+
+    def __init__(self,
+                 cfg,
+                 output_eval=None,
+                 split="test",
+                 dataset_dir="dataset/CULane/"):
+        super(CULaneMetric, self).__init__()
+        self.output_eval = "evaluation" if output_eval is None else output_eval
+        self.dataset_dir = dataset_dir
+        self.split = split
+        self.list_path = osp.join(dataset_dir, LIST_FILE[split])
+        self.cfg = cfg
+        # reset() creates the accumulation buffers.
+        self.reset()
+
+    def reset(self):
+        """Drop everything accumulated so far."""
+        self.predictions = []
+        self.img_names = []
+        self.lanes = []
+        self.eval_results = {}
+
+    def get_prediction_string(self, pred):
+        """
+        Serialize the predicted lanes of one image.
+
+        Every lane is called with the normalized sample rows and is expected
+        to return normalized x positions (assumption inferred from the usage
+        below; the lane type is defined elsewhere). Points whose x lies
+        outside [0, 1) are dropped and the rest is scaled to pixels.
+
+        Returns:
+            str: one line per non-empty lane with alternating "x y" values.
+        """
+        ys = np.arange(270, 590, 8) / self.cfg.ori_img_h
+        out = []
+        for lane in pred:
+            xs = lane(ys)
+            valid_mask = (xs >= 0) & (xs < 1)
+            xs = xs * self.cfg.ori_img_w
+            lane_xs = xs[valid_mask]
+            lane_ys = ys[valid_mask] * self.cfg.ori_img_h
+            # Points are written in reverse sampling order.
+            lane_xs, lane_ys = lane_xs[::-1], lane_ys[::-1]
+            lane_str = ' '.join([
+                '{:.5f} {:.5f}'.format(x, y) for x, y in zip(lane_xs, lane_ys)
+            ])
+            if lane_str != '':
+                out.append(lane_str)
+
+        return '\n'.join(out)
+
+    def accumulate(self):
+        """
+        Write all prediction files, then evaluate them.
+
+        Fills `self.eval_results` with 'F1@50' and the full 'result' dict of
+        the evaluation over `self.list_path`.
+        """
+        # loss_lines[k] collects the images that miss k + 1 lanes.
+        loss_lines = [[], [], [], []]
+        # Fallback so the summary files below have a target even when there
+        # are no predictions; otherwise the directory of the last image is
+        # used, as before.
+        output_dir = self.output_eval
+        for idx, pred in enumerate(self.predictions):
+            img_name = self.img_names[idx]
+            output_dir = os.path.join(self.output_eval,
+                                      os.path.dirname(img_name))
+            # Replace the three-character extension with 'lines.txt'.
+            output_filename = os.path.basename(img_name)[:-3] + 'lines.txt'
+            os.makedirs(output_dir, exist_ok=True)
+            output = self.get_prediction_string(pred)
+
+            # store loss lines
+            lanes = self.lanes[idx]
+            missing = len(lanes) - len(pred)
+            if missing in [1, 2, 3, 4]:
+                loss_lines[missing - 1].append(img_name)
+
+            with open(os.path.join(output_dir, output_filename),
+                      'w') as out_file:
+                out_file.write(output)
+
+        os.makedirs(output_dir, exist_ok=True)
+        for i, names in enumerate(loss_lines):
+            with open(
+                    os.path.join(output_dir, 'loss_{}_lines.txt'.format(i + 1)),
+                    'w') as f:
+                for name in names:
+                    f.write(name + '\n')
+
+        # Per-category evaluation; the returned dict is not stored, the
+        # numbers are logged by eval_predictions.
+        for cate, cate_file in CATEGORYS.items():
+            result = eval_predictions(
+                self.output_eval,
+                self.dataset_dir,
+                os.path.join(self.dataset_dir, cate_file),
+                iou_thresholds=[0.5],
+                official=True)
+
+        result = eval_predictions(
+            self.output_eval,
+            self.dataset_dir,
+            self.list_path,
+            iou_thresholds=np.linspace(0.5, 0.95, 10),
+            official=True)
+        self.eval_results['F1@50'] = result[0.5]['F1']
+        self.eval_results['result'] = result
+
+    def update(self, inputs, outputs):
+        """Append the predictions, image names and ground-truth lanes of one batch."""
+        assert len(inputs['img_name']) == len(outputs['lanes'])
+        self.predictions.extend(outputs['lanes'])
+        self.img_names.extend(inputs['img_name'])
+        self.lanes.extend(inputs['lane_line'])
+
+    def log(self):
+        """Log the evaluation results."""
+        logger.info(self.eval_results)
+
+    # abstract method for getting metric results
+    def get_results(self):
+        """Return the evaluation results dict."""
+        return self.eval_results
--- a/paddle_detection/ppdet/metrics/json_results.py
+++ b/paddle_detection/ppdet/metrics/json_results.py
@@ -0,0 +1,175 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import six
+import numpy as np
+
+
+def get_det_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
+    """
+    Convert detections into a list of COCO-style bbox result dicts.
+
+    Args:
+        bboxes: array whose rows are [label, score, xmin, ymin, xmax, ymax],
+            the rows of all images stored one after another.
+        bbox_nums: number of rows belonging to each image.
+        image_id: per-image ids; element [i][0] is the id of image i.
+        label_to_cat_id_map: mapping from label index to category id.
+        bias (int): value added to the box width and height.
+
+    Returns:
+        list: dicts with 'image_id', 'category_id', 'bbox' ([x, y, w, h])
+        and 'score'. Rows with a negative label are skipped.
+    """
+    det_res = []
+    cursor = 0  # index of the next unread row of bboxes
+    for img_idx, det_count in enumerate(bbox_nums):
+        cur_image_id = int(image_id[img_idx][0])
+        for _ in range(det_count):
+            row = bboxes[cursor]
+            cursor += 1
+            label, score, xmin, ymin, xmax, ymax = row.tolist()
+            label = int(label)
+            if label < 0:
+                continue
+            det_res.append({
+                'image_id': cur_image_id,
+                'category_id': label_to_cat_id_map[label],
+                'bbox': [xmin, ymin, xmax - xmin + bias, ymax - ymin + bias],
+                'score': score
+            })
+    return det_res
+
+
+def get_det_poly_res(bboxes, bbox_nums, image_id, label_to_cat_id_map, bias=0):
+    """
+    Convert polygon detections into a list of COCO-style result dicts.
+
+    Args:
+        bboxes: array whose rows are [label, score, x1, y1, x2, y2, x3, y3,
+            x4, y4], the rows of all images stored one after another.
+        bbox_nums: number of rows belonging to each image.
+        image_id: per-image ids; element [i][0] is the id of image i.
+        label_to_cat_id_map: mapping from label index to category id.
+        bias (int): accepted for signature parity with get_det_res; not used.
+
+    Returns:
+        list: dicts with 'image_id', 'category_id', 'bbox' (the eight corner
+        coordinates) and 'score'. Rows with a negative label are skipped.
+    """
+    det_res = []
+    cursor = 0  # index of the next unread row of bboxes
+    for img_idx, det_count in enumerate(bbox_nums):
+        cur_image_id = int(image_id[img_idx][0])
+        for _ in range(det_count):
+            row = bboxes[cursor]
+            cursor += 1
+            label, score, x1, y1, x2, y2, x3, y3, x4, y4 = row.tolist()
+            label = int(label)
+            if label < 0:
+                continue
+            det_res.append({
+                'image_id': cur_image_id,
+                'category_id': label_to_cat_id_map[label],
+                'bbox': [x1, y1, x2, y2, x3, y3, x4, y4],
+                'score': score
+            })
+    return det_res
+
+
+def strip_mask(mask):
+    """
+    Crop a batch of masks to the region that is not marked with -1.
+
+    The number of kept rows and columns is the count of entries different
+    from -1 in the first column and the first row of the first mask.
+    """
+    first_row = mask[0, 0, :]
+    first_col = mask[0, :, 0]
+    valid_h = np.count_nonzero(first_col != -1)
+    valid_w = np.count_nonzero(first_row != -1)
+    return mask[:, :valid_h, :valid_w]
+
+
+def get_seg_res(masks, bboxes, mask_nums, image_id, label_to_cat_id_map):
+    """
+    Convert instance masks into a list of COCO-style segmentation results.
+
+    Args:
+        masks: array of masks, the masks of all images stored one after
+            another.
+        bboxes: array with one row per mask; column 0 is the label and
+            column 1 the score.
+        mask_nums: number of masks belonging to each image.
+        image_id: per-image ids; element [i][0] is the id of image i.
+        label_to_cat_id_map: mapping from label index to category id.
+
+    Returns:
+        list: dicts with 'image_id', 'category_id', 'segmentation' (RLE) and
+        'score'. Masks whose label is -1 are skipped.
+    """
+    import pycocotools.mask as mask_util
+    seg_res = []
+    k = 0
+    for i in range(len(mask_nums)):
+        cur_image_id = int(image_id[i][0])
+        det_nums = mask_nums[i]
+        # An image without masks has nothing to crop or encode; strip_mask
+        # would fail indexing the first mask of an empty batch.
+        if det_nums == 0:
+            continue
+        mask_i = masks[k:k + det_nums]
+        mask_i = strip_mask(mask_i)
+        for j in range(det_nums):
+            mask = mask_i[j].astype(np.uint8)
+            score = float(bboxes[k][1])
+            label = int(bboxes[k][0])
+            k = k + 1
+            if label == -1:
+                continue
+            cat_id = label_to_cat_id_map[label]
+            rle = mask_util.encode(
+                np.array(
+                    mask[:, :, None], order="F", dtype="uint8"))[0]
+            # The encoded counts are decoded to text on Python 3.
+            if six.PY3:
+                if 'counts' in rle:
+                    rle['counts'] = rle['counts'].decode("utf8")
+            sg_res = {
+                'image_id': cur_image_id,
+                'category_id': cat_id,
+                'segmentation': rle,
+                'score': score
+            }
+            seg_res.append(sg_res)
+    return seg_res
+
+
+def get_solov2_segm_res(results, image_id, num_id_to_cat_id_map):
+    """
+    Convert segmentation outputs into a list of COCO-style results.
+
+    Args:
+        results (dict): must contain 'segm' (masks), 'cate_label' and
+            'cate_score', indexed in parallel.
+        image_id: image ids; only element [0][0] is used, for every result.
+        num_id_to_cat_id_map: mapping from label index to category id.
+
+    Returns:
+        list of dicts with 'image_id', 'category_id', 'segmentation' (RLE)
+        and 'score', or None when there are no masks.
+    """
+    import pycocotools.mask as mask_util
+    segm_res = []
+    # for each batch
+    segms = results['segm'].astype(np.uint8)
+    clsid_labels = results['cate_label']
+    clsid_scores = results['cate_score']
+    lengths = segms.shape[0]
+    im_id = int(image_id[0][0])
+    if lengths == 0 or segms is None:
+        return None
+    # for each sample
+    # NOTE(review): the last mask is never converted (range stops at
+    # lengths - 1) -- presumably the producer appends a placeholder entry;
+    # confirm against the model head before changing this bound.
+    for i in range(lengths - 1):
+        clsid = int(clsid_labels[i])
+        catid = num_id_to_cat_id_map[clsid]
+        score = float(clsid_scores[i])
+        mask = segms[i]
+        segm = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
+        # Decode the encoded counts to text.
+        segm['counts'] = segm['counts'].decode('utf8')
+        coco_res = {
+            'image_id': im_id,
+            'category_id': catid,
+            'segmentation': segm,
+            'score': score
+        }
+        segm_res.append(coco_res)
+    return segm_res
+
+
+def get_keypoint_res(results, im_id):
+    """
+    Convert keypoint predictions into a list of COCO-style annotations.
+
+    Args:
+        results (dict): 'keypoint' holds, per image, a (keypoints, scores)
+            pair with one entry per detected instance.
+        im_id (np.ndarray): image ids, one per image.
+
+    Returns:
+        list: dicts with 'image_id', 'category_id' (always 1), 'keypoints'
+        (flattened), 'score', 'area' and 'bbox'; area and bbox come from the
+        extent of the x and y values, read at strides of three.
+    """
+    anns = []
+    preds = results['keypoint']
+    for img_idx in range(im_id.shape[0]):
+        image_id = im_id[img_idx].item()
+        instance_kpts, instance_scores = preds[img_idx]
+        for kpt, score in zip(instance_kpts, instance_scores):
+            flat = kpt.flatten()
+            xs = flat[0::3]
+            ys = flat[1::3]
+            x_min = np.min(xs).item()
+            x_max = np.max(xs).item()
+            y_min = np.min(ys).item()
+            y_max = np.max(ys).item()
+            box_w = x_max - x_min
+            box_h = y_max - y_min
+            anns.append({
+                'image_id': image_id,
+                'category_id': 1,  # XXX hard code
+                'keypoints': flat.tolist(),
+                'score': float(score),
+                'area': box_w * box_h,
+                'bbox': [x_min, y_min, box_w, box_h]
+            })
+    return anns
+
+
+def get_pose3d_res(results, im_id):
+    """
+    Convert 3D pose predictions into a list of annotation dicts.
+
+    Args:
+        results (dict): 'pose3d' holds one prediction per image.
+        im_id (np.ndarray): image ids, one per image.
+
+    Returns:
+        list: one dict per image with 'image_id', 'category_id' (always 1),
+        'pose3d' (as nested lists) and 'score' (always 1.0).
+    """
+    preds = results['pose3d']
+    return [
+        {
+            'image_id': im_id[img_idx].item(),
+            'category_id': 1,  # XXX hard code
+            'pose3d': preds[img_idx].tolist(),
+            'score': float(1.)
+        } for img_idx in range(im_id.shape[0])
+    ]
--- a/paddle_detection/ppdet/metrics/keypoint_metrics.py
+++ b/paddle_detection/ppdet/metrics/keypoint_metrics.py
@@ -0,0 +1,571 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import os
+import json
+from collections import defaultdict, OrderedDict
+import numpy as np
+import paddle
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from ..modeling.keypoint_utils import oks_nms, keypoint_pck_accuracy, keypoint_auc, keypoint_epe
+from scipy.io import loadmat, savemat
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+# Public evaluator classes exported by a star-import of this module.
+# NOTE: 'KeyPointTopDownCOCOWholeBadyHandEval' is spelled exactly like the
+# class defined in this file; do not "correct" it here alone.
+__all__ = [
+    'KeyPointTopDownCOCOEval', 'KeyPointTopDownCOCOWholeBadyHandEval',
+    'KeyPointTopDownMPIIEval'
+]
+
+
+class KeyPointTopDownCOCOEval(object):
+    """Collect top-down keypoint predictions and score them with COCOeval.
+
+    Refer to
+        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+        Copyright (c) Microsoft, under the MIT License.
+
+    Args:
+        anno_file (str): Annotation file loaded with ``pycocotools`` COCO.
+        num_samples (int): Number of rows pre-allocated for predictions.
+        num_joints (int): Number of keypoints per instance.
+        output_eval (str): Directory receiving ``keypoints_results.json``.
+        iou_type (str): Stored on the instance; ``accumulate`` always builds
+            the evaluator with the 'keypoints' type.
+        in_vis_thre (float): Joint scores above this value take part in the
+            rescoring average.
+        oks_thre (float): Threshold handed to ``oks_nms``.
+        save_prediction_only (bool): When True, only the result file is
+            written and no evaluation is run.
+    """
+
+    def __init__(self,
+                 anno_file,
+                 num_samples,
+                 num_joints,
+                 output_eval,
+                 iou_type='keypoints',
+                 in_vis_thre=0.2,
+                 oks_thre=0.9,
+                 save_prediction_only=False):
+        super(KeyPointTopDownCOCOEval, self).__init__()
+        self.coco = COCO(anno_file)
+        self.num_samples = num_samples
+        self.num_joints = num_joints
+        self.iou_type = iou_type
+        self.in_vis_thre = in_vis_thre
+        self.oks_thre = oks_thre
+        self.output_eval = output_eval
+        self.res_file = os.path.join(output_eval, "keypoints_results.json")
+        self.save_prediction_only = save_prediction_only
+        self.reset()
+
+    def reset(self):
+        """Re-allocate the prediction buffers and clear evaluation state."""
+        self.results = {
+            'all_preds': np.zeros(
+                (self.num_samples, self.num_joints, 3), dtype=np.float32),
+            # Columns: center (0:2), scale (2:4), area (4), score (5).
+            'all_boxes': np.zeros((self.num_samples, 6)),
+            'image_path': []
+        }
+        self.eval_results = {}
+        self.idx = 0
+
+    @staticmethod
+    def _to_numpy(value):
+        """Return ``value.numpy()`` for a paddle Tensor, else ``value``."""
+        return value.numpy() if isinstance(value, paddle.Tensor) else value
+
+    def update(self, inputs, outputs):
+        """Store one batch of predictions at the current write offset.
+
+        Args:
+            inputs (dict): Reads 'image', 'center', 'scale', 'score' and
+                'im_id'; values may be paddle Tensors or array-likes.
+            outputs (dict): ``outputs['keypoint'][0]`` must unpack to
+                ``(kpts, _)``.
+        """
+        kpts, _ = outputs['keypoint'][0]
+
+        num_images = inputs['image'].shape[0]
+        rows = slice(self.idx, self.idx + num_images)
+        self.results['all_preds'][rows, :, 0:3] = kpts[:, :, 0:3]
+
+        center = self._to_numpy(inputs['center'])
+        scale = self._to_numpy(inputs['scale'])
+        score = self._to_numpy(inputs['score'])
+        boxes = self.results['all_boxes']
+        boxes[rows, 0:2] = center[:, 0:2]
+        boxes[rows, 2:4] = scale[:, 0:2]
+        boxes[rows, 4] = np.prod(scale * 200, 1)
+        boxes[rows, 5] = np.squeeze(score)
+
+        self.results['image_path'].extend(self._to_numpy(inputs['im_id']))
+        self.idx += num_images
+
+    def _write_coco_keypoint_results(self, keypoints):
+        """Serialize the per-image keypoints to ``self.res_file`` as JSON."""
+        data_pack = {
+            'cat_id': 1,
+            'cls': 'person',
+            'ann_type': 'keypoints',
+            'keypoints': keypoints
+        }
+        results = self._coco_keypoint_results_one_category_kernel(data_pack)
+        # exist_ok avoids the check-then-create race of exists() + makedirs().
+        os.makedirs(self.output_eval, exist_ok=True)
+        with open(self.res_file, 'w') as f:
+            json.dump(results, f, sort_keys=True, indent=4)
+            logger.info(f'The keypoint result is saved to {self.res_file}.')
+        try:
+            # Read back through a context manager so the handle is closed.
+            with open(self.res_file) as f:
+                json.load(f)
+        except ValueError:
+            # The file did not parse (json.JSONDecodeError is a ValueError):
+            # overwrite its last line with a closing bracket.
+            with open(self.res_file, 'r') as f:
+                content = f.readlines()
+            content[-1] = ']'
+            with open(self.res_file, 'w') as f:
+                f.writelines(content)
+
+    def _coco_keypoint_results_one_category_kernel(self, data_pack):
+        """Flatten per-image instance lists into COCO result records.
+
+        Args:
+            data_pack (dict): Reads 'cat_id' and 'keypoints' (an iterable of
+                per-image lists of instance dicts).
+
+        Returns:
+            list[dict]: One record per instance; empty images are skipped.
+        """
+        cat_id = data_pack['cat_id']
+        cat_results = []
+
+        for img_kpts in data_pack['keypoints']:
+            if len(img_kpts) == 0:
+                continue
+
+            _key_points = np.array([inst['keypoints'] for inst in img_kpts])
+            _key_points = _key_points.reshape(_key_points.shape[0], -1)
+
+            cat_results.extend({
+                'image_id': inst['image'],
+                'category_id': cat_id,
+                'keypoints': _key_points[k].tolist(),
+                'score': inst['score'],
+                'center': list(inst['center']),
+                'scale': list(inst['scale'])
+            } for k, inst in enumerate(img_kpts))
+
+        return cat_results
+
+    def get_final_results(self, preds, all_boxes, img_path):
+        """Group predictions per image, rescore, apply OKS NMS and save.
+
+        Args:
+            preds: Array indexed as ``preds[i][joint][2]`` for the joint score;
+                ``preds.shape[1]`` is the joint count.
+            all_boxes: Per-sample rows laid out as in ``reset``.
+            img_path: Per-sample image ids, converted with ``int``.
+        """
+        _kpts = [{
+            'keypoints': kpt,
+            'center': all_boxes[idx][0:2],
+            'scale': all_boxes[idx][2:4],
+            'area': all_boxes[idx][4],
+            'score': all_boxes[idx][5],
+            'image': int(img_path[idx])
+        } for idx, kpt in enumerate(preds)]
+        # image x person x (keypoints)
+        kpts = defaultdict(list)
+        for kpt in _kpts:
+            kpts[kpt['image']].append(kpt)
+
+        # rescoring and oks nms
+        num_joints = preds.shape[1]
+        in_vis_thre = self.in_vis_thre
+        oks_nmsed_kpts = []
+        for img_kpts in kpts.values():
+            for n_p in img_kpts:
+                box_score = n_p['score']
+                kpt_score = 0
+                valid_num = 0
+                for n_jt in range(num_joints):
+                    t_s = n_p['keypoints'][n_jt][2]
+                    if t_s > in_vis_thre:
+                        kpt_score = kpt_score + t_s
+                        valid_num = valid_num + 1
+                if valid_num != 0:
+                    kpt_score = kpt_score / valid_num
+                # rescoring
+                n_p['score'] = kpt_score * box_score
+
+            keep = oks_nms(list(img_kpts), self.oks_thre)
+
+            # An empty keep list leaves the image's instances unfiltered.
+            if len(keep) == 0:
+                oks_nmsed_kpts.append(img_kpts)
+            else:
+                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])
+
+        self._write_coco_keypoint_results(oks_nmsed_kpts)
+
+    def accumulate(self):
+        """Write the result file and, unless disabled, run COCO evaluation."""
+        self.get_final_results(self.results['all_preds'],
+                               self.results['all_boxes'],
+                               self.results['image_path'])
+        if self.save_prediction_only:
+            logger.info(f'The keypoint result is saved to {self.res_file} '
+                        'and do not evaluate the mAP.')
+            return
+        coco_dt = self.coco.loadRes(self.res_file)
+        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
+        coco_eval.params.useSegm = None
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        coco_eval.summarize()
+
+        self.eval_results['keypoint'] = list(coco_eval.stats)
+
+    def log(self):
+        """Print the stored keypoint stats as a pipe-delimited table."""
+        if self.save_prediction_only:
+            return
+        stats_names = [
+            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
+            'AR .75', 'AR (M)', 'AR (L)'
+        ]
+        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
+        # One separator cell per header column.
+        print('|---' * len(stats_names) + '|')
+
+        print(' '.join([
+            '| {:.3f}'.format(value) for value in self.eval_results['keypoint']
+        ]) + ' |')
+
+    def get_results(self):
+        """Return the dict of evaluation results filled by ``accumulate``."""
+        return self.eval_results
+
+
+class KeyPointTopDownCOCOWholeBadyHandEval(object):
+    """Evaluate hand keypoints (PCK / AUC / EPE) against COCO-style hand labels.
+
+    Args:
+        anno_file (str): Annotation file loaded with ``pycocotools`` COCO; each
+            annotation is read for ``{left,right}hand_valid``,
+            ``{left,right}hand_kpts`` and ``{left,right}hand_box``.
+        num_samples (int): Number of rows pre-allocated for predictions.
+        num_joints (int): Number of keypoints per hand.
+        output_eval (str): Directory receiving ``keypoints_results.json``.
+        save_prediction_only (bool): When True, only the result file is
+            written and no evaluation is run.
+    """
+
+    def __init__(self,
+                 anno_file,
+                 num_samples,
+                 num_joints,
+                 output_eval,
+                 save_prediction_only=False):
+        super(KeyPointTopDownCOCOWholeBadyHandEval, self).__init__()
+        self.coco = COCO(anno_file)
+        self.num_samples = num_samples
+        self.num_joints = num_joints
+        self.output_eval = output_eval
+        self.res_file = os.path.join(output_eval, "keypoints_results.json")
+        self.save_prediction_only = save_prediction_only
+        self.parse_dataset()
+        self.reset()
+
+    def parse_dataset(self):
+        """Build ``self.db``: one ground-truth entry per valid annotated hand."""
+        gt_db = []
+        num_joints = self.num_joints
+        coco = self.coco
+        for img_id in coco.getImgIds():
+            ann_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
+
+            for obj in coco.loadAnns(ann_ids):
+                # ``side`` instead of ``type`` so the builtin is not shadowed.
+                for side in ['left', 'right']:
+                    if not (obj[f'{side}hand_valid'] and
+                            max(obj[f'{side}hand_kpts']) > 0):
+                        continue
+
+                    joints = np.zeros((num_joints, 3), dtype=np.float32)
+                    joints_vis = np.zeros((num_joints, 3), dtype=np.float32)
+
+                    keypoints = np.array(obj[f'{side}hand_kpts'])
+                    keypoints = keypoints.reshape(-1, 3)
+                    joints[:, :2] = keypoints[:, :2]
+                    # Third column is clamped to at most 1 and broadcast to
+                    # both visibility columns.
+                    joints_vis[:, :2] = np.minimum(1, keypoints[:, 2:3])
+
+                    gt_db.append({
+                        'bbox': obj[f'{side}hand_box'],
+                        'gt_joints': joints,
+                        'joints_vis': joints_vis,
+                    })
+        self.db = gt_db
+
+    def reset(self):
+        """Re-allocate the prediction buffer and clear evaluation state."""
+        self.results = {
+            'preds': np.zeros(
+                (self.num_samples, self.num_joints, 3), dtype=np.float32),
+        }
+        self.eval_results = {}
+        self.idx = 0
+
+    def update(self, inputs, outputs):
+        """Store one batch of predictions at the current write offset.
+
+        Args:
+            inputs (dict): Only ``inputs['image'].shape[0]`` is read.
+            outputs (dict): ``outputs['keypoint'][0]`` must unpack to
+                ``(kpts, _)``.
+        """
+        kpts, _ = outputs['keypoint'][0]
+        num_images = inputs['image'].shape[0]
+        rows = slice(self.idx, self.idx + num_images)
+        self.results['preds'][rows, :, 0:3] = kpts[:, :, 0:3]
+        self.idx += num_images
+
+    def accumulate(self):
+        """Write the result file and, unless disabled, compute the metrics."""
+        self.get_final_results(self.results['preds'])
+        if self.save_prediction_only:
+            logger.info(f'The keypoint result is saved to {self.res_file} '
+                        'and do not evaluate the mAP.')
+            return
+
+        self.eval_results = self.evaluate(self.res_file, ('PCK', 'AUC', 'EPE'))
+
+    def get_final_results(self, preds):
+        """Convert each prediction to a JSON-friendly dict and save them."""
+        kpts = [{'keypoints': kpt.tolist()} for kpt in preds]
+
+        self._write_keypoint_results(kpts)
+
+    def _write_keypoint_results(self, keypoints):
+        """Serialize ``keypoints`` to ``self.res_file`` as JSON."""
+        # exist_ok avoids the check-then-create race of exists() + makedirs().
+        os.makedirs(self.output_eval, exist_ok=True)
+        with open(self.res_file, 'w') as f:
+            json.dump(keypoints, f, sort_keys=True, indent=4)
+            logger.info(f'The keypoint result is saved to {self.res_file}.')
+        try:
+            # Read back through a context manager so the handle is closed.
+            with open(self.res_file) as f:
+                json.load(f)
+        except ValueError:
+            # The file did not parse (json.JSONDecodeError is a ValueError):
+            # overwrite its last line with a closing bracket.
+            with open(self.res_file, 'r') as f:
+                content = f.readlines()
+            content[-1] = ']'
+            with open(self.res_file, 'w') as f:
+                f.writelines(content)
+
+    def log(self):
+        """Print every stored metric as ``name : value``."""
+        if self.save_prediction_only:
+            return
+        for item, value in self.eval_results.items():
+            print("{} : {}".format(item, value))
+
+    def get_results(self):
+        """Return the evaluation results filled by ``accumulate``."""
+        return self.eval_results
+
+    def evaluate(self, res_file, metrics, pck_thr=0.2, auc_nor=30):
+        """Keypoint evaluation.
+
+        Args:
+            res_file (str): Json file stored prediction results.
+            metrics (str | list[str]): Metric to be performed.
+                Options: 'PCK', 'AUC', 'EPE'.
+            pck_thr (float): PCK threshold, default as 0.2.
+            auc_nor (float): AUC normalization factor, default as 30 pixel.
+
+        Returns:
+            OrderedDict: Metric name mapped to its value, in the order
+                PCK, AUC, EPE for the metrics that were requested.
+        """
+        info_str = []
+
+        with open(res_file, 'r') as fin:
+            preds = json.load(fin)
+        assert len(preds) == len(self.db), (
+            f'number of predictions ({len(preds)}) does not match the '
+            f'number of ground-truth hands ({len(self.db)})')
+
+        outputs = []
+        gts = []
+        masks = []
+        threshold_bbox = []
+
+        for pred, item in zip(preds, self.db):
+            # Drop the last column of both arrays before comparison.
+            outputs.append(np.array(pred['keypoints'])[:, :-1])
+            gts.append(np.array(item['gt_joints'])[:, :-1])
+            masks.append((np.array(item['joints_vis'])[:, 0]) > 0)
+            if 'PCK' in metrics:
+                bbox = np.array(item['bbox'])
+                # Normalize by the largest of the trailing box entries.
+                bbox_thr = np.max(bbox[2:])
+                threshold_bbox.append(np.array([bbox_thr, bbox_thr]))
+
+        outputs = np.array(outputs)
+        gts = np.array(gts)
+        masks = np.array(masks)
+        threshold_bbox = np.array(threshold_bbox)
+
+        if 'PCK' in metrics:
+            _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr,
+                                              threshold_bbox)
+            info_str.append(('PCK', pck))
+
+        if 'AUC' in metrics:
+            info_str.append(('AUC', keypoint_auc(outputs, gts, masks, auc_nor)))
+
+        if 'EPE' in metrics:
+            info_str.append(('EPE', keypoint_epe(outputs, gts, masks)))
+
+        return OrderedDict(info_str)
+
+
+class KeyPointTopDownMPIIEval(object):
+    """Collect top-down keypoint predictions and compute PCKh on MPII.
+
+    Args:
+        anno_file (str): Annotation file path; ``mpii_gt_val.mat`` is loaded
+            from the same directory during evaluation.
+        num_samples (int): Accepted for interface parity; not used here.
+        num_joints (int): Accepted for interface parity; not used here.
+        output_eval (str): Directory receiving ``keypoints_results.json``.
+        oks_thre (float): Accepted for interface parity; not used here.
+        save_prediction_only (bool): When True, only the result file is
+            written and no evaluation is run.
+    """
+
+    def __init__(self,
+                 anno_file,
+                 num_samples,
+                 num_joints,
+                 output_eval,
+                 oks_thre=0.9,
+                 save_prediction_only=False):
+        super(KeyPointTopDownMPIIEval, self).__init__()
+        self.ann_file = anno_file
+        self.res_file = os.path.join(output_eval, "keypoints_results.json")
+        self.save_prediction_only = save_prediction_only
+        self.reset()
+
+    def reset(self):
+        """Drop all accumulated batches and evaluation results."""
+        self.results = []
+        self.eval_results = {}
+        self.idx = 0
+
+    @staticmethod
+    def _to_numpy(value):
+        """Return ``value.numpy()`` for a paddle Tensor, else ``value``."""
+        return value.numpy() if isinstance(value, paddle.Tensor) else value
+
+    def update(self, inputs, outputs):
+        """Append one batch of predictions.
+
+        Args:
+            inputs (dict): Reads 'image', 'center', 'scale', 'score' and
+                'image_file'; array values may be paddle Tensors or
+                array-likes.
+            outputs (dict): ``outputs['keypoint'][0]`` must unpack to
+                ``(kpts, _)``.
+        """
+        kpts, _ = outputs['keypoint'][0]
+
+        num_images = inputs['image'].shape[0]
+        center = self._to_numpy(inputs['center'])
+        scale = self._to_numpy(inputs['scale'])
+        score = self._to_numpy(inputs['score'])
+
+        # Columns: center (0:2), scale (2:4), area (4), score (5).
+        boxes = np.zeros((num_images, 6))
+        boxes[:, 0:2] = center[:, 0:2]
+        boxes[:, 2:4] = scale[:, 0:2]
+        boxes[:, 4] = np.prod(scale * 200, 1)
+        boxes[:, 5] = np.squeeze(score)
+
+        self.results.append({
+            'preds': kpts[:, :, 0:3],
+            'boxes': boxes,
+            'image_path': inputs['image_file'],
+        })
+
+    def accumulate(self):
+        """Write the result file and, unless disabled, compute PCKh."""
+        self._mpii_keypoint_results_save()
+        if self.save_prediction_only:
+            logger.info(f'The keypoint result is saved to {self.res_file} '
+                        'and do not evaluate the mAP.')
+            return
+
+        self.eval_results = self.evaluate(self.results)
+
+    def _mpii_keypoint_results_save(self):
+        """Flatten every stored batch to per-sample records and dump as JSON."""
+        results = []
+        for res in self.results:
+            if not res:
+                continue
+            # ``res`` is a dict, so ``len(res)`` is its key count; the sample
+            # count has to come from the prediction array instead.
+            num_samples = len(res['preds'])
+            results.extend({
+                'preds': res['preds'][k].tolist(),
+                'boxes': res['boxes'][k].tolist(),
+                'image_path': res['image_path'][k],
+            } for k in range(num_samples))
+        # Create the output directory like the sibling evaluators do.
+        out_dir = os.path.dirname(self.res_file)
+        if out_dir:
+            os.makedirs(out_dir, exist_ok=True)
+        with open(self.res_file, 'w') as f:
+            json.dump(results, f, sort_keys=True, indent=4)
+            logger.info(f'The keypoint result is saved to {self.res_file}.')
+
+    def log(self):
+        """Print every stored metric as ``name : value``."""
+        if self.save_prediction_only:
+            return
+        for item, value in self.eval_results.items():
+            print("{} : {}".format(item, value))
+
+    def get_results(self):
+        """Return the evaluation results filled by ``accumulate``."""
+        return self.eval_results
+
+    def evaluate(self, outputs, savepath=None):
+        """Evaluate PCKh for MPII dataset. refer to
+        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+        Copyright (c) Microsoft, under the MIT License.
+
+        Args:
+            outputs(list(preds, boxes)):
+
+                * preds (np.ndarray[N,K,3]): The first two dimensions are
+                  coordinates, score is the third dimension of the array.
+                * boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
+                  , scale[1],area, score]
+            savepath (str | None): When given, the shifted predictions are
+                also saved to ``<savepath>/pred.mat``.
+
+        Returns:
+            OrderedDict: PCKh per joint group plus the overall 'PCKh' and
+                'PCKh@0.1' entries.
+        """
+
+        preds = np.stack(
+            [sample for output in outputs for sample in output['preds']])
+
+        # convert 0-based index to 1-based index,
+        # and get the first two dimensions.
+        preds = preds[..., :2] + 1.0
+
+        if savepath is not None:
+            pred_file = os.path.join(savepath, 'pred.mat')
+            savemat(pred_file, mdict={'preds': preds})
+
+        SC_BIAS = 0.6
+        threshold = 0.5
+
+        gt_file = os.path.join(
+            os.path.dirname(self.ann_file), 'mpii_gt_val.mat')
+        gt_dict = loadmat(gt_file)
+        dataset_joints = gt_dict['dataset_joints']
+        jnt_missing = gt_dict['jnt_missing']
+        pos_gt_src = gt_dict['pos_gt_src']
+        headboxes_src = gt_dict['headboxes_src']
+
+        pos_pred_src = np.transpose(preds, [1, 2, 0])
+
+        def joint_index(name):
+            # Column index of the joint called ``name`` in the ground truth.
+            return np.where(dataset_joints == name)[1][0]
+
+        head = joint_index('head')
+        lsho = joint_index('lsho')
+        lelb = joint_index('lelb')
+        lwri = joint_index('lwri')
+        lhip = joint_index('lhip')
+        lkne = joint_index('lkne')
+        lank = joint_index('lank')
+
+        rsho = joint_index('rsho')
+        relb = joint_index('relb')
+        rwri = joint_index('rwri')
+        rkne = joint_index('rkne')
+        rank = joint_index('rank')
+        rhip = joint_index('rhip')
+
+        jnt_visible = 1 - jnt_missing
+        uv_error = pos_pred_src - pos_gt_src
+        uv_err = np.linalg.norm(uv_error, axis=1)
+        headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :]
+        headsizes = np.linalg.norm(headsizes, axis=0)
+        headsizes *= SC_BIAS
+        scale = headsizes * np.ones((len(uv_err), 1), dtype=np.float32)
+        scaled_uv_err = uv_err / scale
+        scaled_uv_err = scaled_uv_err * jnt_visible
+        jnt_count = np.sum(jnt_visible, axis=1)
+        less_than_threshold = (scaled_uv_err <= threshold) * jnt_visible
+        PCKh = 100. * np.sum(less_than_threshold, axis=1) / jnt_count
+
+        # save
+        rng = np.arange(0, 0.5 + 0.01, 0.01)
+        pckAll = np.zeros((len(rng), 16), dtype=np.float32)
+
+        # ``thr`` keeps the sweep from overwriting ``threshold`` above.
+        for r, thr in enumerate(rng):
+            less_than_threshold = (scaled_uv_err <= thr) * jnt_visible
+            pckAll[r, :] = 100. * np.sum(less_than_threshold,
+                                         axis=1) / jnt_count
+
+        # Rows 6 and 7 are masked out of the weighted averages below.
+        PCKh = np.ma.array(PCKh, mask=False)
+        PCKh.mask[6:8] = True
+
+        jnt_count = np.ma.array(jnt_count, mask=False)
+        jnt_count.mask[6:8] = True
+        jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64)
+
+        # NOTE(review): pckAll[11] corresponds to rng[11] == 0.11, not 0.10;
+        # confirm whether row 10 was intended for 'PCKh@0.1'.
+        name_value = [  #noqa
+            ('Head', PCKh[head]),
+            ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])),
+            ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])),
+            ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])),
+            ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])),
+            ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])),
+            ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])),
+            ('PCKh', np.sum(PCKh * jnt_ratio)),
+            ('PCKh@0.1', np.sum(pckAll[11, :] * jnt_ratio))
+        ]
+        name_value = OrderedDict(name_value)
+
+        return name_value
+
+    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
+        """sort kpts and remove the repeated ones."""
+        kpts = sorted(kpts, key=lambda x: x[key])
+        num = len(kpts)
+        # Walk backwards so deletions do not shift unvisited indices.
+        for i in range(num - 1, 0, -1):
+            if kpts[i][key] == kpts[i - 1][key]:
+                del kpts[i]
+
+        return kpts
--- a/paddle_detection/ppdet/metrics/map_utils.py
+++ b/paddle_detection/ppdet/metrics/map_utils.py
@@ -0,0 +1,436 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import os
+import sys
+import numpy as np
+import itertools
+import paddle
+from ppdet.modeling.rbox_utils import poly2rbox_np
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+# Names exported by a star-import of this module.
+__all__ = [
+    'draw_pr_curve',
+    'bbox_area',
+    'jaccard_overlap',
+    'prune_zero_padding',
+    'DetectionMAP',
+    'ap_per_class',
+    'compute_ap',
+]
+
+
+def draw_pr_curve(precision,
+                  recall,
+                  iou=0.5,
+                  out_dir='pr_curve',
+                  file_name='precision_recall_curve.jpg'):
+    """Plot a precision/recall curve and save it as an image.
+
+    Args:
+        precision: Y values handed to ``plt.plot``.
+        recall: X values handed to ``plt.plot``.
+        iou (float): IoU value shown in the figure title.
+        out_dir (str): Directory for the image; created if missing.
+        file_name (str): File name of the image inside ``out_dir``.
+
+    Raises:
+        Exception: Whatever importing ``matplotlib.pyplot`` raised, after
+            an error has been logged.
+    """
+    # exist_ok avoids the check-then-create race of exists() + makedirs().
+    os.makedirs(out_dir, exist_ok=True)
+    output_path = os.path.join(out_dir, file_name)
+    try:
+        import matplotlib.pyplot as plt
+    except Exception:
+        logger.error('Matplotlib not found, please install matplotlib, '
+                     'for example: `pip install matplotlib`.')
+        raise
+    plt.cla()
+    plt.figure('P-R Curve')
+    plt.title('Precision/Recall Curve(IoU={})'.format(iou))
+    plt.xlabel('Recall')
+    plt.ylabel('Precision')
+    plt.grid(True)
+    plt.plot(recall, precision)
+    plt.savefig(output_path)
+
+
+def bbox_area(bbox, is_bbox_normalized):
+    """
+    Return the area of ``bbox`` given as [xmin, ymin, xmax, ymax].
+
+    For non-normalized boxes one unit is added to each side length;
+    for normalized boxes nothing is added.
+    """
+    side_offset = 1. - float(is_bbox_normalized)
+    x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
+    return (x_max - x_min + side_offset) * (y_max - y_min + side_offset)
+
+
+def jaccard_overlap(pred, gt, is_bbox_normalized=False):
+    """
+    Return the intersection-over-union of two [xmin, ymin, xmax, ymax] boxes.
+
+    Boxes that do not overlap, or only touch along an edge, give 0.
+    """
+    disjoint = (pred[0] >= gt[2] or pred[2] <= gt[0] or
+                pred[1] >= gt[3] or pred[3] <= gt[1])
+    if disjoint:
+        return 0.
+    intersection = [
+        max(pred[0], gt[0]),
+        max(pred[1], gt[1]),
+        min(pred[2], gt[2]),
+        min(pred[3], gt[3]),
+    ]
+    inter_size = bbox_area(intersection, is_bbox_normalized)
+    union_size = (bbox_area(pred, is_bbox_normalized) +
+                  bbox_area(gt, is_bbox_normalized) - inter_size)
+    return float(inter_size) / union_size
+
+
+def calc_rbox_iou(pred, gt_poly):
+    """
+    Return the IoU between two rotated boxes given as polygons.
+
+    The axis-aligned bounding rectangles are compared first; when they do
+    not overlap that result is returned and the rotated IoU op is skipped.
+    """
+    pred_pts = np.array(pred, np.float32).reshape(-1, 2)
+    gt_pts = np.array(gt_poly, np.float32).reshape(-1, 2)
+
+    # Cheap pre-check on the enclosing axis-aligned rectangles.
+    pred_x, pred_y = pred_pts[:, 0], pred_pts[:, 1]
+    gt_x, gt_y = gt_pts[:, 0], gt_pts[:, 1]
+    pred_rect = [np.min(pred_x), np.min(pred_y), np.max(pred_x), np.max(pred_y)]
+    gt_rect = [np.min(gt_x), np.min(gt_y), np.max(gt_x), np.max(gt_y)]
+    coarse_iou = jaccard_overlap(pred_rect, gt_rect, False)
+    if coarse_iou <= 0:
+        return coarse_iou
+
+    # Rotated-box IoU through the custom op.
+    pred_rbox = poly2rbox_np(pred_pts.reshape(-1, 8)).reshape(-1, 5)
+    gt_rbox = poly2rbox_np(gt_pts.reshape(-1, 8)).reshape(-1, 5)
+    try:
+        from ext_op import rbox_iou
+    except Exception as e:
+        print("import custom_ops error, try install ext_op " \
+                  "following ppdet/ext_op/README.md", e)
+        sys.stdout.flush()
+        sys.exit(-1)
+    gt_tensor = paddle.to_tensor(gt_rbox, dtype='float32')
+    pred_tensor = paddle.to_tensor(pred_rbox, dtype='float32')
+    return rbox_iou(gt_tensor, pred_tensor).numpy()[0][0]
+
+
+def prune_zero_padding(gt_box, gt_label, difficult=None):
+    """Truncate the ground truths at the first all-zero box.
+
+    Returns:
+        tuple: ``(gt_box, gt_label, difficult)`` cut to the leading entries
+            before the first all-zero box; ``difficult`` stays None if it
+            was not given.
+    """
+    total = len(gt_box)
+    num_valid = next(
+        (i for i in range(total) if (gt_box[i] == 0).all()), total)
+    kept_difficult = None if difficult is None else difficult[:num_valid]
+    return gt_box[:num_valid], gt_label[:num_valid], kept_difficult
+
+
+class DetectionMAP(object):
+    """
+    Calculate detection mean average precision.
+    Currently support two types: 11point and integral
+
+    Args:
+        class_num (int): The class number.
+        overlap_thresh (float): The threshold of overlap
+            ratio between prediction bounding box and 
+            ground truth bounding box for deciding 
+            true/false positive. Default 0.5.
+        map_type (str): Calculation method of mean average
+            precision, currently support '11point' and
+            'integral'. Default '11point'.
+        is_bbox_normalized (bool): Whether bounding boxes
+            is normalized to range[0, 1]. Default False.
+        evaluate_difficult (bool): Whether to evaluate
+            difficult bounding boxes. Default False.
+        catid2name (dict): Mapping between category id and category name.
+        classwise (bool): Whether per-category AP and draw
+            P-R Curve or not.
+    """
+
+    def __init__(self,
+                 class_num,
+                 overlap_thresh=0.5,
+                 map_type='11point',
+                 is_bbox_normalized=False,
+                 evaluate_difficult=False,
+                 catid2name=None,
+                 classwise=False):
+        """Store the evaluation settings and reset the accumulators.
+
+        When ``catid2name`` is left as None the class names fall back to
+        the stringified class indices ``'0' .. str(class_num - 1)``.
+        """
+        self.class_num = class_num
+        self.overlap_thresh = overlap_thresh
+        assert map_type in ['11point', 'integral'], \
+                "map_type currently only support '11point' "\
+                "and 'integral'"
+        self.map_type = map_type
+        self.is_bbox_normalized = is_bbox_normalized
+        self.evaluate_difficult = evaluate_difficult
+        self.classwise = classwise
+        if catid2name is None:
+            # The declared default used to crash on ``None.values()``.
+            self.classes = [str(i) for i in range(class_num)]
+        else:
+            self.classes = list(catid2name.values())
+        self.reset()
+
+    def update(self, bbox, score, label, gt_box, gt_label, difficult=None):
+        """
+        Fold one image's predictions and ground truths into the running
+        per-class statistics.
+
+        Each prediction is compared with the same-class ground truths and
+        recorded as ``[score, 1.0]`` (first hit on a ground truth) or
+        ``[score, 0.0]``. Hits on difficult ground truths are not recorded
+        unless ``evaluate_difficult`` is set.
+        """
+        if difficult is None:
+            difficult = np.zeros_like(gt_label)
+
+        # Per-class ground-truth totals.
+        for gt_cls, is_hard in zip(gt_label, difficult):
+            if self.evaluate_difficult or int(is_hard) == 0:
+                self.class_gt_counts[int(np.array(gt_cls))] += 1
+
+        # Match every prediction to its best same-class ground truth.
+        matched = [False] * len(gt_label)
+        for box, conf, cls in zip(bbox, score, label):
+            cls_id = int(cls)
+            pred = box.tolist() if isinstance(box, np.ndarray) else box
+            best_idx, best_iou = -1, -1.0
+            for gt_idx, gt_cls in enumerate(gt_label):
+                if int(gt_cls) != cls_id:
+                    continue
+                candidate = gt_box[gt_idx]
+                # Eight values mean a polygon, i.e. a rotated box.
+                if len(candidate) == 8:
+                    iou = calc_rbox_iou(pred, candidate)
+                else:
+                    iou = jaccard_overlap(pred, candidate,
+                                          self.is_bbox_normalized)
+                if iou > best_iou:
+                    best_idx, best_iou = gt_idx, iou
+
+            if not (best_iou > self.overlap_thresh):
+                self.class_score_poss[cls_id].append([conf, 0.0])
+                continue
+            if not (self.evaluate_difficult or
+                    int(np.array(difficult[best_idx])) == 0):
+                continue
+            hit = 0.0 if matched[best_idx] else 1.0
+            self.class_score_poss[cls_id].append([conf, hit])
+            matched[best_idx] = True
+
+    def reset(self):
+        """
+        Clear the per-class score lists, ground-truth counts and mAP.
+        """
+        num_classes = self.class_num
+        self.mAP = 0.0
+        self.class_gt_counts = [0 for _ in range(num_classes)]
+        self.class_score_poss = [list() for _ in range(num_classes)]
+
+    def accumulate(self):
+        """
+        Accumulate metric results and calculate mAP
+
+        Classes without ground truths are skipped. Classes with ground
+        truths but no predictions count towards the mean with an AP of 0
+        and get no per-class entry. Results are stored in ``self.mAP``
+        and ``self.eval_results``.
+        """
+        mAP = 0.
+        valid_cnt = 0
+        eval_results = []
+        per_class = zip(self.class_score_poss, self.class_gt_counts)
+        for cls_idx, (score_pos, count) in enumerate(per_class):
+            if count == 0:
+                continue
+            if len(score_pos) == 0:
+                valid_cnt += 1
+                continue
+
+            accum_tp_list, accum_fp_list = \
+                    self._get_tp_fp_accum(score_pos)
+            precision = []
+            recall = []
+            for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list):
+                precision.append(float(ac_tp) / (ac_tp + ac_fp))
+                recall.append(float(ac_tp) / count)
+
+            one_class_ap = 0.0
+            if self.map_type == '11point':
+                # Best precision at recall levels 1.0, 0.9, ..., 0.0,
+                # scanning the curve from its end.
+                max_precisions = [0.] * 11
+                start_idx = len(precision) - 1
+                for j in range(10, -1, -1):
+                    for i in range(start_idx, -1, -1):
+                        if recall[i] < float(j) / 10.:
+                            start_idx = i
+                            if j > 0:
+                                max_precisions[j - 1] = max_precisions[j]
+                                break
+                        else:
+                            if max_precisions[j] < precision[i]:
+                                max_precisions[j] = precision[i]
+                one_class_ap = sum(max_precisions) / 11.
+                mAP += one_class_ap
+                valid_cnt += 1
+            elif self.map_type == 'integral':
+                # Sum precision over every step where recall changes.
+                prev_recall = 0.
+                for i in range(len(precision)):
+                    recall_gap = abs(recall[i] - prev_recall)
+                    if recall_gap > 1e-6:
+                        one_class_ap += precision[i] * recall_gap
+                        prev_recall = recall[i]
+                mAP += one_class_ap
+                valid_cnt += 1
+            else:
+                logger.error("Unspported mAP type {}".format(self.map_type))
+                sys.exit(1)
+            # Name the entry by class position; ``valid_cnt - 1`` drifts
+            # as soon as an earlier class has no ground truths.
+            if cls_idx < len(self.classes):
+                class_name = self.classes[cls_idx]
+            else:
+                class_name = str(cls_idx)
+            eval_results.append({
+                'class': class_name,
+                'ap': one_class_ap,
+                'precision': precision,
+                'recall': recall,
+            })
+        self.eval_results = eval_results
+        self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP
+
+    def get_map(self):
+        """
+        Get mAP result
+
+        When ``self.classwise`` is set, a PR curve is drawn for every
+        entry of ``self.eval_results`` into the ``voc_pr_curve`` folder
+        and a per-category AP table is logged before returning.
+        """
+        if self.mAP is None:
+            logger.error("mAP is not calculated.")
+        if not self.classwise:
+            return self.mAP
+
+        # Compute per-category AP and PR curve
+        try:
+            from terminaltables import AsciiTable
+        except Exception as e:
+            logger.error(
+                'terminaltables not found, plaese install terminaltables. '
+                'for example: `pip install terminaltables`.')
+            raise e
+
+        results_per_category = []
+        for entry in self.eval_results:
+            cls_name = entry['class']
+            results_per_category.append(
+                (str(cls_name), '{:0.3f}'.format(float(entry['ap']))))
+            draw_pr_curve(
+                entry['precision'],
+                entry['recall'],
+                out_dir='voc_pr_curve',
+                file_name='{}_precision_recall_curve.jpg'.format(cls_name))
+
+        # lay the (category, AP) pairs out over at most three column pairs
+        num_columns = min(6, len(results_per_category) * 2)
+        results_flatten = list(
+            itertools.chain.from_iterable(results_per_category))
+        headers = ['category', 'AP'] * (num_columns // 2)
+        columns = [
+            results_flatten[i::num_columns] for i in range(num_columns)
+        ]
+        table_data = [headers] + list(itertools.zip_longest(*columns))
+        table = AsciiTable(table_data)
+        logger.info('Per-category of VOC AP: \n{}'.format(table.table))
+        logger.info(
+            "per-category PR curve has output to voc_pr_curve folder.")
+        return self.mAP
+
+    def _get_tp_fp_accum(self, score_pos_list):
+        """
+        Calculate accumulating true/false positive results from
+        [score, pos] records
+
+        Records are ranked by score in descending order; the two returned
+        lists hold the running true-positive and false-positive counts
+        after each ranked record.
+        """
+        ranked = sorted(score_pos_list, key=lambda rec: rec[0], reverse=True)
+        tp_running, fp_running = 0, 0
+        tp_curve, fp_curve = [], []
+        for _, is_pos in ranked:
+            hit = int(is_pos)
+            tp_running += hit
+            tp_curve.append(tp_running)
+            fp_running += 1 - hit
+            fp_curve.append(fp_running)
+        return tp_curve, fp_curve
+
+
+def ap_per_class(tp, conf, pred_cls, target_cls):
+    """
+    Computes the average precision, given the recall and precision curves.
+    Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
+
+    Args:
+        tp (list): True positives.
+        conf (list): Objectness value from 0-1.
+        pred_cls (list): Predicted object classes.
+        target_cls (list): Target object classes.
+
+    Returns:
+        Tuple of (ap, classes, recall, precision): ``classes`` are the
+        unique class ids cast to int32, the other three are arrays with
+        one entry per evaluated class.
+    """
+    tp = np.array(tp)
+    conf = np.array(conf)
+    pred_cls = np.array(pred_cls)
+    target_cls = np.array(target_cls)
+
+    # Sort by objectness, highest first
+    order = np.argsort(-conf)
+    tp, conf, pred_cls = tp[order], conf[order], pred_cls[order]
+
+    # Every class that shows up in predictions or targets
+    unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
+
+    # Build the precision-recall curve and AP class by class
+    ap, p, r = [], [], []
+    for c in unique_classes:
+        mask = pred_cls == c
+        n_gt = sum(target_cls == c)  # Number of ground truth objects
+        n_p = sum(mask)  # Number of predicted objects
+
+        if (n_p == 0) and (n_gt == 0):
+            continue
+        if (n_p == 0) or (n_gt == 0):
+            # one side is empty: everything is zero for this class
+            ap.append(0)
+            r.append(0)
+            p.append(0)
+            continue
+
+        # Running false/true positive counts
+        fpc = np.cumsum(1 - tp[mask])
+        tpc = np.cumsum(tp[mask])
+
+        # Recall curve and its final value
+        recall_curve = tpc / (n_gt + 1e-16)
+        r.append(tpc[-1] / (n_gt + 1e-16))
+
+        # Precision curve and its final value
+        precision_curve = tpc / (tpc + fpc)
+        p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
+
+        # AP from recall-precision curve
+        ap.append(compute_ap(recall_curve, precision_curve))
+
+    return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(
+        p)
+
+
+def compute_ap(recall, precision):
+    """
+    Computes the average precision, given the recall and precision curves.
+    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
+
+    Args:
+        recall (list): The recall curve.
+        precision (list): The precision curve.
+
+    Returns:
+        The average precision as computed in py-faster-rcnn.
+    """
+    # pad both curves with sentinel values at the two ends
+    mrec = np.concatenate(([0.], recall, [1.]))
+    mpre = np.concatenate(([0.], precision, [0.]))
+
+    # precision envelope: each entry becomes the maximum of itself and
+    # everything to its right (running maximum taken from the end)
+    mpre = np.maximum.accumulate(mpre[::-1])[::-1]
+
+    # area under the PR curve: only positions where recall changes
+    # contribute a (\Delta recall) * precision term
+    steps = np.where(mrec[1:] != mrec[:-1])[0]
+    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
--- a/paddle_detection/ppdet/metrics/mcmot_metrics.py
+++ b/paddle_detection/ppdet/metrics/mcmot_metrics.py
@@ -0,0 +1,473 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import copy
+import sys
+import math
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+
+from .metrics import Metric
+# motmetrics is optional: when it cannot be loaded the module still imports
+# and only prints a warning; MCMOTEvaluator raises later if it is used.
+try:
+    import motmetrics as mm
+    from motmetrics.math_util import quiet_divide
+    metrics = mm.metrics.motchallenge_metrics
+    mh = mm.metrics.create()
+except Exception:
+    # Deliberately broader than ImportError so a broken installation is
+    # reported the same way, but no longer a bare ``except`` that would
+    # also swallow KeyboardInterrupt / SystemExit.
+    print(
+        'Warning: Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+    )
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['MCMOTEvaluator', 'MCMOTMetric']
+
+# Metric names requested from motmetrics when a summary is computed
+# (passed as ``metrics`` to MCMOTEvaluator.get_summary).
+METRICS_LIST = [
+    'num_frames', 'num_matches', 'num_switches', 'num_transfer', 'num_ascend',
+    'num_migrate', 'num_false_positives', 'num_misses', 'num_detections',
+    'num_objects', 'num_predictions', 'num_unique_objects', 'mostly_tracked',
+    'partially_tracked', 'mostly_lost', 'num_fragmentations', 'motp', 'mota',
+    'precision', 'recall', 'idfp', 'idfn', 'idtp', 'idp', 'idr', 'idf1'
+]
+
+# Column name -> display name, passed as ``namemap`` to
+# mm.io.render_summary when a summary table is printed.
+NAME_MAP = {
+    'num_frames': 'num_frames',
+    'num_matches': 'num_matches',
+    'num_switches': 'IDs',
+    'num_transfer': 'IDt',
+    'num_ascend': 'IDa',
+    'num_migrate': 'IDm',
+    'num_false_positives': 'FP',
+    'num_misses': 'FN',
+    'num_detections': 'num_detections',
+    'num_objects': 'num_objects',
+    'num_predictions': 'num_predictions',
+    'num_unique_objects': 'GT',
+    'mostly_tracked': 'MT',
+    'partially_tracked': 'partially_tracked',
+    'mostly_lost': 'ML',
+    'num_fragmentations': 'FM',
+    'motp': 'MOTP',
+    'mota': 'MOTA',
+    'precision': 'Prcn',
+    'recall': 'Rcll',
+    'idfp': 'idfp',
+    'idfn': 'idfn',
+    'idtp': 'idtp',
+    'idp': 'IDP',
+    'idr': 'IDR',
+    'idf1': 'IDF1'
+}
+
+
+def parse_accs_metrics(seq_acc, index_name, verbose=False):
+    """
+    Parse the evaluation indicators of multiple MOTAccumulator
+
+    Args:
+        seq_acc: accumulators handed to MCMOTEvaluator.get_summary.
+        index_name: row names for the accumulators.
+        verbose (bool): print the rendered summary table when True.
+
+    Returns:
+        The summary table with the 'OVERALL' MOTP replaced by a
+        detection-weighted value.
+    """
+    mh = mm.metrics.create()
+    summary = MCMOTEvaluator.get_summary(seq_acc, index_name, METRICS_LIST)
+
+    # NOTE(review): the weighted sum runs over every row of ``summary``,
+    # which already contains the 'OVERALL' row - confirm this is intended.
+    weighted_motp = (summary['motp'] * summary['num_detections']).sum()
+    overall_detections = summary.loc['OVERALL', 'num_detections']
+    summary.loc['OVERALL', 'motp'] = weighted_motp / overall_detections
+
+    if verbose:
+        print(
+            mm.io.render_summary(
+                summary, formatters=mh.formatters, namemap=NAME_MAP))
+
+    return summary
+
+
+def seqs_overall_metrics(summary_df, verbose=False):
+    """
+    Calculate overall metrics for multiple sequences
+
+    Count-like columns are summed over all rows, ratio-like columns are
+    recomputed from those sums through MCMOTMetricOverall, and the result
+    is appended to a copy of ``summary_df`` as the row 'overall_calc'.
+
+    Args:
+        summary_df: table with one row per sequence.
+        verbose (bool): print the rendered table when True.
+
+    Returns:
+        The copied table including the 'overall_calc' row.
+    """
+    add_col = [
+        'num_frames', 'num_matches', 'num_switches', 'num_transfer',
+        'num_ascend', 'num_migrate', 'num_false_positives', 'num_misses',
+        'num_detections', 'num_objects', 'num_predictions',
+        'num_unique_objects', 'mostly_tracked', 'partially_tracked',
+        'mostly_lost', 'num_fragmentations', 'idfp', 'idfn', 'idtp'
+    ]
+    calc_col = ['motp', 'mota', 'precision', 'recall', 'idp', 'idr', 'idf1']
+    calc_df = summary_df.copy()
+
+    # plain sums first, they feed the derived metrics below
+    overall_dic = {col: calc_df[col].sum() for col in add_col}
+    for col in calc_col:
+        overall_fn = getattr(MCMOTMetricOverall, col + '_overall')
+        overall_dic[col] = overall_fn(calc_df, overall_dic)
+
+    overall_df = pd.DataFrame(overall_dic, index=['overall_calc'])
+    calc_df = pd.concat([calc_df, overall_df])
+
+    if verbose:
+        mh = mm.metrics.create()
+        print(
+            mm.io.render_summary(
+                calc_df, formatters=mh.formatters, namemap=NAME_MAP))
+
+    return calc_df
+
+
+class MCMOTMetricOverall(object):
+    """
+    Overall (cross-sequence) versions of the ratio metrics.
+
+    Every ``<metric>_overall`` function takes the per-sequence table
+    ``summary_df`` and ``overall_dic``, the dict of already summed count
+    columns, and returns one value. They are looked up by name with
+    ``getattr`` in ``seqs_overall_metrics``, so all share one signature
+    even where ``summary_df`` is not needed. They are static methods so
+    that they behave the same on the class and on an instance.
+    """
+
+    @staticmethod
+    def motp_overall(summary_df, overall_dic):
+        # per-sequence MOTP weighted by the number of detections
+        motp = quiet_divide((summary_df['motp'] *
+                             summary_df['num_detections']).sum(),
+                            overall_dic['num_detections'])
+        return motp
+
+    @staticmethod
+    def mota_overall(summary_df, overall_dic):
+        # 1 - (misses + switches + false positives) / objects
+        mota = 1. - quiet_divide(
+            (overall_dic['num_misses'] + overall_dic['num_switches'] +
+             overall_dic['num_false_positives']), overall_dic['num_objects'])
+        return mota
+
+    @staticmethod
+    def precision_overall(summary_df, overall_dic):
+        # detections / (false positives + detections)
+        precision = quiet_divide(overall_dic['num_detections'], (
+            overall_dic['num_false_positives'] + overall_dic['num_detections']))
+        return precision
+
+    @staticmethod
+    def recall_overall(summary_df, overall_dic):
+        # detections / objects
+        recall = quiet_divide(overall_dic['num_detections'],
+                              overall_dic['num_objects'])
+        return recall
+
+    @staticmethod
+    def idp_overall(summary_df, overall_dic):
+        # idtp / (idtp + idfp)
+        idp = quiet_divide(overall_dic['idtp'],
+                           (overall_dic['idtp'] + overall_dic['idfp']))
+        return idp
+
+    @staticmethod
+    def idr_overall(summary_df, overall_dic):
+        # idtp / (idtp + idfn)
+        idr = quiet_divide(overall_dic['idtp'],
+                           (overall_dic['idtp'] + overall_dic['idfn']))
+        return idr
+
+    @staticmethod
+    def idf1_overall(summary_df, overall_dic):
+        # 2 * idtp / (objects + predictions)
+        idf1 = quiet_divide(2. * overall_dic['idtp'], (
+            overall_dic['num_objects'] + overall_dic['num_predictions']))
+        return idf1
+
+
+def read_mcmot_results_union(filename, is_gt, is_ignore):
+    """
+    Read a comma separated MCMOT file and merge all categories.
+
+    Columns are read as: frame id, track id, four box values, score,
+    class id. Track ids are shifted per category so that they never
+    collide between categories.
+
+    Args:
+        filename (str): path of the result / ground truth file.
+        is_gt (bool): treat the file as ground truth: rows with class id 0
+            are dropped, class ids are decremented by one and the score is
+            fixed to 1.
+        is_ignore (bool): when True nothing is read and an empty dict is
+            returned.
+
+    Returns:
+        dict: frame id -> list of (tlwh, track id, class id, score).
+        Empty when the file is missing, empty, has fewer than 8 columns,
+        or ``is_ignore`` is set.
+    """
+    results_dict = dict()
+    # always hand back a dict, also when the file does not exist
+    if is_ignore or not os.path.isfile(filename):
+        return results_dict
+
+    # ndmin=2 keeps a file with a single line two-dimensional
+    all_result = np.loadtxt(filename, delimiter=',', ndmin=2)
+    # the class id lives in column 7, so 8 columns are required
+    if all_result.size == 0 or all_result.shape[1] < 8:
+        return results_dict
+    if is_gt:
+        # only for test use
+        all_result = all_result[all_result[:, 7] != 0]
+        all_result[:, 7] = all_result[:, 7] - 1
+
+    if all_result.shape[0] == 0:
+        return results_dict
+
+    class_unique = np.unique(all_result[:, 7])
+
+    last_max_id = 0
+    result_cls_list = []
+    for cls in class_unique:
+        result_cls_split = all_result[all_result[:, 7] == cls]
+        result_cls_split[:, 1] = result_cls_split[:, 1] + last_max_id
+        # make sure track id different between every category
+        last_max_id = max(np.unique(result_cls_split[:, 1])) + 1
+        result_cls_list.append(result_cls_split)
+
+    results_con = np.concatenate(result_cls_list)
+
+    for linelist in results_con:
+        fid = int(linelist[0])
+        if fid < 1:
+            continue
+        results_dict.setdefault(fid, list())
+
+        if is_gt:
+            score = 1
+        else:
+            score = float(linelist[6])
+
+        tlwh = tuple(map(float, linelist[2:6]))
+        target_id = int(linelist[1])
+        cls = int(linelist[7])
+
+        results_dict[fid].append((tlwh, target_id, cls, score))
+
+    return results_dict
+
+
+def read_mcmot_results(filename, is_gt, is_ignore):
+    """
+    Read a comma separated MCMOT file, grouped by category.
+
+    Fields are read as: frame id, track id, four box values, score,
+    class id.
+
+    Args:
+        filename (str): path of the result / ground truth file.
+        is_gt (bool): treat the file as ground truth: class ids are
+            decremented by one and the score is fixed to 1.
+        is_ignore (bool): not used by this reader; kept so that it can be
+            called like ``read_mcmot_results_union``.
+
+    Returns:
+        dict: class id -> {frame id -> list of (tlwh, track id, score)}.
+        Empty when the file does not exist.
+    """
+    results_dict = dict()
+    if os.path.isfile(filename):
+        with open(filename, 'r') as f:
+            for line in f:
+                linelist = line.strip().split(',')
+                # field 7 (class id) is read below, so a usable line has
+                # at least 8 fields; shorter lines are skipped
+                if len(linelist) < 8:
+                    continue
+                fid = int(linelist[0])
+                if fid < 1:
+                    continue
+                cid = int(linelist[7])
+                if is_gt:
+                    score = 1
+                    # only for test use
+                    cid -= 1
+                else:
+                    score = float(linelist[6])
+
+                cls_result_dict = results_dict.setdefault(cid, dict())
+                cls_result_dict.setdefault(fid, list())
+
+                tlwh = tuple(map(float, linelist[2:6]))
+                target_id = int(linelist[1])
+                cls_result_dict[fid].append((tlwh, target_id, score))
+    return results_dict
+
+
+def read_results(filename,
+                 data_type,
+                 is_gt=False,
+                 is_ignore=False,
+                 multi_class=False,
+                 union=False):
+    """
+    Pick the MCMOT reader that fits the options and run it on ``filename``.
+
+    Raises:
+        ValueError: if ``data_type`` is neither 'mcmot' nor 'lab', or if
+            ``multi_class`` is False.
+    """
+    if data_type not in ['mcmot', 'lab']:
+        raise ValueError('Unknown data type: {}'.format(data_type))
+    if not multi_class:
+        raise ValueError('multi_class: {}, MCMOT should have cls_id.'.
+                         format(multi_class))
+
+    # union: all categories are evaluated together, so track ids must not
+    # repeat between categories; otherwise every category is evaluated
+    # on its own.
+    read_fun = read_mcmot_results_union if union else read_mcmot_results
+    return read_fun(filename, is_gt, is_ignore)
+
+
+def unzip_objs(objs):
+    """
+    Split a list of (tlwh, id, score) records into three sequences.
+
+    The boxes come back as a float array reshaped to (-1, 4); ids and
+    scores are returned as produced by ``zip`` (empty lists for no input).
+    """
+    tlwhs, ids, scores = zip(*objs) if len(objs) > 0 else ([], [], [])
+    boxes = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
+    return boxes, ids, scores
+
+
+def unzip_objs_cls(objs):
+    """
+    Split a list of (tlwh, id, cls, score) records into four arrays.
+
+    The boxes come back as a float array reshaped to (-1, 4); ids, class
+    ids and scores are converted with ``np.array``.
+    """
+    if len(objs) > 0:
+        columns = zip(*objs)
+    else:
+        columns = ([], [], [], [])
+    tlwhs, ids, cls, scores = columns
+    boxes = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
+    return boxes, np.array(ids), np.array(cls), np.array(scores)
+
+
+class MCMOTEvaluator(object):
+    """
+    Evaluate the tracking result of one sequence, category by category.
+
+    Args:
+        data_root (str): dataset directory; the ground truth is looked up
+            at ``<data_root>/../sequences/<seq_name>.txt``.
+        seq_name (str): name of the sequence.
+        data_type (str): must be 'mcmot'.
+        num_classes (int): number of categories to evaluate.
+
+    Raises:
+        RuntimeError: if motmetrics cannot be imported or configured.
+    """
+
+    def __init__(self, data_root, seq_name, data_type, num_classes):
+        self.data_root = data_root
+        self.seq_name = seq_name
+        self.data_type = data_type
+        self.num_classes = num_classes
+
+        self.load_annotations()
+        try:
+            import motmetrics as mm
+            mm.lap.default_solver = 'lap'
+        except Exception as e:
+            raise RuntimeError(
+                'Unable to use MCMOT metric, please install motmetrics, for example: `pip install motmetrics`, see https://github.com/longcw/py-motmetrics'
+            )
+        self.reset_accumulator()
+
+        # one accumulator per category, filled by eval_file
+        self.class_accs = []
+
+    def load_annotations(self):
+        """Resolve the ground truth path and warn when it is missing."""
+        assert self.data_type == 'mcmot'
+        self.gt_filename = os.path.join(self.data_root, '../', 'sequences',
+                                        '{}.txt'.format(self.seq_name))
+        if not os.path.exists(self.gt_filename):
+            # the placeholder is now actually filled with the path
+            logger.warning(
+                "gt_filename '{}' of MCMOTEvaluator is not exist, so the MOTA will be -INF."
+                .format(self.gt_filename))
+
+    def reset_accumulator(self):
+        """Start a fresh accumulator with automatic frame ids."""
+        self.acc = mm.MOTAccumulator(auto_id=True)
+
+    def eval_frame_dict(self, trk_objs, gt_objs, rtn_events=False, union=False):
+        """
+        Feed one frame of tracks and ground truth into the accumulator.
+
+        Args:
+            trk_objs (list): tracked objects of the frame.
+            gt_objs (list): ground truth objects of the frame.
+            rtn_events (bool): return the accumulator's events if it
+                provides them.
+            union (bool): objects carry a class id and pairs of different
+                categories must never be matched.
+
+        Returns:
+            The accumulator's ``mot_events`` or None.
+        """
+        if union:
+            trk_tlwhs, trk_ids, trk_cls = unzip_objs_cls(trk_objs)[:3]
+            gt_tlwhs, gt_ids, gt_cls = unzip_objs_cls(gt_objs)[:3]
+
+            # get distance matrix
+            iou_distance = mm.distances.iou_matrix(
+                gt_tlwhs, trk_tlwhs, max_iou=0.5)
+
+            # Set the distance between objects of different categories to nan
+            gt_cls_len = len(gt_cls)
+            trk_cls_len = len(trk_cls)
+            # When the number of GT or Trk is 0, iou_distance dimension is (0,0)
+            if gt_cls_len != 0 and trk_cls_len != 0:
+                gt_cls = gt_cls.reshape(gt_cls_len, 1)
+                gt_cls = np.repeat(gt_cls, trk_cls_len, axis=1)
+                trk_cls = trk_cls.reshape(1, trk_cls_len)
+                trk_cls = np.repeat(trk_cls, gt_cls_len, axis=0)
+                iou_distance = np.where(gt_cls == trk_cls, iou_distance, np.nan)
+
+        else:
+            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
+            gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
+
+            # get distance matrix
+            iou_distance = mm.distances.iou_matrix(
+                gt_tlwhs, trk_tlwhs, max_iou=0.5)
+
+        self.acc.update(gt_ids, trk_ids, iou_distance)
+
+        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
+                                                            'mot_events'):
+            events = self.acc.mot_events  # only supported by https://github.com/longcw/py-motmetrics
+        else:
+            events = None
+        return events
+
+    def eval_file(self, result_filename):
+        """
+        Evaluate a result file against the ground truth, per category.
+
+        Returns:
+            list: ``self.class_accs`` with one accumulator appended for
+            every category id in ``range(self.num_classes)``.
+        """
+        # evaluation of each category
+        gt_frame_dict = read_results(
+            self.gt_filename,
+            self.data_type,
+            is_gt=True,
+            multi_class=True,
+            union=False)
+        result_frame_dict = read_results(
+            result_filename,
+            self.data_type,
+            is_gt=False,
+            multi_class=True,
+            union=False)
+
+        for cid in range(self.num_classes):
+            self.reset_accumulator()
+            cls_result_frame_dict = result_frame_dict.setdefault(cid, dict())
+            cls_gt_frame_dict = gt_frame_dict.setdefault(cid, dict())
+
+            # only labeled frames will be evaluated
+            frames = sorted(cls_gt_frame_dict)
+
+            for frame_id in frames:
+                trk_objs = cls_result_frame_dict.get(frame_id, [])
+                gt_objs = cls_gt_frame_dict.get(frame_id, [])
+                self.eval_frame_dict(trk_objs, gt_objs, rtn_events=False)
+
+            self.class_accs.append(self.acc)
+
+        return self.class_accs
+
+    @staticmethod
+    def get_summary(accs,
+                    names,
+                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
+                             'precision', 'recall')):
+        """
+        Compute a summary table, including an overall row, for ``accs``.
+
+        ``names`` and ``metrics`` are deep-copied before use; passing
+        ``metrics=None`` selects ``mm.metrics.motchallenge_metrics``.
+        """
+        names = copy.deepcopy(names)
+        if metrics is None:
+            metrics = mm.metrics.motchallenge_metrics
+        metrics = copy.deepcopy(metrics)
+
+        mh = mm.metrics.create()
+        summary = mh.compute_many(
+            accs, metrics=metrics, names=names, generate_overall=True)
+
+        return summary
+
+    @staticmethod
+    def save_summary(summary, filename):
+        """Write ``summary`` to the Excel file ``filename``."""
+        import pandas as pd
+        # The context manager saves and closes the workbook on exit;
+        # ExcelWriter.save() is no longer available in pandas 2.x.
+        with pd.ExcelWriter(filename) as writer:
+            summary.to_excel(writer)
+
+
+class MCMOTMetric(Metric):
+    """
+    Multi-class MOT metric collected over several sequences.
+
+    Args:
+        num_classes (int): number of categories.
+        save_summary (bool): stored on the instance as ``save_summary``.
+    """
+
+    def __init__(self, num_classes, save_summary=False):
+        self.num_classes = num_classes
+        self.save_summary = save_summary
+        self.MCMOTEvaluator = MCMOTEvaluator
+        self.result_root = None
+        self.reset()
+
+    def reset(self):
+        """Drop everything collected so far."""
+        self.accs = []
+        self.seqs = []
+        # Summary rows grouped by row position: 0..num_classes-1 are the
+        # categories, num_classes is the per-sequence OVERALL row. Cleared
+        # here as well, otherwise a second evaluation after reset() would
+        # be concatenated with the rows of the previous one.
+        self.seqs_overall = defaultdict(list)
+
+    def update(self, data_root, seq, data_type, result_root, result_filename):
+        """Evaluate one sequence and store its per-category summary rows."""
+        evaluator = self.MCMOTEvaluator(data_root, seq, data_type,
+                                        self.num_classes)
+        seq_acc = evaluator.eval_file(result_filename)
+        self.accs.append(seq_acc)
+        self.seqs.append(seq)
+        self.result_root = result_root
+
+        cls_index_name = [
+            '{}_{}'.format(seq, i) for i in range(self.num_classes)
+        ]
+        summary = parse_accs_metrics(seq_acc, cls_index_name)
+        summary.rename(
+            index={'OVERALL': '{}_OVERALL'.format(seq)}, inplace=True)
+        for row in range(len(summary)):
+            # keep each row as a one-row table so it can be concatenated
+            self.seqs_overall[row].append(summary.iloc[row:row + 1])
+
+    def accumulate(self):
+        """Build one overall row per category across all sequences."""
+        self.cls_summary_list = []
+        for row in range(self.num_classes):
+            seqs_cls_df = pd.concat(self.seqs_overall[row])
+            seqs_cls_summary = seqs_overall_metrics(seqs_cls_df)
+            cls_summary_overall = seqs_cls_summary.iloc[-1:].copy()
+            cls_summary_overall.rename(
+                index={'overall_calc': 'overall_calc_{}'.format(row)},
+                inplace=True)
+            self.cls_summary_list.append(cls_summary_overall)
+
+    def log(self):
+        """Print the per-sequence and the per-category overall tables."""
+        # called for the printing done by verbose=True; results are unused
+        seqs_overall_metrics(
+            pd.concat(self.seqs_overall[self.num_classes]), verbose=True)
+        seqs_overall_metrics(pd.concat(self.cls_summary_list), verbose=True)
+
+    def get_results(self):
+        # constant placeholder value, no metric is returned here
+        return 1
--- a/paddle_detection/ppdet/metrics/metrics.py
+++ b/paddle_detection/ppdet/metrics/metrics.py
@@ -0,0 +1,505 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import json
+import paddle
+import numpy as np
+import typing
+from collections import defaultdict
+from pathlib import Path
+
+from .map_utils import prune_zero_padding, DetectionMAP
+from .coco_utils import get_infer_results, cocoapi_eval
+from .widerface_utils import face_eval_run
+from ppdet.data.source.category import get_categories
+from ppdet.modeling.rbox_utils import poly2rbox_np
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = [
+    'Metric', 'COCOMetric', 'VOCMetric', 'WiderFaceMetric', 'get_infer_results',
+    'RBoxMetric', 'SNIPERCOCOMetric'
+]
+
+# Per-keypoint sigmas handed to cocoapi_eval as ``sigmas`` for the
+# 'keypoints' style (17 values).
+COCO_SIGMAS = np.array([
+    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87,
+    .89, .89
+]) / 10.0
+# Per-keypoint sigmas used instead for the 'keypoints_crowd' style
+# (14 values).
+CROWD_SIGMAS = np.array(
+    [.79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79,
+     .79]) / 10.0
+
+
+class Metric(paddle.metric.Metric):
+    """Base class of the ppdet metrics; every hook is a no-op here."""
+
+    def name(self):
+        return type(self).__name__
+
+    def reset(self):
+        """Clear accumulated state; nothing to clear in the base class."""
+
+    def accumulate(self):
+        """Compute results from the collected state; no-op in the base."""
+
+    # paddle.metric.Metric defines :meth:`update`, :meth:`accumulate` and
+    # :meth:`reset`; in ppdet, we also need the following 2 methods:
+
+    def log(self):
+        """Abstract method for logging metric results."""
+
+    def get_results(self):
+        """Abstract method for getting metric results."""
+
+
+class COCOMetric(Metric):
+    """
+    COCO-style metric for bbox, mask, segm and keypoint results.
+
+    Args:
+        anno_file (str): annotation file; it must exist unless
+            ``save_prediction_only`` is set.
+        **kwargs: optional settings read here: ``clsid2catid``,
+            ``classwise``, ``output_eval`` (directory for the result json
+            files), ``bias``, ``save_prediction_only`` and ``IouType``.
+    """
+
+    def __init__(self, anno_file, **kwargs):
+        self.anno_file = anno_file
+        self.clsid2catid = kwargs.get('clsid2catid', None)
+        if self.clsid2catid is None:
+            self.clsid2catid, _ = get_categories('COCO', anno_file)
+        self.classwise = kwargs.get('classwise', False)
+        self.output_eval = kwargs.get('output_eval', None)
+        # TODO: bias should be unified
+        self.bias = kwargs.get('bias', 0)
+        self.save_prediction_only = kwargs.get('save_prediction_only', False)
+        self.iou_type = kwargs.get('IouType', 'bbox')
+
+        if not self.save_prediction_only:
+            assert os.path.isfile(anno_file), \
+                    "anno_file {} not a file".format(anno_file)
+
+        if self.output_eval is not None:
+            # parents=True: a nested output directory is created as well
+            Path(self.output_eval).mkdir(parents=True, exist_ok=True)
+
+        self.reset()
+
+    def reset(self):
+        """Drop all collected predictions and evaluation results."""
+        # only bbox and mask evaluation support currently
+        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
+        self.eval_results = {}
+
+    def update(self, inputs, outputs):
+        """Convert one batch of outputs and append it to ``self.results``."""
+        outs = {}
+        # outputs Tensor -> numpy.ndarray
+        for k, v in outputs.items():
+            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
+
+        # multi-scale inputs: all inputs have same im_id
+        if isinstance(inputs, typing.Sequence):
+            im_id = inputs[0]['im_id']
+        else:
+            im_id = inputs['im_id']
+        outs['im_id'] = im_id.numpy() if isinstance(im_id,
+                                                    paddle.Tensor) else im_id
+
+        infer_results = get_infer_results(
+            outs, self.clsid2catid, bias=self.bias)
+        for key in ('bbox', 'mask', 'segm', 'keypoint'):
+            if key in infer_results:
+                self.results[key] += infer_results[key]
+
+    def _dump_and_eval(self, key, style, stats_key, **eval_kwargs):
+        """Write ``self.results[key]`` to ``<key>.json`` and evaluate it.
+
+        Nothing happens when no result of that kind was collected. With
+        ``save_prediction_only`` the file is written but not evaluated;
+        otherwise the statistics returned by ``cocoapi_eval`` for
+        ``style`` are stored in ``self.eval_results[stats_key]``.
+        """
+        if len(self.results[key]) == 0:
+            return
+
+        output = '{}.json'.format(key)
+        if self.output_eval:
+            output = os.path.join(self.output_eval, output)
+        with open(output, 'w') as f:
+            json.dump(self.results[key], f)
+            logger.info('The {} result is saved to {}.json.'.format(key, key))
+
+        if self.save_prediction_only:
+            logger.info('The {} result is saved to {} and do not '
+                        'evaluate the mAP.'.format(key, output))
+            return
+
+        self.eval_results[stats_key] = cocoapi_eval(
+            output,
+            style,
+            anno_file=self.anno_file,
+            classwise=self.classwise,
+            **eval_kwargs)
+        sys.stdout.flush()
+
+    def accumulate(self):
+        """Save every collected result type and evaluate it."""
+        self._dump_and_eval('bbox', 'bbox', 'bbox')
+        self._dump_and_eval('mask', 'segm', 'mask')
+        # segm statistics are stored under the 'mask' key as well
+        self._dump_and_eval('segm', 'segm', 'mask')
+
+        if self.iou_type == 'keypoints_crowd':
+            style, use_area, sigmas = 'keypoints_crowd', False, CROWD_SIGMAS
+        else:
+            style, use_area, sigmas = 'keypoints', True, COCO_SIGMAS
+        self._dump_and_eval(
+            'keypoint', style, 'keypoint', sigmas=sigmas, use_area=use_area)
+
+    def log(self):
+        pass
+
+    def get_results(self):
+        """Return the dict of evaluation statistics filled by accumulate."""
+        return self.eval_results
+
+
+class VOCMetric(Metric):
+    """
+    Detection mAP metric that feeds predictions and ground truth into a
+    ``DetectionMAP`` accumulator.
+
+    Args:
+        label_list (str): path of an existing label file; it is passed to
+            ``get_categories('VOC', label_list)``.
+        class_num (int): forwarded to ``DetectionMAP``.
+        overlap_thresh (float): forwarded to ``DetectionMAP`` and shown in
+            the logged mAP line.
+        map_type (str): forwarded to ``DetectionMAP`` and shown in the
+            logged mAP line.
+        is_bbox_normalized (bool): forwarded to ``DetectionMAP``.
+        evaluate_difficult (bool): forwarded to ``DetectionMAP``; when
+            False the ``difficult`` flags of the inputs are passed along
+            with the ground truth.
+        classwise (bool): forwarded to ``DetectionMAP``.
+        output_eval (str|None): directory in which ``bbox.json`` is written
+            by ``accumulate()``; nothing is written when it is falsy.
+        save_prediction_only (bool): when True only the raw predictions are
+            collected and no mAP is accumulated.
+    """
+
+    def __init__(self,
+                 label_list,
+                 class_num=20,
+                 overlap_thresh=0.5,
+                 map_type='11point',
+                 is_bbox_normalized=False,
+                 evaluate_difficult=False,
+                 classwise=False,
+                 output_eval=None,
+                 save_prediction_only=False):
+        assert os.path.isfile(label_list), \
+                "label_list {} not a file".format(label_list)
+        self.clsid2catid, self.catid2name = get_categories('VOC', label_list)
+
+        self.overlap_thresh = overlap_thresh
+        self.map_type = map_type
+        self.evaluate_difficult = evaluate_difficult
+        self.output_eval = output_eval
+        self.save_prediction_only = save_prediction_only
+        self.detection_map = DetectionMAP(
+            class_num=class_num,
+            overlap_thresh=overlap_thresh,
+            map_type=map_type,
+            is_bbox_normalized=is_bbox_normalized,
+            evaluate_difficult=evaluate_difficult,
+            catid2name=self.catid2name,
+            classwise=classwise)
+
+        self.reset()
+
+    def reset(self):
+        """Drop all collected predictions and reset the mAP accumulator."""
+        self.results = {'bbox': [], 'score': [], 'label': []}
+        self.detection_map.reset()
+
+    def update(self, inputs, outputs):
+        """
+        Record one batch of predictions and update the mAP accumulator.
+
+        ``outputs['bbox']`` is read as a 2-D array whose column 0 is the
+        label, column 1 the score and the remaining columns the box;
+        ``outputs['bbox_num']`` gives the number of rows belonging to each
+        image. Ground truth is read from ``inputs['gt_bbox']``,
+        ``inputs['gt_class']``, ``inputs['difficult']`` and, when present,
+        ``inputs['scale_factor']``.
+        """
+        bbox_np = outputs['bbox'].numpy() if isinstance(
+            outputs['bbox'], paddle.Tensor) else outputs['bbox']
+        # The "no prediction" guard has to run on the raw array: once the
+        # columns are sliced a (1, 1) array becomes (1, 0) and reading the
+        # score column raises IndexError, so a check placed after slicing
+        # can never match. Presumably (1, 1) is the placeholder emitted
+        # when nothing was detected -- confirm against the model output.
+        if bbox_np is None or bbox_np.shape == (1, 1):
+            return
+
+        bboxes = bbox_np[:, 2:]
+        scores = bbox_np[:, 1]
+        labels = bbox_np[:, 0]
+        bbox_lengths = outputs['bbox_num'].numpy() if isinstance(
+            outputs['bbox_num'], paddle.Tensor) else outputs['bbox_num']
+
+        self.results['bbox'].append(bboxes.tolist())
+        self.results['score'].append(scores.tolist())
+        self.results['label'].append(labels.tolist())
+
+        if self.save_prediction_only:
+            return
+
+        gt_boxes = inputs['gt_bbox']
+        gt_labels = inputs['gt_class']
+        difficults = inputs['difficult'] if not self.evaluate_difficult \
+                            else None
+
+        if 'scale_factor' in inputs:
+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
+                inputs['scale_factor'],
+                paddle.Tensor) else inputs['scale_factor']
+        else:
+            # No rescaling information: use a factor of 1 for every image.
+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
+
+        # Predictions of all images are concatenated; bbox_idx is the
+        # offset of the current image inside that concatenation.
+        bbox_idx = 0
+        for i in range(len(gt_boxes)):
+            gt_box = gt_boxes[i].numpy() if isinstance(
+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
+            # Each scale_factor row is unpacked as (h, w).
+            h, w = scale_factor[i]
+            gt_box = gt_box / np.array([w, h, w, h])
+            gt_label = gt_labels[i].numpy() if isinstance(
+                gt_labels[i], paddle.Tensor) else gt_labels[i]
+            if difficults is not None:
+                difficult = difficults[i].numpy() if isinstance(
+                    difficults[i], paddle.Tensor) else difficults[i]
+            else:
+                difficult = None
+            bbox_num = bbox_lengths[i]
+            bbox = bboxes[bbox_idx:bbox_idx + bbox_num]
+            score = scores[bbox_idx:bbox_idx + bbox_num]
+            label = labels[bbox_idx:bbox_idx + bbox_num]
+            gt_box, gt_label, difficult = prune_zero_padding(gt_box, gt_label,
+                                                             difficult)
+            self.detection_map.update(bbox, score, label, gt_box, gt_label,
+                                      difficult)
+            bbox_idx += bbox_num
+
+    def accumulate(self):
+        """
+        Write the collected predictions to ``<output_eval>/bbox.json`` (when
+        ``output_eval`` is set) and, unless ``save_prediction_only`` is set,
+        accumulate the mAP statistics.
+        """
+        if self.output_eval:
+            # Create the target directory so that open() does not fail on a
+            # fresh output location.
+            os.makedirs(self.output_eval, exist_ok=True)
+            output = os.path.join(self.output_eval, "bbox.json")
+            with open(output, 'w') as f:
+                json.dump(self.results, f)
+            # Report the path that was actually written.
+            logger.info('The bbox result is saved to {}.'.format(output))
+        if self.save_prediction_only:
+            return
+
+        logger.info("Accumulating evaluatation results...")
+        self.detection_map.accumulate()
+
+    def log(self):
+        """Log the mAP as a percentage with the threshold and map type."""
+        map_stat = 100. * self.detection_map.get_map()
+        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(self.overlap_thresh,
+                                                       self.map_type, map_stat))
+
+    def get_results(self):
+        """Return ``{'bbox': [mAP]}`` using the accumulator's current mAP."""
+        return {'bbox': [self.detection_map.get_map()]}
+
+
+class WiderFaceMetric(Metric):
+    """
+    Metric wrapper that hands the whole evaluation over to
+    ``face_eval_run`` in ``'widerface'`` mode.
+
+    Args:
+        image_dir: passed to ``face_eval_run`` as the image directory.
+        anno_file: passed to ``face_eval_run`` as the ground-truth file.
+        multi_scale (bool): passed to ``face_eval_run`` as ``multi_scale``.
+    """
+
+    def __init__(self, image_dir, anno_file, multi_scale=True):
+        self.image_dir, self.anno_file = image_dir, anno_file
+        self.multi_scale = multi_scale
+        categories = get_categories('widerface')
+        self.clsid2catid, self.catid2name = categories
+
+    def update(self, model):
+        """Run ``face_eval_run`` on ``model``; predictions go to 'output/pred'."""
+        eval_options = dict(
+            pred_dir='output/pred',
+            eval_mode='widerface',
+            multi_scale=self.multi_scale)
+        face_eval_run(model, self.image_dir, self.anno_file, **eval_options)
+
+
+class RBoxMetric(Metric):
+    """
+    mAP metric for polygon (``gt_poly``) ground truth, built on
+    ``DetectionMAP``.
+
+    Args:
+        anno_file: annotation file passed to ``get_categories('RBOX', ...)``.
+        **kwargs: optional settings read with the defaults shown:
+            ``classwise`` (False), ``output_eval`` (None),
+            ``save_prediction_only`` (False), ``overlap_thresh`` (0.5),
+            ``map_type`` ('11point'), ``evaluate_difficult`` (False),
+            ``imid2path`` (None).
+    """
+
+    def __init__(self, anno_file, **kwargs):
+        self.anno_file = anno_file
+        self.clsid2catid, self.catid2name = get_categories('RBOX', anno_file)
+        # Inverse mapping, used to turn predicted category ids back into
+        # class indices for DetectionMAP.
+        self.catid2clsid = {v: k for k, v in self.clsid2catid.items()}
+        self.classwise = kwargs.get('classwise', False)
+        self.output_eval = kwargs.get('output_eval', None)
+        self.save_prediction_only = kwargs.get('save_prediction_only', False)
+        self.overlap_thresh = kwargs.get('overlap_thresh', 0.5)
+        self.map_type = kwargs.get('map_type', '11point')
+        self.evaluate_difficult = kwargs.get('evaluate_difficult', False)
+        self.imid2path = kwargs.get('imid2path', None)
+        class_num = len(self.catid2name)
+        self.detection_map = DetectionMAP(
+            class_num=class_num,
+            overlap_thresh=self.overlap_thresh,
+            map_type=self.map_type,
+            is_bbox_normalized=False,
+            evaluate_difficult=self.evaluate_difficult,
+            catid2name=self.catid2name,
+            classwise=self.classwise)
+
+        self.reset()
+
+    def reset(self):
+        """Drop all collected predictions and reset the mAP accumulator."""
+        self.results = []
+        self.detection_map.reset()
+
+    def update(self, inputs, outputs):
+        """
+        Record one batch of predictions and update the mAP accumulator.
+
+        Predictions are converted with ``get_infer_results``; ground truth
+        is read from ``inputs['gt_poly']``, ``inputs['gt_class']`` and,
+        when present, ``inputs['scale_factor']``.
+        """
+        outs = {}
+        # outputs Tensor -> numpy.ndarray
+        for k, v in outputs.items():
+            outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
+
+        im_id = inputs['im_id']
+        im_id = im_id.numpy() if isinstance(im_id, paddle.Tensor) else im_id
+        outs['im_id'] = im_id
+
+        infer_results = get_infer_results(outs, self.clsid2catid)
+        infer_results = infer_results['bbox'] if 'bbox' in infer_results else []
+        self.results += infer_results
+        if self.save_prediction_only:
+            return
+
+        gt_boxes = inputs['gt_poly']
+        gt_labels = inputs['gt_class']
+
+        if 'scale_factor' in inputs:
+            scale_factor = inputs['scale_factor'].numpy() if isinstance(
+                inputs['scale_factor'],
+                paddle.Tensor) else inputs['scale_factor']
+        else:
+            # No rescaling information: use a factor of 1 for every image.
+            scale_factor = np.ones((gt_boxes.shape[0], 2)).astype('float32')
+
+        # Group the detections of this batch by image id in a single pass
+        # instead of rescanning the full list three times for every image.
+        dets_by_image = defaultdict(list)
+        for res in infer_results:
+            dets_by_image[int(res['image_id'])].append(res)
+
+        for i in range(len(gt_boxes)):
+            gt_box = gt_boxes[i].numpy() if isinstance(
+                gt_boxes[i], paddle.Tensor) else gt_boxes[i]
+            # Each scale_factor row is unpacked as (h, w); the polygon has
+            # eight coordinates, divided by w and h alternately.
+            h, w = scale_factor[i]
+            gt_box = gt_box / np.array([w, h, w, h, w, h, w, h])
+            gt_label = gt_labels[i].numpy() if isinstance(
+                gt_labels[i], paddle.Tensor) else gt_labels[i]
+            gt_box, gt_label, _ = prune_zero_padding(gt_box, gt_label)
+            dets = dets_by_image.get(int(im_id[i]), [])
+            bbox = [res['bbox'] for res in dets]
+            score = [res['score'] for res in dets]
+            label = [
+                self.catid2clsid[int(res['category_id'])] for res in dets
+            ]
+            self.detection_map.update(bbox, score, label, gt_box, gt_label)
+
+    def save_results(self, results, output_dir, imid2path):
+        """
+        Write ``results`` below ``output_dir``.
+
+        With a non-empty ``imid2path`` one ``<image basename>.txt`` file is
+        written per entry of ``imid2path``, each line holding the category
+        name, the score and the box values separated by spaces. Otherwise
+        all results are dumped to a single ``bbox.json``.
+        """
+        if output_dir:
+            # Create the target directory so that open() does not fail on a
+            # fresh output location.
+            os.makedirs(output_dir, exist_ok=True)
+
+        if imid2path:
+            data_dicts = defaultdict(list)
+            for result in results:
+                image_id = result['image_id']
+                data_dicts[image_id].append(result)
+
+            for image_id, image_path in imid2path.items():
+                basename = os.path.splitext(os.path.split(image_path)[-1])[0]
+                output = os.path.join(output_dir, "{}.txt".format(basename))
+                # Images without detections still get an (empty) file.
+                dets = data_dicts.get(image_id, [])
+                with open(output, 'w') as f:
+                    for det in dets:
+                        catid, bbox, score = det['category_id'], det[
+                            'bbox'], det['score']
+                        bbox_pred = '{} {} '.format(self.catid2name[catid],
+                                                    score) + ' '.join(
+                                                        [str(e) for e in bbox])
+                        f.write(bbox_pred + '\n')
+
+            logger.info('The bbox result is saved to {}.'.format(output_dir))
+        else:
+            output = os.path.join(output_dir, "bbox.json")
+            with open(output, 'w') as f:
+                json.dump(results, f)
+
+            logger.info('The bbox result is saved to {}.'.format(output))
+
+    def accumulate(self):
+        """
+        Save the collected predictions when ``output_eval`` is set and,
+        unless ``save_prediction_only`` is set, accumulate the mAP.
+        """
+        if self.output_eval:
+            self.save_results(self.results, self.output_eval, self.imid2path)
+
+        if not self.save_prediction_only:
+            logger.info("Accumulating evaluatation results...")
+            self.detection_map.accumulate()
+
+    def log(self):
+        """Log the mAP as a percentage with the threshold and map type."""
+        map_stat = 100. * self.detection_map.get_map()
+        logger.info("mAP({:.2f}, {}) = {:.2f}%".format(self.overlap_thresh,
+                                                       self.map_type, map_stat))
+
+    def get_results(self):
+        """Return ``{'bbox': [mAP]}`` using the accumulator's current mAP."""
+        return {'bbox': [self.detection_map.get_map()]}
+
+
+class SNIPERCOCOMetric(COCOMetric):
+    """
+    ``COCOMetric`` variant that buffers per-chip outputs and merges them
+    through ``dataset.anno_cropper.aggregate_chips_detections`` before the
+    regular COCO accumulation.
+
+    Args:
+        anno_file: forwarded to ``COCOMetric``.
+        **kwargs: forwarded to ``COCOMetric``; must contain ``dataset``.
+    """
+
+    def __init__(self, anno_file, **kwargs):
+        super().__init__(anno_file, **kwargs)
+        self.dataset = kwargs["dataset"]
+        self.chip_results = []
+
+    def reset(self):
+        """Clear collected results, evaluation results and buffered chips."""
+        # only bbox and mask evaluation support currently
+        self.results = {'bbox': [], 'mask': [], 'segm': [], 'keypoint': []}
+        self.eval_results = {}
+        self.chip_results = []
+
+    def update(self, inputs, outputs):
+        """Convert tensors of one batch to numpy and buffer the batch."""
+        # outputs Tensor -> numpy.ndarray
+        chip_out = {
+            key: (value.numpy() if isinstance(value, paddle.Tensor) else value)
+            for key, value in outputs.items()
+        }
+
+        im_id = inputs['im_id']
+        if isinstance(im_id, paddle.Tensor):
+            im_id = im_id.numpy()
+        chip_out['im_id'] = im_id
+
+        self.chip_results.append(chip_out)
+
+    def accumulate(self):
+        """Merge buffered chips, collect bbox results, then run the parent."""
+        cropper = self.dataset.anno_cropper
+        merged = cropper.aggregate_chips_detections(self.chip_results)
+        for outs in merged:
+            infer_results = get_infer_results(
+                outs, self.clsid2catid, bias=self.bias)
+            if 'bbox' in infer_results:
+                self.results['bbox'] += infer_results['bbox']
+
+        super().accumulate()
--- a/paddle_detection/ppdet/metrics/mot_metrics.py
+++ b/paddle_detection/ppdet/metrics/mot_metrics.py
--- a/paddle_detection/ppdet/metrics/munkres.py
+++ b/paddle_detection/ppdet/metrics/munkres.py
@@ -0,0 +1,428 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+"""
+This code is borrowed from https://github.com/xingyizhou/CenterTrack/blob/master/src/tools/eval_kitti_track/munkres.py
+"""
+
+import sys
+
+__all__ = ['Munkres', 'make_cost_matrix']
+
+
+class Munkres:
+    """
+    Solve the classical assignment problem with the Munkres (Hungarian)
+    algorithm.
+
+    Call ``compute()`` with a cost matrix to obtain the ``(row, column)``
+    pairs of a lowest-cost assignment.
+    """
+
+    def __init__(self):
+        """Create a new instance with empty working state."""
+        self.C = None  # padded working copy of the cost matrix
+        self.row_covered = []
+        self.col_covered = []
+        self.n = 0  # side length of the padded square matrix
+        self.Z0_r = 0  # row of the primed zero handed from step 4 to step 5
+        self.Z0_c = 0  # column of that primed zero
+        self.marked = None  # 0 = unmarked, 1 = starred zero, 2 = primed zero
+        self.path = None
+
+    @staticmethod
+    def make_cost_matrix(profit_matrix, inversion_function):
+        """
+        **DEPRECATED**
+
+        Please use the module function ``make_cost_matrix()``.
+        """
+        # Inside a method body the bare name resolves to the module-level
+        # function of this file (class attributes are not in scope here),
+        # so no import of an external ``munkres`` package is needed.
+        return make_cost_matrix(profit_matrix, inversion_function)
+
+    def pad_matrix(self, matrix, pad_value=0):
+        """
+        Pad a possibly non-square matrix to make it square.
+
+        :Parameters:
+            matrix : list of lists
+                matrix to pad
+
+            pad_value : int
+                value to use to pad the matrix
+
+        :rtype: list of lists
+        :return: a new, possibly padded, matrix
+        """
+        # The square side is the larger of the row count and the longest row.
+        size = max([len(matrix)] + [len(row) for row in matrix])
+
+        # list(row) copies every row so the caller's matrix is never touched.
+        new_matrix = [
+            list(row) + [pad_value] * (size - len(row)) for row in matrix
+        ]
+        new_matrix += [[pad_value] * size
+                       for _ in range(size - len(new_matrix))]
+        return new_matrix
+
+    def compute(self, cost_matrix):
+        """
+        Compute the indexes for the lowest-cost pairings between rows and
+        columns in the database. Returns a list of (row, column) tuples
+        that can be used to traverse the matrix.
+
+        :Parameters:
+            cost_matrix : list of lists
+                The cost matrix. If this cost matrix is not square, it
+                will be padded with zeros, via a call to ``pad_matrix()``.
+                (This method does *not* modify the caller's matrix. It
+                operates on a copy of the matrix.)
+
+                **WARNING**: This code handles square and rectangular
+                matrices. It does *not* handle irregular matrices.
+
+        :rtype: list
+        :return: A list of ``(row, column)`` tuples that describe the lowest
+                 cost path through the matrix. An empty matrix yields an
+                 empty list.
+
+        """
+        # Nothing to assign; also avoids indexing cost_matrix[0] below.
+        if len(cost_matrix) == 0:
+            return []
+
+        self.C = self.pad_matrix(cost_matrix)
+        self.n = len(self.C)
+        self.original_length = len(cost_matrix)
+        self.original_width = len(cost_matrix[0])
+        self.row_covered = [False for i in range(self.n)]
+        self.col_covered = [False for i in range(self.n)]
+        self.Z0_r = 0
+        self.Z0_c = 0
+        self.path = self.__make_matrix(self.n * 2, 0)
+        self.marked = self.__make_matrix(self.n, 0)
+
+        steps = {
+            1: self.__step1,
+            2: self.__step2,
+            3: self.__step3,
+            4: self.__step4,
+            5: self.__step5,
+            6: self.__step6
+        }
+
+        # Each step returns the number of the next step; step 3 returns 7,
+        # which is not in the table, once the assignment is complete. The
+        # membership test (rather than catching KeyError around the call)
+        # keeps a KeyError raised inside a step from being mistaken for
+        # normal termination.
+        step = 1
+        while step in steps:
+            step = steps[step]()
+
+        # Look for the starred columns, ignoring the padding area.
+        results = [(i, j)
+                   for i in range(self.original_length)
+                   for j in range(self.original_width)
+                   if self.marked[i][j] == 1]
+
+        return results
+
+    def __copy_matrix(self, matrix):
+        """Return an exact copy of the supplied matrix"""
+        # Imported locally: the module itself does not import ``copy``.
+        import copy
+        return copy.deepcopy(matrix)
+
+    def __make_matrix(self, n, val):
+        """Create an *n*x*n* matrix, populating it with the specific value."""
+        return [[val for j in range(n)] for i in range(n)]
+
+    def __step1(self):
+        """
+        For each row of the matrix, find the smallest element and
+        subtract it from every element in its row. Go to Step 2.
+        """
+        n = self.n
+        for i in range(n):
+            minval = min(self.C[i])
+            # Find the minimum value for this row and subtract that minimum
+            # from every element in the row.
+            for j in range(n):
+                self.C[i][j] -= minval
+
+        return 2
+
+    def __step2(self):
+        """
+        Find a zero (Z) in the resulting matrix. If there is no starred
+        zero in its row or column, star Z. Repeat for each element in the
+        matrix. Go to Step 3.
+        """
+        n = self.n
+        for i in range(n):
+            for j in range(n):
+                if (self.C[i][j] == 0) and \
+                   (not self.col_covered[j]) and \
+                   (not self.row_covered[i]):
+                    self.marked[i][j] = 1
+                    # Covers are used here only as "already has a star"
+                    # bookkeeping; they are cleared again below.
+                    self.col_covered[j] = True
+                    self.row_covered[i] = True
+
+        self.__clear_covers()
+        return 3
+
+    def __step3(self):
+        """
+        Cover each column containing a starred zero. If K columns are
+        covered, the starred zeros describe a complete set of unique
+        assignments. In this case, Go to DONE, otherwise, Go to Step 4.
+        """
+        n = self.n
+        count = 0
+        for i in range(n):
+            for j in range(n):
+                if self.marked[i][j] == 1:
+                    self.col_covered[j] = True
+                    count += 1
+
+        if count >= n:
+            step = 7  # done
+        else:
+            step = 4
+
+        return step
+
+    def __step4(self):
+        """
+        Find a noncovered zero and prime it. If there is no starred zero
+        in the row containing this primed zero, Go to Step 5. Otherwise,
+        cover this row and uncover the column containing the starred
+        zero. Continue in this manner until there are no uncovered zeros
+        left. Save the smallest uncovered value and Go to Step 6.
+        """
+        step = 0
+        done = False
+        row = -1
+        col = -1
+        star_col = -1
+        while not done:
+            (row, col) = self.__find_a_zero()
+            if row < 0:
+                # No uncovered zero left: the matrix must be adjusted.
+                done = True
+                step = 6
+            else:
+                self.marked[row][col] = 2
+                star_col = self.__find_star_in_row(row)
+                if star_col >= 0:
+                    col = star_col
+                    self.row_covered[row] = True
+                    self.col_covered[col] = False
+                else:
+                    done = True
+                    self.Z0_r = row
+                    self.Z0_c = col
+                    step = 5
+
+        return step
+
+    def __step5(self):
+        """
+        Construct a series of alternating primed and starred zeros as
+        follows. Let Z0 represent the uncovered primed zero found in Step 4.
+        Let Z1 denote the starred zero in the column of Z0 (if any).
+        Let Z2 denote the primed zero in the row of Z1 (there will always
+        be one). Continue until the series terminates at a primed zero
+        that has no starred zero in its column. Unstar each starred zero
+        of the series, star each primed zero of the series, erase all
+        primes and uncover every line in the matrix. Return to Step 3
+        """
+        count = 0
+        path = self.path
+        path[count][0] = self.Z0_r
+        path[count][1] = self.Z0_c
+        done = False
+        while not done:
+            row = self.__find_star_in_col(path[count][1])
+            if row >= 0:
+                count += 1
+                path[count][0] = row
+                path[count][1] = path[count - 1][1]
+            else:
+                done = True
+
+            if not done:
+                col = self.__find_prime_in_row(path[count][0])
+                count += 1
+                path[count][0] = path[count - 1][0]
+                path[count][1] = col
+
+        self.__convert_path(path, count)
+        self.__clear_covers()
+        self.__erase_primes()
+        return 3
+
+    def __step6(self):
+        """
+        Add the value found in Step 4 to every element of each covered
+        row, and subtract it from every element of each uncovered column.
+        Return to Step 4 without altering any stars, primes, or covered
+        lines.
+        """
+        minval = self.__find_smallest()
+        for i in range(self.n):
+            for j in range(self.n):
+                if self.row_covered[i]:
+                    self.C[i][j] += minval
+                if not self.col_covered[j]:
+                    self.C[i][j] -= minval
+        return 4
+
+    def __find_smallest(self):
+        """Find the smallest uncovered value in the matrix."""
+        # Start from infinity so that arbitrarily large costs are handled;
+        # a fixed finite sentinel would be returned unchanged whenever all
+        # uncovered values exceed it.
+        minval = float('inf')
+        for i in range(self.n):
+            for j in range(self.n):
+                if (not self.row_covered[i]) and (not self.col_covered[j]):
+                    if minval > self.C[i][j]:
+                        minval = self.C[i][j]
+        return minval
+
+    def __find_a_zero(self):
+        """
+        Find an uncovered element with value 0.
+
+        Rows are scanned in order; for the first row that holds an
+        uncovered zero, the position of the *last* such zero in that row
+        is returned. Returns ``(-1, -1)`` when there is none.
+        """
+        for i in range(self.n):
+            if self.row_covered[i]:
+                continue
+            col = -1
+            # Deliberately no break: the last matching column wins, which
+            # determines the choice among equally cheap assignments.
+            for j in range(self.n):
+                if (self.C[i][j] == 0) and (not self.col_covered[j]):
+                    col = j
+            if col >= 0:
+                return (i, col)
+
+        return (-1, -1)
+
+    def __find_star_in_row(self, row):
+        """
+        Find the first starred element in the specified row. Returns
+        the column index, or -1 if no starred element was found.
+        """
+        col = -1
+        for j in range(self.n):
+            if self.marked[row][j] == 1:
+                col = j
+                break
+
+        return col
+
+    def __find_star_in_col(self, col):
+        """
+        Find the first starred element in the specified column. Returns
+        the row index, or -1 if no starred element was found.
+        """
+        row = -1
+        for i in range(self.n):
+            if self.marked[i][col] == 1:
+                row = i
+                break
+
+        return row
+
+    def __find_prime_in_row(self, row):
+        """
+        Find the first primed element in the specified row. Returns
+        the column index, or -1 if no primed element was found.
+        """
+        col = -1
+        for j in range(self.n):
+            if self.marked[row][j] == 2:
+                col = j
+                break
+
+        return col
+
+    def __convert_path(self, path, count):
+        """Unstar the starred and star the other cells of path[0..count]."""
+        for i in range(count + 1):
+            if self.marked[path[i][0]][path[i][1]] == 1:
+                self.marked[path[i][0]][path[i][1]] = 0
+            else:
+                self.marked[path[i][0]][path[i][1]] = 1
+
+    def __clear_covers(self):
+        """Clear all covered matrix cells"""
+        for i in range(self.n):
+            self.row_covered[i] = False
+            self.col_covered[i] = False
+
+    def __erase_primes(self):
+        """Erase all prime markings"""
+        for i in range(self.n):
+            for j in range(self.n):
+                if self.marked[i][j] == 2:
+                    self.marked[i][j] = 0
+
+
+def make_cost_matrix(profit_matrix, inversion_function):
+    """
+    Build a cost matrix from a profit matrix by applying
+    'inversion_function' to every entry. The inversion function takes
+    one numeric argument and returns the value to be used as the cost
+    for that profit.
+
+    Call it like this:
+
+    .. python::
+
+        cost_matrix = make_cost_matrix(matrix, inversion_func)
+
+    For example:
+
+    .. python::
+
+        cost_matrix = make_cost_matrix(matrix, lambda x : sys.maxsize - x)
+
+    :Parameters:
+        profit_matrix : list of lists
+            The matrix to convert from a profit to a cost matrix
+
+        inversion_function : function
+            The function to use to invert each entry in the profit matrix
+
+    :rtype: list of lists
+    :return: The converted matrix (a new list; the input is not modified)
+    """
+    return [[inversion_function(profit) for profit in profit_row]
+            for profit_row in profit_matrix]
--- a/paddle_detection/ppdet/metrics/pose3d_metrics.py
+++ b/paddle_detection/ppdet/metrics/pose3d_metrics.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+
+import paddle
+from paddle.distributed import ParallelEnv
+import os
+import json
+from collections import defaultdict, OrderedDict
+import numpy as np
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['Pose3DEval']
+
+
+class AverageMeter(object):
+    """
+    Keep the latest value together with a weighted running sum, count and
+    average of a scalar quantity.
+    """
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """Set the latest value, average, sum and count back to zero."""
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        """
+        Add ``val`` with weight ``n`` and refresh the running average.
+
+        Args:
+            val: value to record; it also becomes ``self.val``.
+            n (int): weight of ``val`` (e.g. the number of samples it
+                represents).
+        """
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        # While the accumulated count is still zero (e.g. the first update
+        # came with n=0) there is nothing to average; keep 0 instead of
+        # dividing by zero.
+        self.avg = self.sum / self.count if self.count else 0
+
+
+def mean_per_joint_position_error(pred, gt, has_3d_joints):
+    """
+    Compute mPJPE for the samples flagged in ``has_3d_joints``.
+
+    Only entries where ``has_3d_joints == 1`` are used, and only the first
+    three values of the last axis of ``gt`` are kept. Both joint sets are
+    re-centred on the midpoint of joints 2 and 3 (named "pelvis" here)
+    before the Euclidean distance per joint is averaged over the joints.
+
+    Returns:
+        numpy array with one mean error per selected sample.
+    """
+    valid = has_3d_joints == 1
+    gt = gt[valid][:, :, :3]
+    pred = pred[valid]
+
+    with paddle.no_grad():
+        # Root both skeletons at their own pelvis midpoint.
+        gt_root = (gt[:, 2, :] + gt[:, 3, :]) / 2
+        pred_root = (pred[:, 2, :] + pred[:, 3, :]) / 2
+        gt = gt - gt_root[:, None, :]
+        pred = pred - pred_root[:, None, :]
+
+        joint_dist = paddle.sqrt(((pred - gt)**2).sum(axis=-1))
+        return joint_dist.mean(axis=-1).numpy()
+
+
+def compute_similarity_transform(S1, S2):
+    """Align point set S1 to point set S2 with a similarity transform.
+
+    Finds a scale s, rotation R and translation t (i.e. solves the
+    orthogonal Procrustes problem) and returns ``s * R @ S1 + t``.
+    Inputs whose first dimension is neither 3 nor 2 are treated as
+    (N x D) and transposed internally; the result is returned in the
+    same layout as the input.
+    """
+    needs_transpose = S1.shape[0] not in (3, 2)
+    if needs_transpose:
+        S1 = S1.T
+        S2 = S2.T
+    assert (S2.shape[1] == S1.shape[1])
+
+    # 1. Centre both point sets.
+    mean_src = S1.mean(axis=1, keepdims=True)
+    mean_dst = S2.mean(axis=1, keepdims=True)
+    src = S1 - mean_src
+    dst = S2 - mean_dst
+
+    # 2. Total variance of the centred source, used for the scale.
+    src_var = np.sum(src**2)
+
+    # 3. Cross-covariance between source and target.
+    K = src.dot(dst.T)
+
+    # 4. The rotation maximising trace(R'K) is built from the singular
+    # vectors of K.
+    U, _, Vh = np.linalg.svd(K)
+    V = Vh.T
+    # Flip the last axis if needed so that det(R) = 1 (no reflection).
+    Z = np.eye(U.shape[0])
+    Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T)))
+    R = V.dot(Z.dot(U.T))
+
+    # 5. Scale.
+    scale = np.trace(R.dot(K)) / src_var
+
+    # 6. Translation.
+    t = mean_dst - scale * (R.dot(mean_src))
+
+    # 7. Apply the transform to the (uncentred) source points.
+    aligned = scale * R.dot(S1) + t
+
+    return aligned.T if needs_transpose else aligned
+
+
+def compute_similarity_transform_batch(S1, S2):
+    """Apply ``compute_similarity_transform`` to each pair along axis 0.
+
+    The result has the shape and dtype of ``S1``.
+    """
+    aligned = np.zeros_like(S1)
+    for idx, source in enumerate(S1):
+        aligned[idx] = compute_similarity_transform(source, S2[idx])
+    return aligned
+
+
+def reconstruction_error(S1, S2, reduction='mean'):
+    """Procrustes-align S1 to S2 and return the reconstruction error.
+
+    The error of a sample is the Euclidean distance over the last axis,
+    averaged over the remaining last axis. ``reduction='mean'`` or
+    ``'sum'`` reduces over the samples; any other value returns the
+    per-sample errors unchanged.
+    """
+    aligned = compute_similarity_transform_batch(S1, S2)
+    per_sample = np.sqrt(((aligned - S2)**2).sum(axis=-1)).mean(axis=-1)
+    if reduction == 'mean':
+        return per_sample.mean()
+    if reduction == 'sum':
+        return per_sample.sum()
+    return per_sample
+
+
+def all_gather(data):
+    """Gather ``data`` from all ranks and concatenate along axis 0.
+
+    With a world size of 1 the input is returned unchanged.
+    """
+    if paddle.distributed.get_world_size() == 1:
+        return data
+    gathered = []
+    paddle.distributed.all_gather(gathered, data)
+    return paddle.concat(gathered, 0)
+
+
+class Pose3DEval(object):
+    """
+    Running evaluation of 3D pose predictions with two metrics, mPJPE and
+    PAmPJPE, each tracked by an ``AverageMeter``.
+
+    Args:
+        output_eval (str): directory used to build the ``res_file`` path
+            (``pose3d_results.json``).
+        save_prediction_only (bool): when True ``accumulate()`` and
+            ``log()`` report no metric values.
+    """
+
+    def __init__(self, output_eval, save_prediction_only=False):
+        super(Pose3DEval, self).__init__()
+        self.output_eval = output_eval
+        self.res_file = os.path.join(output_eval, "pose3d_results.json")
+        self.save_prediction_only = save_prediction_only
+        self.reset()
+
+    def reset(self):
+        """Start fresh meters and clear the evaluation results."""
+        self.PAmPJPE = AverageMeter()
+        self.mPJPE = AverageMeter()
+        self.eval_results = {}
+
+    def get_human36m_joints(self, input):
+        """Select the 14 joints listed in ``J24_TO_J14`` along axis 1."""
+        J24_TO_J14 = paddle.to_tensor(
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18])
+        return paddle.index_select(input, J24_TO_J14, axis=1)
+
+    def update(self, inputs, outputs):
+        """
+        Update both meters with one batch.
+
+        Ground truth (``inputs['joints_3d']``, ``inputs['has_3d_joints']``)
+        and predictions (``outputs['pose3d']``) are gathered from all ranks
+        first; sets with 24 joints are reduced with
+        ``get_human36m_joints``. Errors are multiplied by 1000 before
+        being stored.
+        """
+        gt_3d_joints = all_gather(inputs['joints_3d'].cuda(ParallelEnv()
+                                                           .local_rank))
+        has_3d_joints = all_gather(inputs['has_3d_joints'].cuda(ParallelEnv()
+                                                                .local_rank))
+        pred_3d_joints = all_gather(outputs['pose3d'])
+
+        # Weight of this batch = number of samples flagged as having 3D
+        # joints. Without any such sample there is nothing to evaluate, and
+        # feeding an empty-selection mean with weight 0 into the meters
+        # would poison the running averages (NaN / division by zero).
+        # The check comes after the gathers so every rank still takes part
+        # in the collective calls.
+        count = int(np.sum(has_3d_joints.numpy()))
+        if count == 0:
+            return
+
+        if gt_3d_joints.shape[1] == 24:
+            gt_3d_joints = self.get_human36m_joints(gt_3d_joints)
+        if pred_3d_joints.shape[1] == 24:
+            pred_3d_joints = self.get_human36m_joints(pred_3d_joints)
+        mPJPE_val = mean_per_joint_position_error(pred_3d_joints, gt_3d_joints,
+                                                  has_3d_joints).mean()
+        # NOTE(review): PAmPJPE is averaged over every gathered sample,
+        # whereas mPJPE only uses the samples flagged in has_3d_joints, yet
+        # both are weighted by the same count -- confirm this is intended.
+        PAmPJPE_val = reconstruction_error(
+            pred_3d_joints.numpy(),
+            gt_3d_joints[:, :, :3].numpy(),
+            reduction=None).mean()
+        self.PAmPJPE.update(PAmPJPE_val * 1000., count)
+        self.mPJPE.update(mPJPE_val * 1000., count)
+
+    def accumulate(self):
+        """
+        Store the negated running averages as ``eval_results['pose3d']``
+        (order: mPJPE, PAmPJPE), unless ``save_prediction_only`` is set.
+        """
+        if self.save_prediction_only:
+            logger.info(f'The pose3d result is saved to {self.res_file} '
+                        'and do not evaluate the model.')
+            return
+        self.eval_results['pose3d'] = [-self.mPJPE.avg, -self.PAmPJPE.avg]
+
+    def log(self):
+        """Print the absolute metric values as a small table."""
+        if self.save_prediction_only:
+            return
+        stats_names = ['mPJPE', 'PAmPJPE']
+        num_values = len(stats_names)
+        print(' '.join(['| {}'.format(name) for name in stats_names]) + ' |')
+        print('|---' * (num_values + 1) + '|')
+
+        print(' '.join([
+            '| {:.3f}'.format(abs(value))
+            for value in self.eval_results['pose3d']
+        ]) + ' |')
+
+    def get_results(self):
+        """Return the ``eval_results`` dictionary."""
+        return self.eval_results
+        return self.eval_results
--- a/paddle_detection/ppdet/metrics/widerface_utils.py
+++ b/paddle_detection/ppdet/metrics/widerface_utils.py
@@ -0,0 +1,391 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import cv2
+import numpy as np
+from collections import OrderedDict
+
+import paddle
+
+from ppdet.utils.logger import setup_logger
+logger = setup_logger(__name__)
+
+__all__ = ['face_eval_run', 'lmk2out']
+
+
+def face_eval_run(model,
+                  image_dir,
+                  gt_file,
+                  pred_dir='output/pred',
+                  eval_mode='widerface',
+                  multi_scale=False):
+    """Run face detection over every image listed in a ground-truth file.
+
+    Args:
+        model: detector passed through to `detect_face` and related helpers.
+        image_dir (str): directory the image names are joined onto.
+        gt_file (str): annotation file made of records laid out as
+            <image name> / <box count> / <box count> annotation lines.
+        pred_dir (str): output directory for the prediction files.
+        eval_mode (str): 'widerface' writes one file per image;
+            'fddb' appends '.jpg' to each name and writes one combined file.
+        multi_scale (bool): when true, detections from several scales and a
+            flipped pass are merged with `bbox_vote`.
+    """
+    # load ground truth files
+    with open(gt_file, 'r') as gt_fp:
+        records = gt_fp.readlines()
+
+    imid2path = []
+    cursor = 0
+    total_lines = len(records)
+    while cursor < total_lines:
+        imid2path.append(records[cursor].strip('\n\t').split()[0])
+        box_count = int(records[cursor + 1].strip('\n\t').split()[0])
+        # Skip the name line, the count line and the annotation lines.
+        cursor += 2 + box_count
+    logger.info('The ground truth file load {} images'.format(len(imid2path)))
+
+    is_fddb = eval_mode == 'fddb'
+    is_widerface = eval_mode == 'widerface'
+    dets_dist = OrderedDict()
+    for iter_id, im_path in enumerate(imid2path):
+        image_path = os.path.join(image_dir, im_path)
+        if is_fddb:
+            image_path += '.jpg'
+        assert os.path.exists(image_path)
+        image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+        if not multi_scale:
+            dets = detect_face(model, image, 1)
+        else:
+            shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
+            det0 = detect_face(model, image, shrink)
+            det1 = flip_test(model, image, shrink)
+            det2, det3 = multi_scale_test(model, image, max_shrink)
+            det4 = multi_scale_test_pyramid(model, image, max_shrink)
+            dets = bbox_vote(np.row_stack((det0, det1, det2, det3, det4)))
+        if is_widerface:
+            save_widerface_bboxes(image_path, dets, pred_dir)
+        else:
+            # Kept in memory; only written out below in 'fddb' mode.
+            dets_dist[im_path] = dets
+        if iter_id % 100 == 0:
+            logger.info('Test iter {}'.format(iter_id))
+    if is_fddb:
+        save_fddb_bboxes(dets_dist, pred_dir)
+    logger.info("Finish evaluation.")
+
+
+def detect_face(model, image, shrink):
+    """Run the model on `image` resized by `shrink` and return its boxes.
+
+    Args:
+        model: callable detector; it is put in eval mode and called with a
+            dict holding `image`, `im_shape` and `scale_factor` tensors.
+        image (np.ndarray): image with height on axis 0 and width on axis 1.
+        shrink (float): resize factor; 1 leaves the image untouched.
+
+    Returns:
+        np.ndarray: rows of xmin, ymin, xmax, ymax, score. A single all-zero
+        row is returned when the model output holds exactly one element.
+    """
+    height, width = image.shape[0], image.shape[1]
+    if shrink != 1:
+        height, width = int(height * shrink), int(width * shrink)
+        image = cv2.resize(image, (width, height))
+
+    batch = face_img_process(image)
+    feed = {
+        "image": paddle.to_tensor(
+            batch, dtype='float32'),
+        "im_shape": paddle.to_tensor(
+            np.asarray([[height, width]]), dtype='float32'),
+        "scale_factor": paddle.to_tensor(
+            np.asarray([[shrink, shrink]]), dtype='float32')
+    }
+    model.eval()
+    raw = model(feed)['bbox'].numpy()
+    if np.prod(raw.shape) == 1:
+        logger.info("No face detected")
+        return np.array([[0, 0, 0, 0, 0]])
+    # Model columns 2..5 hold the box corners and column 1 the score;
+    # reorder them to xmin, ymin, xmax, ymax, score.
+    return raw[:, [2, 3, 4, 5, 1]]
+
+
+def flip_test(model, image, shrink):
+    """Detect faces on the horizontally flipped image and un-flip the boxes.
+
+    Returns an array shaped like the `detect_face` output (xmin, ymin, xmax,
+    ymax, score) with the x coordinates mirrored back into the frame of the
+    original image.
+    """
+    flipped_dets = detect_face(model, cv2.flip(image, 1), shrink)
+    img_width = image.shape[1]
+    restored = np.zeros(flipped_dets.shape)
+    # Mirroring swaps the roles of xmin and xmax.
+    restored[:, [0, 2]] = img_width - flipped_dets[:, [2, 0]]
+    # ymin, ymax and score are unaffected by a horizontal flip.
+    restored[:, [1, 3, 4]] = flipped_dets[:, [1, 3, 4]]
+    return restored
+
+
+def multi_scale_test(model, image, max_shrink):
+    """Detect faces on one shrunken and one or more enlarged image scales.
+
+    Returns:
+        tuple: (det_s, det_b) -- detections from the shrunken pass, filtered
+        to boxes whose longer side exceeds 30, and detections from the
+        other scale(s), filtered by size as described in the body.
+    """
+
+    def _sides(boxes):
+        # Pixel-inclusive width and height of each box.
+        return (boxes[:, 2] - boxes[:, 0] + 1, boxes[:, 3] - boxes[:, 1] + 1)
+
+    def _big_only(boxes):
+        box_w, box_h = _sides(boxes)
+        return boxes[np.maximum(box_w, box_h) > 30, :]
+
+    def _small_only(boxes):
+        box_w, box_h = _sides(boxes)
+        return boxes[np.minimum(box_w, box_h) < 100, :]
+
+    # Shrink detecting is only used to detect big faces
+    if max_shrink >= 0.75:
+        st = 0.5
+    else:
+        st = 0.5 * max_shrink
+    det_s = _big_only(detect_face(model, image, st))
+
+    # Enlarge once
+    if max_shrink > 1:
+        bt = min(2, max_shrink)
+    else:
+        bt = (st + max_shrink) / 2
+    det_b = detect_face(model, image, bt)
+
+    # Keep doubling the scale for small faces, finishing with max_shrink.
+    if max_shrink > 2:
+        bt *= 2
+        while bt < max_shrink:
+            det_b = np.row_stack((det_b, detect_face(model, image, bt)))
+            bt *= 2
+        det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))
+
+    if bt > 1:
+        # Enlarged images are only used to detect small faces.
+        det_b = _small_only(det_b)
+    else:
+        # Shrunken images are only used to detect big faces.
+        det_b = _big_only(det_b)
+    return det_s, det_b
+
+
+def multi_scale_test_pyramid(model, image, max_shrink):
+    """Detect faces over a fixed image pyramid and stack the results.
+
+    The 0.25 scale is always evaluated; the scales 0.75, 1.25, 1.5 and 1.75
+    are evaluated only when they do not exceed `max_shrink`.
+    """
+
+    def _extent(boxes, reducer):
+        # reducer picks the longer (np.maximum) or shorter (np.minimum) side.
+        return reducer(boxes[:, 2] - boxes[:, 0] + 1,
+                       boxes[:, 3] - boxes[:, 1] + 1)
+
+    # Use image pyramids to detect faces
+    stacked = detect_face(model, image, 0.25)
+    stacked = stacked[_extent(stacked, np.maximum) > 30, :]
+
+    for scale in (0.75, 1.25, 1.5, 1.75):
+        if scale <= max_shrink:
+            level = detect_face(model, image, scale)
+            if scale > 1:
+                # Enlarged images are only used to detect small faces.
+                level = level[_extent(level, np.minimum) < 100, :]
+            else:
+                # Shrunken images are only used to detect big faces.
+                level = level[_extent(level, np.maximum) > 30, :]
+            stacked = np.row_stack((stacked, level))
+    return stacked
+
+
+def to_chw(image):
+    """
+    Reorder a 3-D image from HWC to CHW layout.
+    Args:
+        image (np.array): an image with HWC layout.
+    Returns:
+        np.array: the CHW view of `image`; inputs that are not 3-D are
+        returned unchanged.
+    """
+    if len(image.shape) != 3:
+        return image
+    # One transpose is equivalent to swapping axes (1, 2) and then (1, 0).
+    return np.transpose(image, (2, 0, 1))
+
+
+def face_img_process(image,
+                     mean=[104., 117., 123.],
+                     std=[127.502231, 127.502231, 127.502231]):
+    """Convert an HWC image into a normalised float32 batch of one.
+
+    Args:
+        image: image data accepted by `np.array`, in HWC layout.
+        mean (list[float]): per-channel value subtracted from the image.
+        std (list[float]): per-channel value the image is divided by.
+
+    Returns:
+        np.ndarray: float32 array with a leading batch axis of length 1.
+    """
+    mean_chw = np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
+    std_chw = np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
+    chw = to_chw(np.array(image)).astype('float32')
+    # In-place arithmetic, so the result keeps the shape of `chw`.
+    chw -= mean_chw
+    chw /= std_chw
+    return np.array([chw])
+
+
+def get_shrink(height, width):
+    """
+    Compute the resize factors used for multi-scale testing.
+    Args:
+        height (int): image height.
+        width (int): image width.
+    Returns:
+        tuple: (shrink, max_shrink), where shrink is max_shrink capped at 1.
+    """
+    # avoid out of memory
+    pixels = height * width
+    max_shrink_v1 = (0x7fffffff / 577.0 / pixels)**0.5
+    max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / pixels)**0.5
+
+    def get_round(x, loc):
+        # Truncate (not round) x to `loc` decimals through its string form.
+        # Values printed with fewer than three decimals are returned as-is;
+        # a string form without '.' yields None, as before.
+        text = str(x)
+        if '.' not in text:
+            return None
+        whole, frac = text.split('.')
+        if len(frac) < 3:
+            return x
+        return float(whole + '.' + frac[0:loc])
+
+    max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
+
+    # (lower bound, upper bound, safety margin) for each band below 5.
+    bands = ((1.5, 2, 0.1), (2, 3, 0.2), (3, 4, 0.3), (4, 5, 0.4))
+    if max_shrink >= 5:
+        max_shrink = max_shrink - 0.5
+    elif max_shrink <= 0.1:
+        max_shrink = 0.1
+    else:
+        for lower, upper, margin in bands:
+            if lower <= max_shrink < upper:
+                max_shrink = max_shrink - margin
+                break
+
+    if max_shrink < 1:
+        shrink = max_shrink
+    else:
+        shrink = 1
+    return shrink, max_shrink
+
+
+def bbox_vote(det):
+    """Merge overlapping detections by score-weighted box voting.
+
+    Detections are visited in descending score order. Every remaining box
+    whose IoU with the current top box is at least 0.3 is merged with it:
+    the merged corners are the score-weighted mean of the group and the
+    merged score is the group's maximum.
+
+    Args:
+        det (np.ndarray): array of shape (N, 5) with rows laid out as
+            xmin, ymin, xmax, ymax, score.
+
+    Returns:
+        np.ndarray: at most 750 rows in the same layout, restricted to
+        scores >= 0.01. An empty input yields an empty (0, 5) array.
+    """
+    order = det[:, 4].ravel().argsort()[::-1]
+    det = det[order, :]
+    # Results are collected in a list and stacked once at the end. The
+    # previous try/bare-except around np.row_stack relied on a NameError for
+    # the first row and would have hidden any other failure as well.
+    voted = []
+    if det.shape[0] == 0:
+        # Placeholder for an empty input; its score (0.002) is below the
+        # final 0.01 threshold, so it never reaches the caller.
+        voted.append(np.array([[10, 10, 20, 20, 0.002]]))
+    while det.shape[0] > 0:
+        # IoU of the top-scoring remaining box against all remaining boxes
+        # (pixel-inclusive "+ 1" convention).
+        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
+        xx1 = np.maximum(det[0, 0], det[:, 0])
+        yy1 = np.maximum(det[0, 1], det[:, 1])
+        xx2 = np.minimum(det[0, 2], det[:, 2])
+        yy2 = np.minimum(det[0, 3], det[:, 3])
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        o = inter / (area[0] + area[:] - inter)
+
+        merge_index = np.where(o >= 0.3)[0]
+        if merge_index.shape[0] == 0:
+            # A degenerate top box (e.g. zero area, giving a NaN IoU) matches
+            # nothing, not even itself. Drop it explicitly so the loop always
+            # makes progress instead of spinning forever.
+            merge_index = np.array([0])
+        det_accu = det[merge_index, :]
+        det = np.delete(det, merge_index, 0)
+        if merge_index.shape[0] <= 1:
+            # A box without overlapping partners is discarded, unless it is
+            # the very last box left, in which case it is kept as-is.
+            if det.shape[0] == 0:
+                voted.append(det_accu)
+            continue
+        # Score-weighted average of the corners; the score is the group max.
+        weighted = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
+        det_accu_sum = np.zeros((1, 5))
+        det_accu_sum[:, 0:4] = np.sum(weighted,
+                                      axis=0) / np.sum(det_accu[:, -1:])
+        det_accu_sum[:, 4] = np.max(det_accu[:, 4])
+        voted.append(det_accu_sum)
+    # np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
+    dets = np.vstack(voted)
+    dets = dets[0:750, :]
+    keep_index = np.where(dets[:, 4] >= 0.01)[0]
+    dets = dets[keep_index, :]
+    return dets
+
+
+def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
+    """Write the detections of one image to a per-image text file.
+
+    The file is placed at <output_dir>/<image class>/<image name minus its
+    last four characters>.txt, where class and name are the last two
+    '/'-separated components of `image_path`. It holds the
+    '<class>/<name>' line, the number of boxes, then one
+    'xmin ymin width height score' line per box.
+
+    Args:
+        image_path (str): '/'-separated path ending in <class>/<name>.
+        bboxes_scores (np.ndarray): rows of xmin, ymin, xmax, ymax, score.
+        output_dir (str): root directory of the prediction files.
+    """
+    image_name = image_path.split('/')[-1]
+    image_class = image_path.split('/')[-2]
+    odir = os.path.join(output_dir, image_class)
+    if not os.path.exists(odir):
+        os.makedirs(odir)
+
+    ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
+    # The context manager closes the file even if a write raises.
+    with open(ofname, 'w') as f:
+        f.write('{:s}\n'.format(image_class + '/' + image_name))
+        f.write('{:d}\n'.format(bboxes_scores.shape[0]))
+        for box_score in bboxes_scores:
+            xmin, ymin, xmax, ymax, score = box_score
+            # Width and height use the pixel-inclusive "+ 1" convention.
+            f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
+                xmax - xmin + 1), (ymax - ymin + 1), score))
+    logger.info("The predicted result is saved as {}".format(ofname))
+
+
+def save_fddb_bboxes(bboxes_scores,
+                     output_dir,
+                     output_fname='pred_fddb_res.txt'):
+    """Write the detections of all images to one combined text file.
+
+    For each image the file holds the image path, the number of boxes and
+    one 'xmin ymin width height score' line per box.
+
+    Args:
+        bboxes_scores (dict): maps an image path to an array whose rows are
+            xmin, ymin, xmax, ymax, score.
+        output_dir (str): directory the result file is written to; it is
+            created when missing.
+        output_fname (str): name of the result file.
+
+    Returns:
+        str: path of the written file.
+    """
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    predict_file = os.path.join(output_dir, output_fname)
+    # The context manager guarantees the file is flushed and closed; it was
+    # previously never closed.
+    with open(predict_file, 'w') as f:
+        # dict.iteritems() does not exist on Python 3; items() works on
+        # both Python 2 and Python 3.
+        for image_path, dets in bboxes_scores.items():
+            f.write('{:s}\n'.format(image_path))
+            f.write('{:d}\n'.format(dets.shape[0]))
+            for box_score in dets:
+                xmin, ymin, xmax, ymax, score = box_score
+                width, height = xmax - xmin, ymax - ymin
+                f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
+                        .format(xmin, ymin, width, height, score))
+    logger.info("The predicted result is saved as {}".format(predict_file))
+    return predict_file
+
+
+def lmk2out(results, is_bbox_normalized=False):
+    """
+    Decode predicted face landmarks into one result dict per detected face.
+
+    Args:
+        results (list[dict]): each item provides `bbox`, `im_id`,
+            `face_index`, `prior_boxes` and `landmark`;
+            if is_bbox_normalized=True, `im_shape` is needed as well.
+        is_bbox_normalized (bool): whether or not landmark is normalized;
+            when true the decoded landmarks are scaled by the image width
+            (x entries) and height (y entries).
+
+    Returns:
+        list[dict]: dicts with the keys `image_id`, `landmark` (array of 10
+        values, x/y interleaved) and `score`.
+    """
+    xywh_res = []
+    for t in results:
+        bboxes = t['bbox'][0]
+        lengths = t['bbox'][1][0]
+        im_ids = np.array(t['im_id'][0]).flatten()
+        # None has to be tested first: `.shape` on None raises, which made
+        # the original None check unreachable.
+        if bboxes is None or bboxes.shape == (1, 1):
+            continue
+        face_index = t['face_index'][0]
+        prior_box = t['prior_boxes'][0]
+        predict_lmk = t['landmark'][0]
+        prior = np.reshape(prior_box, (-1, 4))
+        predictlmk = np.reshape(predict_lmk, (-1, 10))
+
+        # k runs over all boxes of the batch, i over the boxes of one image.
+        k = 0
+        for a in range(len(lengths)):
+            num = lengths[a]
+            im_id = int(im_ids[a])
+            for i in range(num):
+                score = bboxes[k][1]
+                # NOTE(review): face_index is indexed with the per-image
+                # counter i while bboxes uses the running counter k --
+                # confirm this is intended for batches of several images.
+                theindex = face_index[i][0]
+                me_prior = prior[theindex, :]
+                lmk_pred = predictlmk[theindex, :]
+                prior_w = me_prior[2] - me_prior[0]
+                prior_h = me_prior[3] - me_prior[1]
+                prior_w_center = (me_prior[2] + me_prior[0]) / 2
+                prior_h_center = (me_prior[3] + me_prior[1]) / 2
+                # Offsets are scaled by 0.1 and the prior size, then shifted
+                # to the prior centre; even slots are x, odd slots are y.
+                lmk_decode = np.zeros((10))
+                for j in [0, 2, 4, 6, 8]:
+                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_w_center
+                for j in [1, 3, 5, 7, 9]:
+                    lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_h_center
+                if is_bbox_normalized:
+                    # `im_shape` is read only here, so it is required only
+                    # for normalized landmarks, as the docstring states.
+                    im_shape = t['im_shape'][0][a].tolist()
+                    image_h, image_w = int(im_shape[0]), int(im_shape[1])
+                    lmk_decode = lmk_decode * np.array([
+                        image_w, image_h, image_w, image_h, image_w, image_h,
+                        image_w, image_h, image_w, image_h
+                    ])
+                lmk_res = {
+                    'image_id': im_id,
+                    'landmark': lmk_decode,
+                    'score': score,
+                }
+                xywh_res.append(lmk_res)
+                k += 1
+    return xywh_res