更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/deploy/third_engine/demo_openvino/CMakeLists.txt
+++ b/paddle_detection/deploy/third_engine/demo_openvino/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.4.1)
+set(CMAKE_CXX_STANDARD 14)  # compile the demo sources as C++14
+
+project(picodet_demo)
+
+find_package(OpenCV REQUIRED)  # image I/O, resizing and drawing in main.cpp
+find_package(InferenceEngine REQUIRED)  # OpenVINO runtime used by picodet_openvino.cpp
+find_package(ngraph REQUIRED)  # provides NGRAPH_LIBRARIES linked below
+
+include_directories(
+    ${OpenCV_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+add_executable(picodet_demo main.cpp picodet_openvino.cpp)  # single demo binary
+
+target_link_libraries(
+    picodet_demo
+    ${InferenceEngine_LIBRARIES}
+    ${NGRAPH_LIBRARIES}
+    ${OpenCV_LIBS}
+)
--- a/paddle_detection/deploy/third_engine/demo_openvino/README.md
+++ b/paddle_detection/deploy/third_engine/demo_openvino/README.md
@@ -0,0 +1,143 @@
+# PicoDet OpenVINO Demo
+
+This folder provides PicoDet inference code using
+[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html). Most of the implementation in this folder is the same as *demo_ncnn*.  
+**Recommended:** install OpenVINO from the xxx.tar.gz archive instead of from GitHub, [link](https://registrationcenter-download.intel.com/akdlm/irc_nas/18096/l_openvino_toolkit_p_2021.4.689.tgz).
+
+
+## Install OpenVINO Toolkit
+
+Go to [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
+
+Download a suitable version and install.
+
+Follow the official Get Started Guides: https://docs.openvinotoolkit.org/latest/get_started_guides.html
+
+## Set the Environment Variables
+
+### Windows:
+
+Run this command in cmd. (Every time before using OpenVINO)
+```cmd
+<INSTALL_DIR>\openvino_2021\bin\setupvars.bat
+```
+
+Or set the system environment variables once and for all:
+
+Name                  |Value
+:--------------------:|:--------:
+INTEL_OPENVINO_DIR | <INSTALL_DIR>\openvino_2021
+INTEL_CVSDK_DIR | %INTEL_OPENVINO_DIR%
+InferenceEngine_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share
+HDDL_INSTALL_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl
+ngraph_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake
+
+And add this to ```Path```
+```
+%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%HDDL_INSTALL_DIR%\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib
+```
+
+### Linux
+
+Run this command in shell. (Every time before using OpenVINO)
+
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+```
+
+Or edit .bashrc
+
+```shell
+vi ~/.bashrc
+```
+
+Add this line to the end of the file
+
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+```
+
+## Convert model
+
+   Convert to OpenVINO
+
+   ``` shell
+   cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer
+   ```
+
+   Install requirements for convert tool
+
+   ```shell
+   cd ./install_prerequisites
+   sudo ./install_prerequisites_onnx.sh
+
+   ```
+
+   Then convert the model. Note: `mean_values` and `scale_values` must match the training settings in your YAML config file.
+   ```shell
+   python3 mo_onnx.py --input_model <ONNX_MODEL> --mean_values [103.53,116.28,123.675] --scale_values [57.375,57.12,58.395]
+   ```
+
+## Build
+
+### Windows
+
+```cmd
+<OPENVINO_INSTALL_DIR>\openvino_2021\bin\setupvars.bat
+mkdir build
+cd build
+cmake ..
+msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
+```
+
+### Linux
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+mkdir build
+cd build
+cmake ..
+make
+```
+
+
+## Run demo
+Download PicoDet openvino model [PicoDet openvino model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_openvino.zip).
+
+Move the PicoDet OpenVINO model files to the demo's `weight` folder.
+
+### Edit file
+```
+step1:
+main.cpp
+#define image_size 416
+...
+auto detector = PicoDet("../weight/picodet_m_416.xml");
+...
+step2:
+picodet_openvino.h
+#define image_size 416
+```
+
+### Webcam
+
+```shell
+picodet_demo 0 0
+```
+
+### Inference images
+
+```shell
+picodet_demo 1 IMAGE_FOLDER/*.jpg
+```
+
+### Inference video
+
+```shell
+picodet_demo 2 VIDEO_PATH
+```
+
+### Benchmark
+
+```shell
+picodet_demo 3 0
+```
--- a/paddle_detection/deploy/third_engine/demo_openvino/main.cpp
+++ b/paddle_detection/deploy/third_engine/demo_openvino/main.cpp
@@ -0,0 +1,302 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet
+
+#include "picodet_openvino.h"
+#include <algorithm>
+#include <cfloat>
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <string>
+#include <vector>
+#define image_size 416
+
+struct object_rect {  // rectangle of the letterboxed canvas that holds image content
+  int x;  // left offset of the content inside the canvas, in pixels
+  int y;  // top offset of the content inside the canvas, in pixels
+  int width;  // width of the content after aspect-preserving resize
+  int height;  // height of the content after aspect-preserving resize
+};
+
+// Resizes `src` so that it fits inside a `dst_size` canvas without changing
+// its aspect ratio, and centres it on a zero-filled CV_8UC3 canvas `dst`.
+//
+// `effect_area` receives the rectangle of `dst` that actually contains image
+// content (offset plus resized width/height); callers use it to map detector
+// coordinates back to the source image. When the aspect ratios already match,
+// `src` is simply resized to `dst_size` and the whole canvas is reported.
+//
+// `src` is expected to be a non-empty 8-bit, 3-channel image; an empty `src`
+// makes cv::resize throw. Always returns 0.
+int resize_uniform(cv::Mat &src, cv::Mat &dst, cv::Size dst_size,
+                   object_rect &effect_area) {
+  const int w = src.cols;
+  const int h = src.rows;
+  const int dst_w = dst_size.width;
+  const int dst_h = dst_size.height;
+
+  const float ratio_src = w * 1.0 / h;
+  const float ratio_dst = dst_w * 1.0 / dst_h;
+
+  if (!(ratio_src > ratio_dst) && !(ratio_src < ratio_dst)) {
+    // Same aspect ratio: no padding needed.
+    cv::resize(src, dst, dst_size);
+    effect_area = object_rect{0, 0, dst_w, dst_h};
+    return 0;
+  }
+
+  // Fit the limiting dimension exactly and scale the other one down.
+  int tmp_w = dst_w;
+  int tmp_h = dst_h;
+  if (ratio_src > ratio_dst) {
+    tmp_h = floor((dst_w * 1.0 / w) * h);
+  } else {
+    tmp_w = floor((dst_h * 1.0 / h) * w);
+  }
+  // Extremely elongated inputs would otherwise round down to a 0-pixel side,
+  // which cv::resize rejects.
+  if (tmp_w < 1) {
+    tmp_w = 1;
+  }
+  if (tmp_h < 1) {
+    tmp_h = 1;
+  }
+
+  cv::Mat tmp;
+  cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
+
+  // Centre the resized image on the canvas. Copying through an ROI lets
+  // OpenCV handle row strides instead of hand-written memcpy offsets that
+  // assume a 3-byte pixel.
+  const int offset_x = (dst_w - tmp_w) / 2;
+  const int offset_y = (dst_h - tmp_h) / 2;
+  dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
+  tmp.copyTo(dst(cv::Rect(offset_x, offset_y, tmp_w, tmp_h)));
+
+  effect_area = object_rect{offset_x, offset_y, tmp_w, tmp_h};
+  return 0;
+}
+
+const int color_list[80][3] = {  // one color triplet per class label (see draw_bboxes)
+    {216, 82, 24},   {236, 176, 31},  {125, 46, 141},  {118, 171, 47},
+    {76, 189, 237},  {238, 19, 46},   {76, 76, 76},    {153, 153, 153},
+    {255, 0, 0},     {255, 127, 0},   {190, 190, 0},   {0, 255, 0},
+    {0, 0, 255},     {170, 0, 255},   {84, 84, 0},     {84, 170, 0},
+    {84, 255, 0},    {170, 84, 0},    {170, 170, 0},   {170, 255, 0},
+    {255, 84, 0},    {255, 170, 0},   {255, 255, 0},   {0, 84, 127},
+    {0, 170, 127},   {0, 255, 127},   {84, 0, 127},    {84, 84, 127},
+    {84, 170, 127},  {84, 255, 127},  {170, 0, 127},   {170, 84, 127},
+    {170, 170, 127}, {170, 255, 127}, {255, 0, 127},   {255, 84, 127},
+    {255, 170, 127}, {255, 255, 127}, {0, 84, 255},    {0, 170, 255},
+    {0, 255, 255},   {84, 0, 255},    {84, 84, 255},   {84, 170, 255},
+    {84, 255, 255},  {170, 0, 255},   {170, 84, 255},  {170, 170, 255},
+    {170, 255, 255}, {255, 0, 255},   {255, 84, 255},  {255, 170, 255},
+    {42, 0, 0},      {84, 0, 0},      {127, 0, 0},     {170, 0, 0},
+    {212, 0, 0},     {255, 0, 0},     {0, 42, 0},      {0, 84, 0},
+    {0, 127, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},
+    {0, 0, 42},      {0, 0, 84},      {0, 0, 127},     {0, 0, 170},
+    {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
+    {72, 72, 72},    {109, 109, 109}, {145, 145, 145}, {182, 182, 182},
+    {218, 218, 218}, {0, 113, 188},   {80, 182, 188},  {127, 127, 0},
+};
+
+// Draws every detection in `bboxes` on a copy of `bgr` (rectangle plus a
+// "<class> <score>%" label) and writes the result to "../predict.jpg".
+//
+// Box coordinates are taken relative to the letterboxed detector input;
+// `effect_roi` (as produced by resize_uniform) is used to shift and scale
+// them back onto `bgr`. Detections whose label has no entry in the class-name
+// table are skipped rather than indexing past the tables, and nothing is
+// drawn or written when `effect_roi` has a non-positive size.
+void draw_bboxes(const cv::Mat &bgr, const std::vector<BoxInfo> &bboxes,
+                 object_rect effect_roi) {
+  static const char *class_names[] = {
+      "person",        "bicycle",      "car",
+      "motorcycle",    "airplane",     "bus",
+      "train",         "truck",        "boat",
+      "traffic light", "fire hydrant", "stop sign",
+      "parking meter", "bench",        "bird",
+      "cat",           "dog",          "horse",
+      "sheep",         "cow",          "elephant",
+      "bear",          "zebra",        "giraffe",
+      "backpack",      "umbrella",     "handbag",
+      "tie",           "suitcase",     "frisbee",
+      "skis",          "snowboard",    "sports ball",
+      "kite",          "baseball bat", "baseball glove",
+      "skateboard",    "surfboard",    "tennis racket",
+      "bottle",        "wine glass",   "cup",
+      "fork",          "knife",        "spoon",
+      "bowl",          "banana",       "apple",
+      "sandwich",      "orange",       "broccoli",
+      "carrot",        "hot dog",      "pizza",
+      "donut",         "cake",         "chair",
+      "couch",         "potted plant", "bed",
+      "dining table",  "toilet",       "tv",
+      "laptop",        "mouse",        "remote",
+      "keyboard",      "cell phone",   "microwave",
+      "oven",          "toaster",      "sink",
+      "refrigerator",  "book",         "clock",
+      "vase",          "scissors",     "teddy bear",
+      "hair drier",    "toothbrush"};
+  const int num_classes =
+      static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));
+
+  if (effect_roi.width <= 0 || effect_roi.height <= 0) {
+    // The scale factors below would divide by zero.
+    fprintf(stderr, "draw_bboxes: invalid effective area %dx%d\n",
+            effect_roi.width, effect_roi.height);
+    return;
+  }
+
+  cv::Mat image = bgr.clone();
+  const float width_ratio =
+      static_cast<float>(image.cols) / static_cast<float>(effect_roi.width);
+  const float height_ratio =
+      static_cast<float>(image.rows) / static_cast<float>(effect_roi.height);
+
+  for (const BoxInfo &bbox : bboxes) {
+    if (bbox.label < 0 || bbox.label >= num_classes) {
+      continue; // no name/color for this label
+    }
+
+    // Map the box from detector-input coordinates to source-image pixels.
+    const int left = static_cast<int>((bbox.x1 - effect_roi.x) * width_ratio);
+    const int top = static_cast<int>((bbox.y1 - effect_roi.y) * height_ratio);
+    const int right = static_cast<int>((bbox.x2 - effect_roi.x) * width_ratio);
+    const int bottom =
+        static_cast<int>((bbox.y2 - effect_roi.y) * height_ratio);
+
+    const cv::Scalar color(color_list[bbox.label][0],
+                           color_list[bbox.label][1],
+                           color_list[bbox.label][2]);
+    cv::rectangle(image,
+                  cv::Rect(cv::Point(left, top), cv::Point(right, bottom)),
+                  color);
+
+    char text[256];
+    snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label],
+             bbox.score * 100);
+    int baseLine = 0;
+    const cv::Size label_size =
+        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+    // Place the label just above the box, kept inside the image.
+    int x = left;
+    int y = top - label_size.height - baseLine;
+    if (y < 0)
+      y = 0;
+    if (x + label_size.width > image.cols)
+      x = image.cols - label_size.width;
+    if (x < 0)
+      x = 0;
+
+    cv::rectangle(image, cv::Rect(cv::Point(x, y),
+                                  cv::Size(label_size.width,
+                                           label_size.height + baseLine)),
+                  color, -1);
+    cv::putText(image, text, cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
+  }
+
+  cv::imwrite("../predict.jpg", image);
+}
+
+// Runs detection on every file matching the glob pattern `imagepath` and
+// draws the result for each image via draw_bboxes.
+//
+// Returns 0 when all matched images were processed, or -1 as soon as one of
+// them cannot be read (the offending path is reported on stderr).
+int image_demo(PicoDet &detector, const char *imagepath) {
+  std::vector<std::string> filenames;
+  cv::glob(imagepath, filenames, false);
+
+  for (const auto &img_name : filenames) {
+    cv::Mat image = cv::imread(img_name);
+    if (image.empty()) {
+      fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+      return -1;
+    }
+    object_rect effect_roi;
+    cv::Mat resized_img;
+    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
+                   effect_roi);
+    auto results = detector.detect(resized_img, 0.4, 0.5);
+    draw_bboxes(image, results, effect_roi);
+  }
+  return 0;
+}
+
+// Grabs frames from camera `cam_id` and runs detection + drawing on each one
+// until the camera stops delivering frames.
+//
+// Returns -1 when the camera cannot be opened, 0 once the stream ends.
+int webcam_demo(PicoDet &detector, int cam_id) {
+  cv::VideoCapture cap(cam_id);
+  if (!cap.isOpened()) {
+    fprintf(stderr, "failed to open camera %d\n", cam_id);
+    return -1;
+  }
+
+  cv::Mat image;
+  while (true) {
+    cap >> image;
+    if (image.empty()) {
+      // An empty frame cannot be resized; treat it as end of stream.
+      fprintf(stderr, "camera %d returned an empty frame, stopping\n", cam_id);
+      break;
+    }
+    object_rect effect_roi;
+    cv::Mat resized_img;
+    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
+                   effect_roi);
+    auto results = detector.detect(resized_img, 0.4, 0.5);
+    draw_bboxes(image, results, effect_roi);
+    cv::waitKey(1);
+  }
+  return 0;
+}
+
+// Reads the video at `path` frame by frame and runs detection + drawing on
+// each frame.
+//
+// Returns -1 when the video cannot be opened, 0 after the last frame.
+int video_demo(PicoDet &detector, const char *path) {
+  cv::VideoCapture cap(path);
+  if (!cap.isOpened()) {
+    fprintf(stderr, "failed to open video %s\n", path);
+    return -1;
+  }
+
+  cv::Mat image;
+  while (true) {
+    cap >> image;
+    if (image.empty()) {
+      break; // end of the video (or a decode failure): nothing left to process
+    }
+    object_rect effect_roi;
+    cv::Mat resized_img;
+    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
+                   effect_roi);
+    auto results = detector.detect(resized_img, 0.4, 0.5);
+    draw_bboxes(image, results, effect_roi);
+    cv::waitKey(1);
+  }
+  return 0;
+}
+
+// Times detector.detect() on a synthetic image_size x image_size image.
+// The first 8 calls are warm-up and are not measured; the following 100 calls
+// contribute to the min / max / average latency (milliseconds) printed on
+// stderr. Always returns 0.
+int benchmark(PicoDet &detector) {
+  const int warmup_iters = 8;
+  const int timed_iters = 100;
+
+  double fastest_ms = DBL_MAX;
+  double slowest_ms = -DBL_MAX;
+  double total_ms = 0;
+  cv::Mat image(image_size, image_size, CV_8UC3, cv::Scalar(1, 1, 1));
+
+  for (int iter = 0; iter < warmup_iters + timed_iters; ++iter) {
+    const auto tick = std::chrono::steady_clock::now();
+    std::vector<BoxInfo> results;
+    results = detector.detect(image, 0.4, 0.5);
+    const auto tock = std::chrono::steady_clock::now();
+
+    if (iter < warmup_iters) {
+      continue; // warm-up run, not part of the statistics
+    }
+    const double elapsed_ms =
+        std::chrono::duration<double, std::milli>(tock - tick).count();
+    fastest_ms = (std::min)(fastest_ms, elapsed_ms);
+    slowest_ms = (std::max)(slowest_ms, elapsed_ms);
+    total_ms += elapsed_ms;
+  }
+
+  const double average_ms = total_ms / timed_iters;
+  fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet",
+          fastest_ms, slowest_ms, average_ms);
+  return 0;
+}
+
+// Entry point: `picodet_demo <mode> <path>`.
+//   mode 0: webcam, <path> is the camera id
+//   mode 1: images, <path> is a glob pattern
+//   mode 2: video,  <path> is the video file
+//   mode 3: benchmark, <path> is ignored
+// The exit code is the selected demo's return value, or -1 for a wrong
+// argument count or an unknown mode.
+int main(int argc, char **argv) {
+  const auto print_usage = [argv]() {
+    fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is "
+                    "cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n "
+                    "For video, mode=2; \n For benchmark, mode=3 path=0.\n",
+            argv[0]);
+  };
+
+  if (argc != 3) {
+    print_usage();
+    return -1;
+  }
+  std::cout << "start init model" << std::endl;
+  auto detector = PicoDet("../weight/picodet_m_416.xml");
+  std::cout << "success" << std::endl;
+
+  const int mode = atoi(argv[1]);
+  switch (mode) {
+  case 0:
+    return webcam_demo(detector, atoi(argv[2]));
+  case 1:
+    return image_demo(detector, argv[2]);
+  case 2:
+    return video_demo(detector, argv[2]);
+  case 3:
+    return benchmark(detector);
+  default:
+    // Unknown mode: explain the options and report failure to the shell.
+    print_usage();
+    return -1;
+  }
+}
--- a/paddle_detection/deploy/third_engine/demo_openvino/picodet_openvino.cpp
+++ b/paddle_detection/deploy/third_engine/demo_openvino/picodet_openvino.cpp
@@ -0,0 +1,209 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
+
+#include "picodet_openvino.h"
+
+// Fast approximation of exp(x): builds the IEEE-754 bit pattern of the result
+// directly from a scaled and biased copy of `x`.
+//
+// The argument is clamped to [-87, 88] first. Outside that range the scaled
+// value is negative or no longer fits a finite float exponent, and converting
+// an out-of-range floating value to uint32_t is undefined behaviour. NaN is
+// mapped to the lower bound, so the result is always a finite positive float.
+inline float fast_exp(float x) {
+  const float kMinArg = -87.0f;
+  const float kMaxArg = 88.0f;
+  if (!(x >= kMinArg)) { // also catches NaN
+    x = kMinArg;
+  } else if (x > kMaxArg) {
+    x = kMaxArg;
+  }
+
+  union {
+    uint32_t i;
+    float f;
+  } v{};
+  v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
+  return v.f;
+}
+
+// Logistic function 1 / (1 + e^-x), evaluated with the fast_exp approximation.
+inline float sigmoid(float x) {
+  const float decay = fast_exp(-x);
+  return 1.0f / (1.0f + decay);
+}
+
+// Writes the softmax of src[0..length) into dst[0..length).
+//
+// The maximum input is subtracted before exponentiating so every exponent is
+// <= 0; exponentials come from the fast_exp approximation. `src` and `dst`
+// must each hold at least `length` elements. Nothing is read or written when
+// `length` is not positive. Always returns 0.
+template <typename T>
+int activation_function_softmax(const T *src, T *dst, int length) {
+  if (length <= 0) {
+    return 0; // max_element on an empty range would dereference `src + 0`
+  }
+
+  const T alpha = *std::max_element(src, src + length);
+  T denominator{0};
+
+  for (int i = 0; i < length; ++i) {
+    dst[i] = fast_exp(src[i] - alpha);
+    denominator += dst[i];
+  }
+
+  for (int i = 0; i < length; ++i) {
+    dst[i] /= denominator;
+  }
+
+  return 0;
+}
+
+// Loads the network at `model_path`, compiles it for the "CPU" device and
+// creates the inference request reused by detect().
+//
+// The first model input is remembered as the image input and forced to FP32,
+// because preprocess() fills the input blob through a float pointer. All
+// outputs are forced to FP32 as well, matching how detect() reads them.
+PicoDet::PicoDet(const char *model_path) {
+  InferenceEngine::Core ie;
+  InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);
+
+  // prepare input settings
+  InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
+  if (inputs_map.empty()) {
+    THROW_IE_EXCEPTION << "Model " << model_path << " has no inputs";
+  }
+  input_name_ = inputs_map.begin()->first;
+  inputs_map.begin()->second->setPrecision(InferenceEngine::Precision::FP32);
+
+  // prepare output settings
+  InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
+  for (auto &output_info : outputs_map) {
+    output_info.second->setPrecision(InferenceEngine::Precision::FP32);
+  }
+
+  // get network
+  network_ = ie.LoadNetwork(model, "CPU");
+  infer_request_ = network_.CreateInferRequest();
+}
+
+// Nothing to release by hand: the members clean up after themselves.
+PicoDet::~PicoDet() = default;
+
+// Copies `image` into the network input `blob`, converting the interleaved
+// 8-bit pixels to planar float data (all values of channel 0, then channel 1,
+// then channel 2). No scaling or mean subtraction is applied here.
+//
+// Throws an Inference Engine exception when `image` is not an 8-bit 3-channel
+// matrix, when `blob` is not a MemoryBlob, or when the blob's element count
+// differs from 3 * rows * cols of `image` (writing would overrun the blob).
+void PicoDet::preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob) {
+  const int img_w = image.cols;
+  const int img_h = image.rows;
+  const int channels = 3;
+
+  if (image.type() != CV_8UC3) {
+    THROW_IE_EXCEPTION
+        << "PicoDet::preprocess expects a non-empty 8-bit 3-channel image";
+  }
+
+  InferenceEngine::MemoryBlob::Ptr mblob =
+      InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
+  if (!mblob) {
+    THROW_IE_EXCEPTION
+        << "We expect blob to be inherited from MemoryBlob in "
+        << "PicoDet::preprocess, but by fact we were not able to cast "
+        << "inputBlob to MemoryBlob";
+  }
+
+  const size_t plane = static_cast<size_t>(img_w) * static_cast<size_t>(img_h);
+  if (mblob->size() != plane * channels) {
+    THROW_IE_EXCEPTION << "Image of size " << img_w << "x" << img_h
+                       << " does not match the network input blob ("
+                       << mblob->size() << " elements)";
+  }
+
+  auto mblobHolder = mblob->wmap();
+  float *blob_data = mblobHolder.as<float *>();
+
+  for (int h = 0; h < img_h; ++h) {
+    const cv::Vec3b *row = image.ptr<cv::Vec3b>(h);
+    for (int w = 0; w < img_w; ++w) {
+      const size_t pixel = static_cast<size_t>(h) * img_w + w;
+      for (int c = 0; c < channels; ++c) {
+        blob_data[c * plane + pixel] = static_cast<float>(row[w][c]);
+      }
+    }
+  }
+}
+
+// Runs the network on `image` and returns the detections that survive
+// thresholding and non-maximum suppression.
+//
+// `image` must already have the network input size (see preprocess()).
+// Candidates scoring above `score_threshold` are decoded from every head in
+// heads_info_, grouped by class, and each class is filtered with nms() using
+// `nms_threshold`. Box coordinates are in input-image pixels.
+// Throws an Inference Engine exception when an output blob is not a
+// MemoryBlob.
+std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
+                                     float nms_threshold) {
+  InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
+  preprocess(image, input_blob);
+
+  // do inference
+  infer_request_.Infer();
+
+  // get output: one candidate list per class
+  std::vector<std::vector<BoxInfo>> results;
+  results.resize(this->num_class_);
+
+  for (const auto &head_info : this->heads_info_) {
+    const InferenceEngine::Blob::Ptr dis_pred_blob =
+        infer_request_.GetBlob(head_info.dis_layer);
+    const InferenceEngine::Blob::Ptr cls_pred_blob =
+        infer_request_.GetBlob(head_info.cls_layer);
+
+    auto mdis_pred =
+        InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_pred_blob);
+    auto mcls_pred =
+        InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_pred_blob);
+    if (!mdis_pred || !mcls_pred) {
+      THROW_IE_EXCEPTION << "Output blobs " << head_info.cls_layer << " / "
+                         << head_info.dis_layer << " are not MemoryBlobs";
+    }
+
+    // Keep the read-only mappings alive while the raw pointers are in use.
+    auto mdis_pred_holder = mdis_pred->rmap();
+    const float *dis_pred = mdis_pred_holder.as<const float *>();
+    auto mcls_pred_holder = mcls_pred->rmap();
+    const float *cls_pred = mcls_pred_holder.as<const float *>();
+
+    this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold,
+                       results);
+  }
+
+  std::vector<BoxInfo> dets;
+  for (auto &class_boxes : results) {
+    this->nms(class_boxes, nms_threshold);
+    dets.insert(dets.end(), class_boxes.begin(), class_boxes.end());
+  }
+  return dets;
+}
+
+// Decodes one detection head. For every cell of the head's feature map the
+// best-scoring class is picked; if its score exceeds `threshold`, the cell's
+// box is decoded with disPred2Bbox and appended to results[<that class>].
+// `cls_pred` holds num_class_ scores per cell and `dis_pred` holds
+// 4 * (reg_max_ + 1) values per cell, both in row-major cell order.
+void PicoDet::decode_infer(const float *&cls_pred, const float *&dis_pred,
+                           int stride, float threshold,
+                           std::vector<std::vector<BoxInfo>> &results) {
+  const int grid_h = ceil((float)input_size_ / stride);
+  const int grid_w = ceil((float)input_size_ / stride);
+  const int values_per_box = (reg_max_ + 1) * 4;
+
+  for (int row = 0; row < grid_h; ++row) {
+    for (int col = 0; col < grid_w; ++col) {
+      const int cell = row * grid_w + col;
+      const float *cell_scores = cls_pred + cell * num_class_;
+
+      // Arg-max over the classes; scores <= 0 never replace the defaults.
+      float best_score = 0;
+      int best_label = 0;
+      for (int label = 0; label < num_class_; ++label) {
+        if (cell_scores[label] > best_score) {
+          best_score = cell_scores[label];
+          best_label = label;
+        }
+      }
+
+      if (!(best_score > threshold)) {
+        continue;
+      }
+      const float *bbox_pred = dis_pred + cell * values_per_box;
+      results[best_label].push_back(this->disPred2Bbox(
+          bbox_pred, best_label, best_score, col, row, stride));
+    }
+  }
+}
+
+// Converts the distance distributions of one feature-map cell into a box.
+//
+// `dfl_det` points at 4 consecutive groups of (reg_max_ + 1) logits, one
+// group per box side. Each group is soft-maxed and its expected bin index,
+// multiplied by `stride`, is the distance from the cell centre to that side.
+// The resulting box is clipped to [0, input_size_] and returned together with
+// `label` and `score`.
+BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
+                              int x, int y, int stride) {
+  const float ct_x = (x + 0.5) * stride;
+  const float ct_y = (y + 0.5) * stride;
+
+  const int bins = reg_max_ + 1;
+  // One scratch buffer reused for all four sides instead of a new[]/delete[]
+  // pair per side.
+  std::vector<float> probabilities(bins);
+  float dis_pred[4];
+
+  for (int side = 0; side < 4; ++side) {
+    activation_function_softmax(dfl_det + side * bins, probabilities.data(),
+                                bins);
+    float dis = 0;
+    for (int j = 0; j < bins; ++j) {
+      dis += j * probabilities[j];
+    }
+    dis *= stride;
+    dis_pred[side] = dis;
+  }
+
+  const float xmin = (std::max)(ct_x - dis_pred[0], .0f);
+  const float ymin = (std::max)(ct_y - dis_pred[1], .0f);
+  const float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
+  const float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
+  return BoxInfo{xmin, ymin, xmax, ymax, score, label};
+}
+
+// Greedy non-maximum suppression, in place.
+//
+// Boxes are sorted by descending score; each surviving box then suppresses
+// every lower-ranked box whose IoU with it is >= NMS_THRESH. Widths and
+// heights use the inclusive "+ 1" pixel convention. On return `input_boxes`
+// holds only the survivors, still ordered by descending score.
+//
+// Suppressed boxes are flagged rather than erased one by one, so the
+// remaining elements are not shifted on every suppression.
+void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
+  std::sort(
+      input_boxes.begin(), input_boxes.end(),
+      [](const BoxInfo &a, const BoxInfo &b) { return a.score > b.score; });
+
+  const size_t count = input_boxes.size();
+  std::vector<float> vArea(count);
+  for (size_t i = 0; i < count; ++i) {
+    vArea[i] = (input_boxes[i].x2 - input_boxes[i].x1 + 1) *
+               (input_boxes[i].y2 - input_boxes[i].y1 + 1);
+  }
+
+  std::vector<char> suppressed(count, 0);
+  std::vector<BoxInfo> kept;
+  kept.reserve(count);
+
+  for (size_t i = 0; i < count; ++i) {
+    if (suppressed[i]) {
+      continue;
+    }
+    kept.push_back(input_boxes[i]);
+
+    for (size_t j = i + 1; j < count; ++j) {
+      if (suppressed[j]) {
+        continue;
+      }
+      const float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
+      const float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
+      const float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
+      const float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
+      const float w = (std::max)(float(0), xx2 - xx1 + 1);
+      const float h = (std::max)(float(0), yy2 - yy1 + 1);
+      const float inter = w * h;
+      const float ovr = inter / (vArea[i] + vArea[j] - inter);
+      if (ovr >= NMS_THRESH) {
+        suppressed[j] = 1;
+      }
+    }
+  }
+
+  input_boxes.swap(kept);
+}
--- a/paddle_detection/deploy/third_engine/demo_openvino/picodet_openvino.h
+++ b/paddle_detection/deploy/third_engine/demo_openvino/picodet_openvino.h
@@ -0,0 +1,75 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
+
+#ifndef _PICODET_OPENVINO_H_
+#define _PICODET_OPENVINO_H_
+
+#include <inference_engine.hpp>
+#include <opencv2/core.hpp>
+#include <string>
+
+#define image_size 416
+
+// Describes one detection head: the names of its two output blobs and the
+// stride used to decode it.
+struct HeadInfo {
+  std::string cls_layer; // output blob holding the class scores
+  std::string dis_layer; // output blob holding the box-distance logits
+  int stride;            // stride passed to the decoder for this head
+};
+
+// One detection: corner coordinates, confidence score and class label.
+// Field order matters: the struct is brace-initialised positionally.
+struct BoxInfo {
+  float x1; // left
+  float y1; // top
+  float x2; // right
+  float y2; // bottom
+  float score;
+  int label;
+};
+
+class PicoDet {  // PicoDet object detector running on the OpenVINO Inference Engine
+public:
+  PicoDet(const char *param);  // `param` is the path of the model file to load
+
+  ~PicoDet();
+
+  InferenceEngine::ExecutableNetwork network_;  // network loaded by the constructor
+  InferenceEngine::InferRequest infer_request_;  // request reused by every detect() call
+  // static bool hasGPU;
+
+  std::vector<HeadInfo> heads_info_{  // output blobs read by detect(), one entry per head
+      // cls_pred|dis_pred|stride
+      {"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
+      {"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
+      {"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
+      {"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
+  };
+
+  std::vector<BoxInfo> detect(cv::Mat image, float score_threshold,
+                              float nms_threshold);  // inference, decoding and per-class NMS
+
+private:
+  void preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob);  // image -> planar float blob
+  void decode_infer(const float *&cls_pred, const float *&dis_pred, int stride,
+                    float threshold,
+                    std::vector<std::vector<BoxInfo>> &results);  // appends boxes per class
+  BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x,
+                       int y, int stride);  // distance distributions -> clipped box
+  static void nms(std::vector<BoxInfo> &result, float nms_threshold);  // in-place suppression
+  std::string input_name_;  // name of the network input, set by the constructor
+  int input_size_ = image_size;  // side length of the square network input, in pixels
+  int num_class_ = 80;  // number of class scores per feature-map cell
+  int reg_max_ = 7;  // each box side is predicted over reg_max_ + 1 bins
+};
+
+#endif
--- a/paddle_detection/deploy/third_engine/demo_openvino/python/README.md
+++ b/paddle_detection/deploy/third_engine/demo_openvino/python/README.md
@@ -0,0 +1,75 @@
+# PicoDet OpenVINO Benchmark Demo
+
+本文件夹提供利用[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)进行PicoDet测速的Benchmark Demo与带后处理的模型Inference Demo。
+
+## 安装 OpenVINO Toolkit
+
+前往 [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)，下载对应版本并安装。
+
+本demo安装的是 OpenVINO 2022.1.0，可直接运行如下指令安装：
+```shell
+pip install openvino==2022.1.0
+```
+
+详细安装步骤，可参考[OpenVINO官网](https://docs.openvinotoolkit.org/latest/get_started_guides.html)
+
+## Benchmark测试
+
+- 准备测试模型：根据[PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/picodet)中【导出及转换模型】步骤，采用不包含后处理的方式导出模型（`-o export.benchmark=True` ），并生成待测试模型简化后的onnx模型（可在下文链接中直接下载）。同时在本目录下新建```out_onnxsim```文件夹，将导出的onnx模型放在该目录下。
+
+- 准备测试所用图片：本demo默认利用PaddleDetection/demo/[000000014439.jpg](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/demo/000000014439.jpg)
+
+- 在本目录下直接运行：
+
+```shell
+# Linux
+python openvino_benchmark.py --img_path ../../../../demo/000000014439.jpg --onnx_path out_onnxsim/picodet_s_320_coco_lcnet.onnx --in_shape 320
+# Windows
+python openvino_benchmark.py --img_path ..\..\..\..\demo\000000014439.jpg --onnx_path out_onnxsim\picodet_s_320_coco_lcnet.onnx --in_shape 320
+```
+- 注意：```--in_shape```为对应模型输入size，默认为320
+
+## 真实图片测试(网络包含后处理，但不包含NMS)
+
+- 准备测试模型：根据[PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/picodet)中【导出及转换模型】步骤，采用**包含后处理**但**不包含NMS**的方式导出模型（`-o export.benchmark=False export.nms=False` ），并生成待测试模型简化后的onnx模型（可在下文链接中直接下载）。同时在本目录下新建```out_onnxsim_infer```文件夹，将导出的onnx模型放在该目录下。
+
+- 准备测试所用图片：默认利用../../demo_onnxruntime/imgs/[bus.jpg](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/third_engine/demo_onnxruntime/imgs/bus.jpg)
+
+```shell
+# Linux
+python openvino_infer.py --img_path ../../demo_onnxruntime/imgs/bus.jpg --onnx_path out_onnxsim_infer/picodet_s_320_postproccesed_woNMS.onnx --in_shape 320
+# Windows
+python openvino_infer.py --img_path ..\..\demo_onnxruntime\imgs\bus.jpg --onnx_path out_onnxsim_infer\picodet_s_320_postproccesed_woNMS.onnx --in_shape 320
+```
+
+### 真实图片测试(网络不包含后处理)
+
+```shell
+# Linux
+python openvino_benchmark.py --benchmark 0 --img_path ../../../../demo/000000014439.jpg --onnx_path out_onnxsim/picodet_s_320_coco_lcnet.onnx --in_shape 320
+# Windows
+python openvino_benchmark.py --benchmark 0 --img_path ..\..\..\..\demo\000000014439.jpg --onnx_path out_onnxsim\picodet_s_320_coco_lcnet.onnx --in_shape 320
+```
+
+- 结果：
+    <div align="center">
+      <img src="../../../../docs/images/res.jpg" height="500px" >
+    </div>
+
+## Benchmark结果
+
+- 测速结果如下：
+
+| 模型     | 输入尺寸 | ONNX  | 预测时延<sup><small>[CPU](#latency)</small></sup> |
+| :-------- | :--------: | :---------------------: | :----------------: |
+| PicoDet-XS |  320*320   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_320_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_320_coco_lcnet.onnx) | 3.9ms |
+| PicoDet-XS |  416*416   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_416_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_416_coco_lcnet.onnx) | 6.1ms |
+| PicoDet-S |  320*320   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet.onnx) |     4.8ms |
+| PicoDet-S |  416*416   |  [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_416_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_416_coco_lcnet.onnx) |     6.6ms |
+| PicoDet-M |  320*320   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_320_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_320_coco_lcnet.onnx) | 8.2ms  |
+| PicoDet-M |  416*416   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_coco_lcnet.onnx) | 12.7ms |
+| PicoDet-L |  320*320   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_320_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_320_coco_lcnet.onnx) | 11.5ms |
+| PicoDet-L |  416*416   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_416_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_416_coco_lcnet.onnx) |     20.7ms |
+| PicoDet-L |  640*640   | [( w/ 后处理;w/o NMS)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_640_lcnet_postproccesed_woNMS.onnx) &#124; [( w/o 后处理)](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_640_coco_lcnet.onnx) |     62.5ms |
+
+- <a name="latency">测试环境：</a> 英特尔酷睿i7 10750H CPU。
--- a/paddle_detection/deploy/third_engine/demo_openvino/python/coco_label.txt
+++ b/paddle_detection/deploy/third_engine/demo_openvino/python/coco_label.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/paddle_detection/deploy/third_engine/demo_openvino/python/openvino_benchmark.py
+++ b/paddle_detection/deploy/third_engine/demo_openvino/python/openvino_benchmark.py
@@ -0,0 +1,365 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import numpy as np
+import time
+import argparse
+from scipy.special import softmax
+from openvino.runtime import Core
+
+
+def image_preprocess(img_path, re_shape):
+    img = cv2.imread(img_path)
+    img = cv2.resize(
+        img, (re_shape, re_shape), interpolation=cv2.INTER_LANCZOS4)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    img = np.transpose(img, [2, 0, 1]) / 255
+    img = np.expand_dims(img, 0)
+    img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+    img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+    img -= img_mean
+    img /= img_std
+    return img.astype(np.float32)
+
+
+def draw_box(img, results, class_label, scale_x, scale_y):
+
+    label_list = list(
+        map(lambda x: x.strip(), open(class_label, 'r').readlines()))
+
+    for i in range(len(results)):
+        print(label_list[int(results[i][0])], ':', results[i][1])
+        bbox = results[i, 2:]
+        label_id = int(results[i, 0])
+        score = results[i, 1]
+        if (score > 0.20):
+            xmin, ymin, xmax, ymax = [
+                int(bbox[0] * scale_x), int(bbox[1] * scale_y),
+                int(bbox[2] * scale_x), int(bbox[3] * scale_y)
+            ]
+            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            label_text = label_list[label_id]
+            cv2.rectangle(img, (xmin, ymin), (xmax, ymin - 60), (0, 255, 0), -1)
+            cv2.putText(img, "#" + label_text, (xmin, ymin - 10), font, 1,
+                        (255, 255, 255), 2, cv2.LINE_AA)
+            cv2.putText(img,
+                        str(round(score, 3)), (xmin, ymin - 40), font, 0.8,
+                        (255, 255, 255), 2, cv2.LINE_AA)
+    return img
+
+
+def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+    """
+    Args:
+        box_scores (N, 5): boxes in corner-form and probabilities.
+        iou_threshold: intersection over union threshold.
+        top_k: keep top_k results. If k <= 0, keep all the results.
+        candidate_size: only consider the candidates with the highest scores.
+    Returns:
+         picked: a list of indexes of the kept boxes
+    """
+    scores = box_scores[:, -1]
+    boxes = box_scores[:, :-1]
+    picked = []
+    indexes = np.argsort(scores)
+    indexes = indexes[-candidate_size:]
+    while len(indexes) > 0:
+        current = indexes[-1]
+        picked.append(current)
+        if 0 < top_k == len(picked) or len(indexes) == 1:
+            break
+        current_box = boxes[current, :]
+        indexes = indexes[:-1]
+        rest_boxes = boxes[indexes, :]
+        iou = iou_of(
+            rest_boxes,
+            np.expand_dims(
+                current_box, axis=0), )
+        indexes = indexes[iou <= iou_threshold]
+
+    return box_scores[picked, :]
+
+
+def iou_of(boxes0, boxes1, eps=1e-5):
+    """Return intersection-over-union (Jaccard index) of boxes.
+    Args:
+        boxes0 (N, 4): ground truth boxes.
+        boxes1 (N or 1, 4): predicted boxes.
+        eps: a small number to avoid 0 as denominator.
+    Returns:
+        iou (N): IoU values.
+    """
+    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
+    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
+
+    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+    return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+def area_of(left_top, right_bottom):
+    """Compute the areas of rectangles given two corners.
+    Args:
+        left_top (N, 2): left top corner.
+        right_bottom (N, 2): right bottom corner.
+    Returns:
+        area (N): return the area.
+    """
+    hw = np.clip(right_bottom - left_top, 0.0, None)
+    return hw[..., 0] * hw[..., 1]
+
+
+class PicoDetPostProcess(object):
+    """
+    Args:
+        input_shape (int): network input image size
+        ori_shape (int): ori image shape of before padding
+        scale_factor (float): scale factor of ori image
+        enable_mkldnn (bool): whether to open MKLDNN
+    """
+
+    def __init__(self,
+                 input_shape,
+                 ori_shape,
+                 scale_factor,
+                 strides=[8, 16, 32, 64],
+                 score_threshold=0.4,
+                 nms_threshold=0.5,
+                 nms_top_k=1000,
+                 keep_top_k=100):
+        self.ori_shape = ori_shape
+        self.input_shape = input_shape
+        self.scale_factor = scale_factor
+        self.strides = strides
+        self.score_threshold = score_threshold
+        self.nms_threshold = nms_threshold
+        self.nms_top_k = nms_top_k
+        self.keep_top_k = keep_top_k
+
+    def warp_boxes(self, boxes, ori_shape):
+        """Apply transform to boxes
+        """
+        width, height = ori_shape[1], ori_shape[0]
+        n = len(boxes)
+        if n:
+            # warp points
+            xy = np.ones((n * 4, 3))
+            xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
+                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+            # xy = xy @ M.T  # transform
+            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
+            # create new boxes
+            x = xy[:, [0, 2, 4, 6]]
+            y = xy[:, [1, 3, 5, 7]]
+            xy = np.concatenate(
+                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+            # clip boxes
+            xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
+            xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
+            return xy.astype(np.float32)
+        else:
+            return boxes
+
+    def __call__(self, scores, raw_boxes):
+        batch_size = raw_boxes[0].shape[0]
+        reg_max = int(raw_boxes[0].shape[-1] / 4 - 1)
+        out_boxes_num = []
+        out_boxes_list = []
+        for batch_id in range(batch_size):
+            # generate centers
+            decode_boxes = []
+            select_scores = []
+            for stride, box_distribute, score in zip(self.strides, raw_boxes,
+                                                     scores):
+                box_distribute = box_distribute[batch_id]
+                score = score[batch_id]
+                # centers
+                fm_h = self.input_shape[0] / stride
+                fm_w = self.input_shape[1] / stride
+                h_range = np.arange(fm_h)
+                w_range = np.arange(fm_w)
+                ww, hh = np.meshgrid(w_range, h_range)
+                ct_row = (hh.flatten() + 0.5) * stride
+                ct_col = (ww.flatten() + 0.5) * stride
+                center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
+
+                # box distribution to distance
+                reg_range = np.arange(reg_max + 1)
+                box_distance = box_distribute.reshape((-1, reg_max + 1))
+                box_distance = softmax(box_distance, axis=1)
+                box_distance = box_distance * np.expand_dims(reg_range, axis=0)
+                box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
+                box_distance = box_distance * stride
+
+                # top K candidate
+                topk_idx = np.argsort(score.max(axis=1))[::-1]
+                topk_idx = topk_idx[:self.nms_top_k]
+                center = center[topk_idx]
+                score = score[topk_idx]
+                box_distance = box_distance[topk_idx]
+
+                # decode box
+                decode_box = center + [-1, -1, 1, 1] * box_distance
+
+                select_scores.append(score)
+                decode_boxes.append(decode_box)
+
+            # nms
+            bboxes = np.concatenate(decode_boxes, axis=0)
+            confidences = np.concatenate(select_scores, axis=0)
+            picked_box_probs = []
+            picked_labels = []
+            for class_index in range(0, confidences.shape[1]):
+                probs = confidences[:, class_index]
+                mask = probs > self.score_threshold
+                probs = probs[mask]
+                if probs.shape[0] == 0:
+                    continue
+                subset_boxes = bboxes[mask, :]
+                box_probs = np.concatenate(
+                    [subset_boxes, probs.reshape(-1, 1)], axis=1)
+                box_probs = hard_nms(
+                    box_probs,
+                    iou_threshold=self.nms_threshold,
+                    top_k=self.keep_top_k, )
+                picked_box_probs.append(box_probs)
+                picked_labels.extend([class_index] * box_probs.shape[0])
+
+            if len(picked_box_probs) == 0:
+                out_boxes_list.append(np.empty((0, 4)))
+                out_boxes_num.append(0)
+
+            else:
+                picked_box_probs = np.concatenate(picked_box_probs)
+
+                # resize output boxes
+                picked_box_probs[:, :4] = self.warp_boxes(
+                    picked_box_probs[:, :4], self.ori_shape[batch_id])
+                im_scale = np.concatenate([
+                    self.scale_factor[batch_id][::-1],
+                    self.scale_factor[batch_id][::-1]
+                ])
+                picked_box_probs[:, :4] /= im_scale
+                # clas score box
+                out_boxes_list.append(
+                    np.concatenate(
+                        [
+                            np.expand_dims(
+                                np.array(picked_labels),
+                                axis=-1), np.expand_dims(
+                                    picked_box_probs[:, 4], axis=-1),
+                            picked_box_probs[:, :4]
+                        ],
+                        axis=1))
+                out_boxes_num.append(len(picked_labels))
+
+        out_boxes_list = np.concatenate(out_boxes_list, axis=0)
+        out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)
+        return out_boxes_list, out_boxes_num
+
+
+def detect(img_file, compiled_model, re_shape, class_label):
+    output = compiled_model.infer_new_request({0: test_image})
+    result_ie = list(output.values())  #[0]
+
+    test_im_shape = np.array([[re_shape, re_shape]]).astype('float32')
+    test_scale_factor = np.array([[1, 1]]).astype('float32')
+
+    np_score_list = []
+    np_boxes_list = []
+
+    num_outs = int(len(result_ie) / 2)
+    for out_idx in range(num_outs):
+        np_score_list.append(result_ie[out_idx])
+        np_boxes_list.append(result_ie[out_idx + num_outs])
+
+    postprocess = PicoDetPostProcess(test_image.shape[2:], test_im_shape,
+                                     test_scale_factor)
+
+    np_boxes, np_boxes_num = postprocess(np_score_list, np_boxes_list)
+
+    image = cv2.imread(img_file, 1)
+    scale_x = image.shape[1] / test_image.shape[3]
+    scale_y = image.shape[0] / test_image.shape[2]
+    res_image = draw_box(image, np_boxes, class_label, scale_x, scale_y)
+
+    cv2.imwrite('res.jpg', res_image)
+    cv2.imshow("res", res_image)
+    cv2.waitKey()
+
+
+def benchmark(test_image, compiled_model):
+
+    # benchmark       
+    loop_num = 100
+    warm_up = 8
+    timeall = 0
+    time_min = float("inf")
+    time_max = float('-inf')
+
+    for i in range(loop_num + warm_up):
+        time0 = time.time()
+        #perform the inference step
+
+        output = compiled_model.infer_new_request({0: test_image})
+        time1 = time.time()
+        timed = time1 - time0
+
+        if i >= warm_up:
+            timeall = timeall + timed
+            time_min = min(time_min, timed)
+            time_max = max(time_max, timed)
+
+    time_avg = timeall / loop_num
+
+    print('inference_time(ms): min={}, max={}, avg={}'.format(
+        round(time_min * 1000, 2),
+        round(time_max * 1000, 1), round(time_avg * 1000, 1)))
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--benchmark', type=int, default=1, help="0:detect; 1:benchmark")
+    parser.add_argument(
+        '--img_path',
+        type=str,
+        default='../../../../demo/000000014439.jpg',
+        help="image path")
+    parser.add_argument(
+        '--onnx_path',
+        type=str,
+        default='out_onnxsim/picodet_s_320_processed.onnx',
+        help="onnx filepath")
+    parser.add_argument('--in_shape', type=int, default=320, help="input_size")
+    parser.add_argument(
+        '--class_label',
+        type=str,
+        default='coco_label.txt',
+        help="class label file")
+    args = parser.parse_args()
+
+    ie = Core()
+    net = ie.read_model(args.onnx_path)
+    test_image = image_preprocess(args.img_path, args.in_shape)
+    compiled_model = ie.compile_model(net, 'CPU')
+
+    if args.benchmark == 0:
+        detect(args.img_path, compiled_model, args.in_shape, args.class_label)
+    if args.benchmark == 1:
+        benchmark(test_image, compiled_model)
--- a/paddle_detection/deploy/third_engine/demo_openvino/python/openvino_infer.py
+++ b/paddle_detection/deploy/third_engine/demo_openvino/python/openvino_infer.py
@@ -0,0 +1,267 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import numpy as np
+import argparse
+from scipy.special import softmax
+from openvino.runtime import Core
+
+
+def image_preprocess(img_path, re_shape):
+    img = cv2.imread(img_path)
+    img = cv2.resize(
+        img, (re_shape, re_shape), interpolation=cv2.INTER_LANCZOS4)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    img = np.transpose(img, [2, 0, 1]) / 255
+    img = np.expand_dims(img, 0)
+    img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+    img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+    img -= img_mean
+    img /= img_std
+    return img.astype(np.float32)
+
+
+def get_color_map_list(num_classes):
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    return color_map
+
+
+def draw_box(srcimg, results, class_label):
+    label_list = list(
+        map(lambda x: x.strip(), open(class_label, 'r').readlines()))
+    for i in range(len(results)):
+        color_list = get_color_map_list(len(label_list))
+        clsid2color = {}
+        classid, conf = int(results[i, 0]), results[i, 1]
+        xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int(
+            results[i, 4]), int(results[i, 5])
+
+        if classid not in clsid2color:
+            clsid2color[classid] = color_list[classid]
+        color = tuple(clsid2color[classid])
+
+        cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
+        print(label_list[classid] + ': ' + str(round(conf, 3)))
+        cv2.putText(
+            srcimg,
+            label_list[classid] + ':' + str(round(conf, 3)), (xmin, ymin - 10),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.8, (0, 255, 0),
+            thickness=2)
+    return srcimg
+
+
+def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
+    """
+    Args:
+        box_scores (N, 5): boxes in corner-form and probabilities.
+        iou_threshold: intersection over union threshold.
+        top_k: keep top_k results. If k <= 0, keep all the results.
+        candidate_size: only consider the candidates with the highest scores.
+    Returns:
+         picked: a list of indexes of the kept boxes
+    """
+    scores = box_scores[:, -1]
+    boxes = box_scores[:, :-1]
+    picked = []
+    indexes = np.argsort(scores)
+    indexes = indexes[-candidate_size:]
+    while len(indexes) > 0:
+        current = indexes[-1]
+        picked.append(current)
+        if 0 < top_k == len(picked) or len(indexes) == 1:
+            break
+        current_box = boxes[current, :]
+        indexes = indexes[:-1]
+        rest_boxes = boxes[indexes, :]
+        iou = iou_of(
+            rest_boxes,
+            np.expand_dims(
+                current_box, axis=0), )
+        indexes = indexes[iou <= iou_threshold]
+
+    return box_scores[picked, :]
+
+
+def iou_of(boxes0, boxes1, eps=1e-5):
+    """Return intersection-over-union (Jaccard index) of boxes.
+    Args:
+        boxes0 (N, 4): ground truth boxes.
+        boxes1 (N or 1, 4): predicted boxes.
+        eps: a small number to avoid 0 as denominator.
+    Returns:
+        iou (N): IoU values.
+    """
+    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
+    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
+
+    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
+    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
+    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
+    return overlap_area / (area0 + area1 - overlap_area + eps)
+
+
+def area_of(left_top, right_bottom):
+    """Compute the areas of rectangles given two corners.
+    Args:
+        left_top (N, 2): left top corner.
+        right_bottom (N, 2): right bottom corner.
+    Returns:
+        area (N): return the area.
+    """
+    hw = np.clip(right_bottom - left_top, 0.0, None)
+    return hw[..., 0] * hw[..., 1]
+
+
+class PicoDetNMS(object):
+    """
+    Args:
+        input_shape (int): network input image size
+        scale_factor (float): scale factor of ori image
+    """
+
+    def __init__(self,
+                 input_shape,
+                 scale_x,
+                 scale_y,
+                 strides=[8, 16, 32, 64],
+                 score_threshold=0.4,
+                 nms_threshold=0.5,
+                 nms_top_k=1000,
+                 keep_top_k=100):
+        self.input_shape = input_shape
+        self.scale_x = scale_x
+        self.scale_y = scale_y
+        self.strides = strides
+        self.score_threshold = score_threshold
+        self.nms_threshold = nms_threshold
+        self.nms_top_k = nms_top_k
+        self.keep_top_k = keep_top_k
+
+    def __call__(self, decode_boxes, select_scores):
+        batch_size = 1
+        out_boxes_list = []
+        for batch_id in range(batch_size):
+            # nms
+            bboxes = np.concatenate(decode_boxes, axis=0)
+            confidences = np.concatenate(select_scores, axis=0)
+            picked_box_probs = []
+            picked_labels = []
+            for class_index in range(0, confidences.shape[1]):
+                probs = confidences[:, class_index]
+                mask = probs > self.score_threshold
+                probs = probs[mask]
+                if probs.shape[0] == 0:
+                    continue
+                subset_boxes = bboxes[mask, :]
+                box_probs = np.concatenate(
+                    [subset_boxes, probs.reshape(-1, 1)], axis=1)
+                box_probs = hard_nms(
+                    box_probs,
+                    iou_threshold=self.nms_threshold,
+                    top_k=self.keep_top_k, )
+                picked_box_probs.append(box_probs)
+                picked_labels.extend([class_index] * box_probs.shape[0])
+
+            if len(picked_box_probs) == 0:
+                out_boxes_list.append(np.empty((0, 4)))
+
+            else:
+                picked_box_probs = np.concatenate(picked_box_probs)
+
+                # resize output boxes
+                picked_box_probs[:, 0] *= self.scale_x
+                picked_box_probs[:, 2] *= self.scale_x
+                picked_box_probs[:, 1] *= self.scale_y
+                picked_box_probs[:, 3] *= self.scale_y
+
+                # clas score box
+                out_boxes_list.append(
+                    np.concatenate(
+                        [
+                            np.expand_dims(
+                                np.array(picked_labels),
+                                axis=-1), np.expand_dims(
+                                    picked_box_probs[:, 4], axis=-1),
+                            picked_box_probs[:, :4]
+                        ],
+                        axis=1))
+
+        out_boxes_list = np.concatenate(out_boxes_list, axis=0)
+        return out_boxes_list
+
+
+def detect(img_file, compiled_model, class_label):
+    output = compiled_model.infer_new_request({0: test_image})
+    result_ie = list(output.values())
+
+    decode_boxes = []
+    select_scores = []
+    num_outs = int(len(result_ie) / 2)
+    for out_idx in range(num_outs):
+        decode_boxes.append(result_ie[out_idx])
+        select_scores.append(result_ie[out_idx + num_outs])
+
+    image = cv2.imread(img_file, 1)
+    scale_x = image.shape[1] / test_image.shape[3]
+    scale_y = image.shape[0] / test_image.shape[2]
+
+    nms = PicoDetNMS(test_image.shape[2:], scale_x, scale_y)
+    np_boxes = nms(decode_boxes, select_scores)
+
+    res_image = draw_box(image, np_boxes, class_label)
+
+    cv2.imwrite('res.jpg', res_image)
+    cv2.imshow("res", res_image)
+    cv2.waitKey()
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--img_path',
+        type=str,
+        default='../../demo_onnxruntime/imgs/bus.jpg',
+        help="image path")
+    parser.add_argument(
+        '--onnx_path',
+        type=str,
+        default='out_onnxsim_infer/picodet_s_320_postproccesed_woNMS.onnx',
+        help="onnx filepath")
+    parser.add_argument('--in_shape', type=int, default=320, help="input_size")
+    parser.add_argument(
+        '--class_label',
+        type=str,
+        default='coco_label.txt',
+        help="class label file")
+    args = parser.parse_args()
+
+    ie = Core()
+    net = ie.read_model(args.onnx_path)
+    test_image = image_preprocess(args.img_path, args.in_shape)
+    compiled_model = ie.compile_model(net, 'CPU')
+
+    detect(args.img_path, compiled_model, args.class_label)