更换文档检测模型

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions
--- a/paddle_detection/deploy/third_engine/demo_onnxruntime/README.md
+++ b/paddle_detection/deploy/third_engine/demo_onnxruntime/README.md
@@ -0,0 +1,43 @@
+# PicoDet ONNX Runtime Demo
+
+本文件夹提供了使用 [ONNX Runtime](https://onnxruntime.ai/docs/) 部署 PicoDet 并对图片进行推理（Inference images）的 Demo。
+
+## 安装 ONNX Runtime
+
+本 Demo 采用的是 ONNX Runtime 1.10.0，可直接运行如下指令安装（该指令默认安装最新版本；如需与本 Demo 的版本保持一致，可改用 `pip install onnxruntime==1.10.0`）：
+```shell
+pip install onnxruntime
+```
+
+详细安装步骤，可参考 [Install ONNX Runtime](https://onnxruntime.ai/docs/install/)。
+
+## Inference images
+
+- 准备测试模型：根据[PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/picodet)中【导出及转换模型】步骤，采用包含后处理的方式导出模型（`-o export.benchmark=False`），并将其转换、简化为待测试的 ONNX 模型（也可在下文【模型下载】中直接下载）。同时在本目录下新建```onnx_file```文件夹，将得到的 ONNX 模型放在该目录下。
+
+- 准备测试所用图片：将待测试图片放在```./imgs```文件夹下，本demo已提供了两张测试图片。
+
+- 在本目录下直接运行：
+    ```shell
+    python infer_demo.py --modelpath ./onnx_file/picodet_s_320_lcnet_postprocessed.onnx
+    ```
+    将会对```./imgs```文件夹下所有 `.jpg`/`.png` 图片进行目标检测，并将绘制了检测框的结果图片保存在```./results```文件夹下。
+
+- 结果：
+    <div align="center">
+      <img src="../../../docs/images/bus.jpg" height="300px" ><img src="../../../docs/images/dog.jpg" height="300px" >
+    </div>
+
+## 模型下载
+
+| 模型     | 输入尺寸 | ONNX( w/ 后处理)  |
+| :-------- | :--------: | :---------------------: |
+| PicoDet-XS |  320*320   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_320_lcnet_postprocessed.onnx) |
+| PicoDet-XS |  416*416   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_xs_416_lcnet_postprocessed.onnx) |
+| PicoDet-S |  320*320   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_lcnet_postprocessed.onnx) |
+| PicoDet-S |  416*416   |  [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_416_lcnet_postprocessed.onnx) |
+| PicoDet-M |  320*320   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_320_lcnet_postprocessed.onnx) |
+| PicoDet-M |  416*416   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_lcnet_postprocessed.onnx) |
+| PicoDet-L |  320*320   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_320_lcnet_postprocessed.onnx) |
+| PicoDet-L |  416*416   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_416_lcnet_postprocessed.onnx) |
+| PicoDet-L |  640*640   | [model](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_l_640_lcnet_postprocessed.onnx) |
--- a/paddle_detection/deploy/third_engine/demo_onnxruntime/coco_label.txt
+++ b/paddle_detection/deploy/third_engine/demo_onnxruntime/coco_label.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/paddle_detection/deploy/third_engine/demo_onnxruntime/imgs/bus.jpg
+++ b/paddle_detection/deploy/third_engine/demo_onnxruntime/imgs/bus.jpg
--- a/paddle_detection/deploy/third_engine/demo_onnxruntime/imgs/dog.jpg
+++ b/paddle_detection/deploy/third_engine/demo_onnxruntime/imgs/dog.jpg
--- a/paddle_detection/deploy/third_engine/demo_onnxruntime/infer_demo.py
+++ b/paddle_detection/deploy/third_engine/demo_onnxruntime/infer_demo.py
@@ -0,0 +1,209 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cv2
+import numpy as np
+import argparse
+import onnxruntime as ort
+from pathlib import Path
+from tqdm import tqdm
+
+
+class PicoDet():
+    def __init__(self,
+                 model_pb_path,
+                 label_path,
+                 prob_threshold=0.4,
+                 iou_threshold=0.3):
+        self.classes = list(
+            map(lambda x: x.strip(), open(label_path, 'r').readlines()))
+        self.num_classes = len(self.classes)
+        self.prob_threshold = prob_threshold
+        self.iou_threshold = iou_threshold
+        self.mean = np.array(
+            [103.53, 116.28, 123.675], dtype=np.float32).reshape(1, 1, 3)
+        self.std = np.array(
+            [57.375, 57.12, 58.395], dtype=np.float32).reshape(1, 1, 3)
+        so = ort.SessionOptions()
+        so.log_severity_level = 3
+        self.net = ort.InferenceSession(model_pb_path, so)
+        inputs_name = [a.name for a in self.net.get_inputs()]
+        inputs_shape = {
+            k: v.shape
+            for k, v in zip(inputs_name, self.net.get_inputs())
+        }
+        self.input_shape = inputs_shape['image'][2:]
+
+    def _normalize(self, img):
+        img = img.astype(np.float32)
+        img = (img / 255.0 - self.mean / 255.0) / (self.std / 255.0)
+        return img
+
+    def resize_image(self, srcimg, keep_ratio=False):
+        top, left, newh, neww = 0, 0, self.input_shape[0], self.input_shape[1]
+        origin_shape = srcimg.shape[:2]
+        im_scale_y = newh / float(origin_shape[0])
+        im_scale_x = neww / float(origin_shape[1])
+        img_shape = np.array([
+            [float(self.input_shape[0]), float(self.input_shape[1])]
+        ]).astype('float32')
+        scale_factor = np.array([[im_scale_y, im_scale_x]]).astype('float32')
+
+        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
+            hw_scale = srcimg.shape[0] / srcimg.shape[1]
+            if hw_scale > 1:
+                newh, neww = self.input_shape[0], int(self.input_shape[1] /
+                                                      hw_scale)
+                img = cv2.resize(
+                    srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+                left = int((self.input_shape[1] - neww) * 0.5)
+                img = cv2.copyMakeBorder(
+                    img,
+                    0,
+                    0,
+                    left,
+                    self.input_shape[1] - neww - left,
+                    cv2.BORDER_CONSTANT,
+                    value=0)  # add border
+            else:
+                newh, neww = int(self.input_shape[0] *
+                                 hw_scale), self.input_shape[1]
+                img = cv2.resize(
+                    srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
+                top = int((self.input_shape[0] - newh) * 0.5)
+                img = cv2.copyMakeBorder(
+                    img,
+                    top,
+                    self.input_shape[0] - newh - top,
+                    0,
+                    0,
+                    cv2.BORDER_CONSTANT,
+                    value=0)
+        else:
+            img = cv2.resize(
+                srcimg, self.input_shape, interpolation=cv2.INTER_LINEAR)
+
+        return img, img_shape, scale_factor
+
+    def get_color_map_list(self, num_classes):
+        color_map = num_classes * [0, 0, 0]
+        for i in range(0, num_classes):
+            j = 0
+            lab = i
+            while lab:
+                color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+                color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+                color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+                j += 1
+                lab >>= 3
+        color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+        return color_map
+
+    def detect(self, srcimg):
+        img, im_shape, scale_factor = self.resize_image(srcimg)
+        img = self._normalize(img)
+
+        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
+
+        inputs_dict = {
+            'im_shape': im_shape,
+            'image': blob,
+            'scale_factor': scale_factor
+        }
+        inputs_name = [a.name for a in self.net.get_inputs()]
+        net_inputs = {k: inputs_dict[k] for k in inputs_name}
+
+        outs = self.net.run(None, net_inputs)
+
+        outs = np.array(outs[0])
+        expect_boxes = (outs[:, 1] > 0.5) & (outs[:, 0] > -1)
+        np_boxes = outs[expect_boxes, :]
+
+        color_list = self.get_color_map_list(self.num_classes)
+        clsid2color = {}
+
+        for i in range(np_boxes.shape[0]):
+            classid, conf = int(np_boxes[i, 0]), np_boxes[i, 1]
+            xmin, ymin, xmax, ymax = int(np_boxes[i, 2]), int(np_boxes[
+                i, 3]), int(np_boxes[i, 4]), int(np_boxes[i, 5])
+
+            if classid not in clsid2color:
+                clsid2color[classid] = color_list[classid]
+            color = tuple(clsid2color[classid])
+
+            cv2.rectangle(
+                srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
+            print(self.classes[classid] + ': ' + str(round(conf, 3)))
+            cv2.putText(
+                srcimg,
+                self.classes[classid] + ':' + str(round(conf, 3)), (xmin,
+                                                                    ymin - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.8, (0, 255, 0),
+                thickness=2)
+
+        return srcimg
+
+    def detect_folder(self, img_fold, result_path):
+        img_fold = Path(img_fold)
+        result_path = Path(result_path)
+        result_path.mkdir(parents=True, exist_ok=True)
+
+        img_name_list = filter(
+            lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"),
+            img_fold.iterdir(), )
+        img_name_list = list(img_name_list)
+        print(f"find {len(img_name_list)} images")
+
+        for img_path in tqdm(img_name_list):
+            img = cv2.imread(str(img_path), 1)
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+            srcimg = net.detect(img)
+            save_path = str(result_path / img_path.name.replace(".png", ".jpg"))
+            cv2.imwrite(save_path, srcimg)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--modelpath',
+        type=str,
+        default='onnx_file/picodet_s_320_lcnet_postprocessed.onnx',
+        help="onnx filepath")
+    parser.add_argument(
+        '--classfile',
+        type=str,
+        default='coco_label.txt',
+        help="classname filepath")
+    parser.add_argument(
+        '--confThreshold', default=0.5, type=float, help='class confidence')
+    parser.add_argument(
+        '--nmsThreshold', default=0.6, type=float, help='nms iou thresh')
+    parser.add_argument(
+        "--img_fold", dest="img_fold", type=str, default="./imgs")
+    parser.add_argument(
+        "--result_fold", dest="result_fold", type=str, default="results")
+    args = parser.parse_args()
+
+    net = PicoDet(
+        args.modelpath,
+        args.classfile,
+        prob_threshold=args.confThreshold,
+        iou_threshold=args.nmsThreshold)
+
+    net.detect_folder(args.img_fold, args.result_fold)
+    print(
+        f'infer results in ./deploy/third_engine/demo_onnxruntime/{args.result_fold}'
+    )