Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions


@@ -0,0 +1,167 @@
import cv2
import os
import json
from tqdm import tqdm
import numpy as np
provinces = [
    "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣",
    "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
    "新", "警", "学", "O"
]
alphabets = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q',
'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'O'
]
ads = [
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q',
'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
'6', '7', '8', '9', 'O'
]
def make_label_2020(img_dir, save_gt_folder, phase):
crop_img_save_dir = os.path.join(save_gt_folder, phase, 'crop_imgs')
os.makedirs(crop_img_save_dir, exist_ok=True)
f_det = open(
os.path.join(save_gt_folder, phase, 'det.txt'), 'w', encoding='utf-8')
f_rec = open(
os.path.join(save_gt_folder, phase, 'rec.txt'), 'w', encoding='utf-8')
i = 0
for filename in tqdm(os.listdir(os.path.join(img_dir, phase))):
str_list = filename.split('-')
if len(str_list) < 5:
continue
coord_list = str_list[3].split('_')
txt_list = str_list[4].split('_')
boxes = []
for coord in coord_list:
boxes.append([int(x) for x in coord.split("&")])
        # CCPD stores the four corners starting from the bottom-right vertex;
        # reorder to top-left, top-right, bottom-right, bottom-left.
        boxes = [boxes[2], boxes[3], boxes[0], boxes[1]]
        lp_number = provinces[int(txt_list[0])] + alphabets[int(txt_list[1])] \
            + ''.join([ads[int(x)] for x in txt_list[2:]])
# det
det_info = [{'points': boxes, 'transcription': lp_number}]
f_det.write('{}\t{}\n'.format(
os.path.join("CCPD2020/ccpd_green", phase, filename),
json.dumps(
det_info, ensure_ascii=False)))
# rec
boxes = np.float32(boxes)
img = cv2.imread(os.path.join(img_dir, phase, filename))
# crop_img = img[int(boxes[:,1].min()):int(boxes[:,1].max()),int(boxes[:,0].min()):int(boxes[:,0].max())]
crop_img = get_rotate_crop_image(img, boxes)
crop_img_save_filename = '{}_{}.jpg'.format(i, '_'.join(txt_list))
crop_img_save_path = os.path.join(crop_img_save_dir,
crop_img_save_filename)
cv2.imwrite(crop_img_save_path, crop_img)
f_rec.write('{}/{}/crop_imgs/{}\t{}\n'.format(
"CCPD2020/PPOCR", phase, crop_img_save_filename, lp_number))
i += 1
f_det.close()
f_rec.close()
def make_label_2019(list_dir, save_gt_folder, phase):
crop_img_save_dir = os.path.join(save_gt_folder, phase, 'crop_imgs')
os.makedirs(crop_img_save_dir, exist_ok=True)
f_det = open(
os.path.join(save_gt_folder, phase, 'det.txt'), 'w', encoding='utf-8')
f_rec = open(
os.path.join(save_gt_folder, phase, 'rec.txt'), 'w', encoding='utf-8')
with open(os.path.join(list_dir, phase + ".txt"), 'r') as rf:
imglist = rf.readlines()
i = 0
for idx, filename in enumerate(imglist):
if idx % 1000 == 0:
print("{}/{}".format(idx, len(imglist)))
filename = filename.strip()
str_list = filename.split('-')
if len(str_list) < 5:
continue
coord_list = str_list[3].split('_')
txt_list = str_list[4].split('_')
boxes = []
for coord in coord_list:
boxes.append([int(x) for x in coord.split("&")])
        # Same corner reordering as in make_label_2020 (CCPD corner order
        # starts at the bottom-right vertex).
        boxes = [boxes[2], boxes[3], boxes[0], boxes[1]]
        lp_number = provinces[int(txt_list[0])] + alphabets[int(txt_list[1])] \
            + ''.join([ads[int(x)] for x in txt_list[2:]])
# det
det_info = [{'points': boxes, 'transcription': lp_number}]
f_det.write('{}\t{}\n'.format(
os.path.join("CCPD2019", filename),
json.dumps(
det_info, ensure_ascii=False)))
# rec
boxes = np.float32(boxes)
        # list_dir ends with 'splits/'; strip those 7 characters to get the
        # CCPD2019 root directory.
        imgpath = os.path.join(list_dir[:-7], filename)
img = cv2.imread(imgpath)
# crop_img = img[int(boxes[:,1].min()):int(boxes[:,1].max()),int(boxes[:,0].min()):int(boxes[:,0].max())]
crop_img = get_rotate_crop_image(img, boxes)
crop_img_save_filename = '{}_{}.jpg'.format(i, '_'.join(txt_list))
crop_img_save_path = os.path.join(crop_img_save_dir,
crop_img_save_filename)
cv2.imwrite(crop_img_save_path, crop_img)
f_rec.write('{}/{}/crop_imgs/{}\t{}\n'.format(
"CCPD2019/PPOCR", phase, crop_img_save_filename, lp_number))
i += 1
f_det.close()
f_rec.close()
def get_rotate_crop_image(img, points):
    '''
    Crop the quadrilateral defined by `points` (4x2, clockwise from top-left)
    and warp it to an upright rectangle. The earlier axis-aligned crop is
    kept below for reference:

    img_height, img_width = img.shape[0:2]
    left = int(np.min(points[:, 0]))
    right = int(np.max(points[:, 0]))
    top = int(np.min(points[:, 1]))
    bottom = int(np.max(points[:, 1]))
    img_crop = img[top:bottom, left:right, :].copy()
    points[:, 0] = points[:, 0] - left
    points[:, 1] = points[:, 1] - top
    '''
assert len(points) == 4, "shape of points must be 4*2"
img_crop_width = int(
max(
np.linalg.norm(points[0] - points[1]),
np.linalg.norm(points[2] - points[3])))
img_crop_height = int(
max(
np.linalg.norm(points[0] - points[3]),
np.linalg.norm(points[1] - points[2])))
pts_std = np.float32([[0, 0], [img_crop_width, 0],
[img_crop_width, img_crop_height],
[0, img_crop_height]])
M = cv2.getPerspectiveTransform(points, pts_std)
dst_img = cv2.warpPerspective(
img,
M, (img_crop_width, img_crop_height),
borderMode=cv2.BORDER_REPLICATE,
flags=cv2.INTER_CUBIC)
dst_img_height, dst_img_width = dst_img.shape[0:2]
if dst_img_height * 1.0 / dst_img_width >= 1.5:
dst_img = np.rot90(dst_img)
return dst_img
img_dir = './CCPD2020/ccpd_green'
save_gt_folder = './CCPD2020/PPOCR'
# Generate labels for the train, val, and test splits in turn.
for phase in ['train', 'val', 'test']:
make_label_2020(img_dir, save_gt_folder, phase)
list_dir = './CCPD2019/splits/'
save_gt_folder = './CCPD2019/PPOCR'
for phase in ['train', 'val', 'test']:
make_label_2019(list_dir, save_gt_folder, phase)
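# For reference, a decoding sketch using a hypothetical CCPD-style filename
# (fields are '-'-separated; field 3 holds the four corner points, field 4 the
# character indices into the provinces/alphabets/ads tables above):
#
#     name = '025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg'
#     fields = name.split('-')
#     corners = [[int(v) for v in p.split('&')] for p in fields[3].split('_')]
#     idx = fields[4].split('_')
#     plate = provinces[int(idx[0])] + alphabets[int(idx[1])] + \
#         ''.join(ads[int(i)] for i in idx[2:])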


@@ -0,0 +1,36 @@
import cv2
def cut_video(video_path, frameToStart, frametoStop, saved_video_path):
cap = cv2.VideoCapture(video_path)
FPS = cap.get(cv2.CAP_PROP_FPS)
    TOTAL_FRAME = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # total number of frames in the video
size = (cap.get(cv2.CAP_PROP_FRAME_WIDTH),
cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
videoWriter = cv2.VideoWriter(
saved_video_path,
apiPreference=0,
        fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
fps=FPS,
frameSize=(int(size[0]), int(size[1])))
COUNT = 0
while True:
success, frame = cap.read()
if success:
COUNT += 1
            if COUNT <= frametoStop and COUNT > frameToStart:  # keep only frames in the selected range
videoWriter.write(frame)
else:
print("cap.read failed!")
break
if COUNT > frametoStop:
break
cap.release()
videoWriter.release()
print(saved_video_path)
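# Example usage (hypothetical paths and frame range): keeps frames 101-300 of
# the input clip.
#
#     cut_video('input.mp4', frameToStart=100, frametoStop=300,
#               saved_video_path='clip.mp4')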


@@ -0,0 +1,147 @@
# coding: utf8
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os.path
import argparse
import warnings
def parse_args():
parser = argparse.ArgumentParser(
        description='PaddleSeg: generate a file list for Cityscapes or a custom dataset.'
)
parser.add_argument('dataset_root', help='dataset root directory', type=str)
parser.add_argument(
'--type',
help='dataset type: \n'
'- cityscapes \n'
        '- custom (default)',
default="custom",
type=str)
parser.add_argument(
'--separator',
dest='separator',
help='file list separator',
default=" ",
type=str)
parser.add_argument(
'--folder',
help='the folder names of images and labels',
type=str,
nargs=2,
default=['images', 'labels'])
parser.add_argument(
'--second_folder',
        help='second-level folder names for the train, validation, and test sets',
type=str,
nargs='*',
default=['train', 'val', 'test'])
parser.add_argument(
'--format',
help='data format of images and labels, e.g. jpg or png.',
type=str,
nargs=2,
default=['jpg', 'png'])
parser.add_argument(
'--postfix',
help='postfix of images or labels',
type=str,
nargs=2,
default=['', ''])
return parser.parse_args()
def get_files(image_or_label, dataset_split, args):
dataset_root = args.dataset_root
postfix = args.postfix
format = args.format
folder = args.folder
pattern = '*%s.%s' % (postfix[image_or_label], format[image_or_label])
search_files = os.path.join(dataset_root, folder[image_or_label],
dataset_split, pattern)
    search_files2 = os.path.join(dataset_root, folder[image_or_label],
                                 dataset_split, "*", pattern)  # include subdirectories
    search_files3 = os.path.join(dataset_root, folder[image_or_label],
                                 dataset_split, "*", "*", pattern)  # include third-level directories
    search_files4 = os.path.join(dataset_root, folder[image_or_label],
                                 dataset_split, "*", "*", "*",
                                 pattern)  # include fourth-level directories
    search_files5 = os.path.join(dataset_root, folder[image_or_label],
                                 dataset_split, "*", "*", "*", "*",
                                 pattern)  # include fifth-level directories
filenames = glob.glob(search_files)
filenames2 = glob.glob(search_files2)
filenames3 = glob.glob(search_files3)
filenames4 = glob.glob(search_files4)
filenames5 = glob.glob(search_files5)
filenames = filenames + filenames2 + filenames3 + filenames4 + filenames5
return sorted(filenames)
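# Note: the five fixed-depth glob patterns in get_files could be collapsed
# into a single recursive search; a sketch, not part of the original script:
#
#     search_all = os.path.join(dataset_root, folder[image_or_label],
#                               dataset_split, '**', pattern)
#     filenames = sorted(glob.glob(search_all, recursive=True))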
def generate_list(args):
dataset_root = args.dataset_root
separator = args.separator
for dataset_split in args.second_folder:
print("Creating {}.txt...".format(dataset_split))
image_files = get_files(0, dataset_split, args)
label_files = get_files(1, dataset_split, args)
if not image_files:
img_dir = os.path.join(dataset_root, args.folder[0], dataset_split)
warnings.warn("No images in {} !!!".format(img_dir))
num_images = len(image_files)
if not label_files:
label_dir = os.path.join(dataset_root, args.folder[1],
dataset_split)
warnings.warn("No labels in {} !!!".format(label_dir))
num_label = len(label_files)
if num_images != num_label and num_label > 0:
            raise Exception(
                "Number of images = {}, number of labels = {}.\n"
                "Either the number of images equals the number of labels, "
                "or the number of labels equals 0.\n"
                "Please check your dataset!".format(num_images, num_label))
file_list = os.path.join(dataset_root, dataset_split + '.txt')
with open(file_list, "w") as f:
for item in range(num_images):
left = image_files[item].replace(dataset_root, '', 1)
if left[0] == os.path.sep:
left = left.lstrip(os.path.sep)
try:
right = label_files[item].replace(dataset_root, '', 1)
if right[0] == os.path.sep:
right = right.lstrip(os.path.sep)
line = left + separator + right + '\n'
                except IndexError:
line = left + '\n'
f.write(line)
print(line)
if __name__ == '__main__':
args = parse_args()
generate_list(args)
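# Example invocation (script and dataset names are placeholders, since the
# diff view omits file names; assumes images/ and labels/ folders that each
# contain train/val/test subfolders):
#
#     python create_dataset_list.py ./mydataset --type custom --format jpg png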


@@ -0,0 +1,71 @@
import os
import sys
import cv2
import numpy as np
import argparse
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority."
)
parser.add_argument(
'--region_polygon',
nargs='+',
type=int,
default=[],
help="Clockwise point coords (x0,y0,x1,y1...) of polygon of area when "
"do_break_in_counting. Note that only support single-class MOT and "
"the video should be taken by a static camera.")
return parser
def get_video_info(video_file, region_polygon):
entrance = []
    assert len(region_polygon) % 2 == 0, \
        "region_polygon should be pairs of coordinate points."
for i in range(0, len(region_polygon), 2):
entrance.append([region_polygon[i], region_polygon[i + 1]])
if not os.path.exists(video_file):
print("video path '{}' not exists".format(video_file))
sys.exit(-1)
capture = cv2.VideoCapture(video_file)
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
print("video width: %d, height: %d" % (width, height))
np_masks = np.zeros((height, width, 1), np.uint8)
entrance = np.array(entrance)
cv2.fillPoly(np_masks, [entrance], 255)
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("video fps: %d, frame_count: %d" % (fps, frame_count))
cnt = 0
    while True:
        ret, frame = capture.read()
        cnt += 1
        if cnt == 3:
            break
alpha = 0.3
img = np.array(frame).astype('float32')
mask = np_masks[:, :, 0]
color_mask = [0, 0, 255]
idx = np.nonzero(mask)
color_mask = np.array(color_mask)
img[idx[0], idx[1], :] *= 1.0 - alpha
img[idx[0], idx[1], :] += alpha * color_mask
cv2.imwrite('region_vis.jpg', img)
if __name__ == "__main__":
parser = argsparser()
FLAGS = parser.parse_args()
get_video_info(FLAGS.video_file, FLAGS.region_polygon)
# python get_video_info.py --video_file=demo.mp4 --region_polygon 200 200 400 200 300 400 100 400


@@ -0,0 +1,508 @@
# coding: utf8
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Convert poly2d to mask/bitmask."""
import os
from functools import partial
from multiprocessing import Pool
from typing import Callable, Dict, List
import matplotlib # type: ignore
import matplotlib.pyplot as plt # type: ignore
import numpy as np
from PIL import Image
from scalabel.common.parallel import NPROC
from scalabel.common.typing import NDArrayU8
from scalabel.label.io import group_and_sort, load
from scalabel.label.transforms import poly_to_patch
from scalabel.label.typing import Config, Frame, ImageSize, Label, Poly2D
from scalabel.label.utils import (
check_crowd,
check_ignored,
get_leaf_categories, )
from tqdm import tqdm
from bdd100k.common.logger import logger
from bdd100k.common.typing import BDD100KConfig
from bdd100k.common.utils import get_bdd100k_instance_id, load_bdd100k_config
from bdd100k.label.label import drivables, labels, lane_categories
from bdd100k.label.to_coco import parse_args
from bdd100k.label.to_scalabel import bdd100k_to_scalabel
IGNORE_LABEL = 255
STUFF_NUM = 30
LANE_DIRECTION_MAP = {"parallel": 0, "vertical": 1}
LANE_STYLE_MAP = {"solid": 0, "dashed": 1}
def frame_to_mask(
out_path: str,
shape: ImageSize,
colors: List[NDArrayU8],
poly2ds: List[List[Poly2D]],
with_instances: bool=True,
back_color: int=0,
closed: bool=True, ) -> None:
"""Converting a frame of poly2ds to mask/bitmask."""
assert len(colors) == len(poly2ds)
height, width = shape.height, shape.width
assert back_color >= 0
if with_instances:
img: NDArrayU8 = (
np.ones(
[height, width, 4], dtype=np.uint8) * back_color # type: ignore
)
else:
img = (
np.ones(
[height, width, 1], dtype=np.uint8) * back_color # type: ignore
)
    if len(colors) == 0:
        pil_img = Image.fromarray(img.squeeze())
        pil_img.save(out_path)
        return  # nothing to draw; skip the matplotlib rendering below
matplotlib.use("Agg")
fig = plt.figure(facecolor="0")
fig.set_size_inches((width / fig.get_dpi()), height / fig.get_dpi())
ax = fig.add_axes([0, 0, 1, 1])
ax.axis("off")
ax.set_xlim(0, width)
ax.set_ylim(0, height)
ax.set_facecolor((0, 0, 0, 0))
ax.invert_yaxis()
for i, poly2d in enumerate(poly2ds):
for poly in poly2d:
ax.add_patch(
poly_to_patch(
poly.vertices,
poly.types,
# (0, 0, 0) for the background
color=(
((i + 1) >> 8) / 255.0,
((i + 1) % 255) / 255.0,
0.0, ),
closed=closed, ))
fig.canvas.draw()
out: NDArrayU8 = np.frombuffer(fig.canvas.tostring_rgb(), np.uint8)
out = out.reshape((height, width, -1)).astype(np.int32)
out = (out[..., 0] << 8) + out[..., 1]
plt.close()
for i, color in enumerate(colors):
# 0 is for the background
img[out == i + 1] = color
img[img == 255] = 0
pil_img = Image.fromarray(img.squeeze())
pil_img.save(out_path)
def set_instance_color(label: Label, category_id: int,
ann_id: int) -> NDArrayU8:
"""Set the color for an instance given its attributes and ID."""
attributes = label.attributes
if attributes is None:
truncated, occluded, crowd, ignored = 0, 0, 0, 0
else:
truncated = int(attributes.get("truncated", False))
occluded = int(attributes.get("occluded", False))
crowd = int(check_crowd(label))
ignored = int(check_ignored(label))
color: NDArrayU8 = np.array(
[
category_id & 255,
(truncated << 3) + (occluded << 2) + (crowd << 1) + ignored,
ann_id >> 8,
ann_id & 255,
],
dtype=np.uint8, )
return color
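# A reference helper (not part of the original file) that inverts the packing
# above: given one bitmask pixel (R, G, B, A), recover the category id, the
# attribute flags, and the annotation id.
def decode_instance_color(pixel):
    """Inverse of set_instance_color for a single (R, G, B, A) pixel."""
    category_id = int(pixel[0])
    flags = int(pixel[1])
    truncated = (flags >> 3) & 1
    occluded = (flags >> 2) & 1
    crowd = (flags >> 1) & 1
    ignored = flags & 1
    ann_id = (int(pixel[2]) << 8) + int(pixel[3])
    return category_id, (truncated, occluded, crowd, ignored), ann_id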
def set_lane_color(label: Label, category_id: int) -> NDArrayU8:
"""Set the color for the lane given its attributes and category."""
attributes = label.attributes
if attributes is None:
lane_direction, lane_style = 0, 0
else:
lane_direction = LANE_DIRECTION_MAP[str(
attributes.get("laneDirection", "parallel"))]
lane_style = LANE_STYLE_MAP[str(attributes.get("laneStyle", "solid"))]
    #value = category_id + (lane_direction << 5) + (lane_style << 4)
    # Collapse lane labels: solid lanes of category 2 or 3 map to 1, other
    # solid lanes to 2, dashed lanes to 3.
    if lane_style == 0 and (category_id == 3 or category_id == 2):
        value = 1
    elif lane_style == 0:
        value = 2
    else:
        value = 3
color: NDArrayU8 = np.array([value], dtype=np.uint8)
return color
def frames_to_masks(
nproc: int,
out_paths: List[str],
shapes: List[ImageSize],
colors_list: List[List[NDArrayU8]],
poly2ds_list: List[List[List[Poly2D]]],
with_instances: bool=True,
back_color: int=0,
closed: bool=True, ) -> None:
"""Execute the mask conversion in parallel."""
with Pool(nproc) as pool:
pool.starmap(
partial(
frame_to_mask,
with_instances=with_instances,
back_color=back_color,
closed=closed, ),
tqdm(
zip(out_paths, shapes, colors_list, poly2ds_list),
total=len(out_paths), ), )
def seg_to_masks(
frames: List[Frame],
out_base: str,
config: Config,
nproc: int=NPROC,
mode: str="sem_seg",
back_color: int=IGNORE_LABEL,
closed: bool=True, ) -> None:
"""Converting segmentation poly2d to 1-channel masks."""
os.makedirs(out_base, exist_ok=True)
img_shape = config.imageSize
out_paths: List[str] = []
shapes: List[ImageSize] = []
colors_list: List[List[NDArrayU8]] = []
poly2ds_list: List[List[List[Poly2D]]] = []
categories = dict(
sem_seg=labels, drivable=drivables, lane_mark=lane_categories)[mode]
cat_name2id = {
cat.name: cat.trainId
for cat in categories if cat.trainId != IGNORE_LABEL
}
logger.info("Preparing annotations for Semseg to Bitmasks")
for image_anns in tqdm(frames):
# Mask in .png format
image_name = image_anns.name.replace(".jpg", ".png")
image_name = os.path.split(image_name)[-1]
out_path = os.path.join(out_base, image_name)
out_paths.append(out_path)
if img_shape is None:
if image_anns.size is not None:
img_shape = image_anns.size
else:
raise ValueError("Image shape not defined!")
shapes.append(img_shape)
colors: List[NDArrayU8] = []
poly2ds: List[List[Poly2D]] = []
colors_list.append(colors)
poly2ds_list.append(poly2ds)
if image_anns.labels is None:
continue
for label in image_anns.labels:
if label.category not in cat_name2id:
continue
if label.poly2d is None:
continue
category_id = cat_name2id[label.category]
if mode in ["sem_seg", "drivable"]:
color: NDArrayU8 = np.array([category_id], dtype=np.uint8)
else:
color = set_lane_color(label, category_id)
colors.append(color)
poly2ds.append(label.poly2d)
logger.info("Start Conversion for Seg to Masks")
frames_to_masks(
nproc,
out_paths,
shapes,
colors_list,
poly2ds_list,
with_instances=False,
back_color=back_color,
closed=closed, )
ToMasksFunc = Callable[[List[Frame], str, Config, int], None]
semseg_to_masks: ToMasksFunc = partial(
seg_to_masks, mode="sem_seg", back_color=IGNORE_LABEL, closed=True)
drivable_to_masks: ToMasksFunc = partial(
seg_to_masks,
mode="drivable",
back_color=len(drivables) - 1,
closed=True, )
lanemark_to_masks: ToMasksFunc = partial(
seg_to_masks, mode="lane_mark", back_color=IGNORE_LABEL, closed=False)
def insseg_to_bitmasks(frames: List[Frame],
out_base: str,
config: Config,
nproc: int=NPROC) -> None:
"""Converting instance segmentation poly2d to bitmasks."""
os.makedirs(out_base, exist_ok=True)
img_shape = config.imageSize
out_paths: List[str] = []
shapes: List[ImageSize] = []
colors_list: List[List[NDArrayU8]] = []
poly2ds_list: List[List[List[Poly2D]]] = []
categories = get_leaf_categories(config.categories)
cat_name2id = {cat.name: i + 1 for i, cat in enumerate(categories)}
logger.info("Preparing annotations for InsSeg to Bitmasks")
for image_anns in tqdm(frames):
ann_id = 0
# Bitmask in .png format
image_name = image_anns.name.replace(".jpg", ".png")
image_name = os.path.split(image_name)[-1]
out_path = os.path.join(out_base, image_name)
out_paths.append(out_path)
if img_shape is None:
if image_anns.size is not None:
img_shape = image_anns.size
else:
raise ValueError("Image shape not defined!")
shapes.append(img_shape)
colors: List[NDArrayU8] = []
poly2ds: List[List[Poly2D]] = []
colors_list.append(colors)
poly2ds_list.append(poly2ds)
labels_ = image_anns.labels
if labels_ is None or len(labels_) == 0:
continue
        # Higher scores are rendered later.
if labels_[0].score is not None:
labels_ = sorted(labels_, key=lambda label: float(label.score))
for label in labels_:
if label.poly2d is None:
continue
if label.category not in cat_name2id:
continue
ann_id += 1
category_id = cat_name2id[label.category]
color = set_instance_color(label, category_id, ann_id)
colors.append(color)
poly2ds.append(label.poly2d)
logger.info("Start conversion for InsSeg to Bitmasks")
frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
def panseg_to_bitmasks(frames: List[Frame],
out_base: str,
config: Config,
nproc: int=NPROC) -> None:
"""Converting panoptic segmentation poly2d to bitmasks."""
os.makedirs(out_base, exist_ok=True)
img_shape = config.imageSize
out_paths: List[str] = []
shapes: List[ImageSize] = []
colors_list: List[List[NDArrayU8]] = []
poly2ds_list: List[List[List[Poly2D]]] = []
cat_name2id = {cat.name: cat.id for cat in labels}
logger.info("Preparing annotations for InsSeg to Bitmasks")
for image_anns in tqdm(frames):
cur_ann_id = STUFF_NUM
# Bitmask in .png format
image_name = image_anns.name.replace(".jpg", ".png")
image_name = os.path.split(image_name)[-1]
out_path = os.path.join(out_base, image_name)
out_paths.append(out_path)
if img_shape is None:
if image_anns.size is not None:
img_shape = image_anns.size
else:
raise ValueError("Image shape not defined!")
shapes.append(img_shape)
colors: List[NDArrayU8] = []
poly2ds: List[List[Poly2D]] = []
colors_list.append(colors)
poly2ds_list.append(poly2ds)
labels_ = image_anns.labels
if labels_ is None or len(labels_) == 0:
continue
        # Higher scores are rendered later.
if labels_[0].score is not None:
labels_ = sorted(labels_, key=lambda label: float(label.score))
for label in labels_:
if label.poly2d is None:
continue
if label.category not in cat_name2id:
continue
category_id = cat_name2id[label.category]
if category_id == 0:
continue
if category_id <= STUFF_NUM:
ann_id = category_id
else:
cur_ann_id += 1
ann_id = cur_ann_id
color = set_instance_color(label, category_id, ann_id)
colors.append(color)
poly2ds.append(label.poly2d)
logger.info("Start conversion for PanSeg to Bitmasks")
frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
def segtrack_to_bitmasks(frames: List[Frame],
out_base: str,
config: Config,
nproc: int=NPROC) -> None:
"""Converting segmentation tracking poly2d to bitmasks."""
frames_list = group_and_sort(frames)
img_shape = config.imageSize
out_paths: List[str] = []
shapes: List[ImageSize] = []
colors_list: List[List[NDArrayU8]] = []
poly2ds_list: List[List[List[Poly2D]]] = []
categories = get_leaf_categories(config.categories)
cat_name2id = {cat.name: i + 1 for i, cat in enumerate(categories)}
logger.info("Preparing annotations for SegTrack to Bitmasks")
for video_anns in tqdm(frames_list):
global_instance_id: int = 1
instance_id_maps: Dict[str, int] = {}
video_name = video_anns[0].videoName
out_dir = os.path.join(out_base, video_name)
if not os.path.isdir(out_dir):
os.makedirs(out_dir)
for image_anns in video_anns:
# Bitmask in .png format
image_name = image_anns.name.replace(".jpg", ".png")
image_name = os.path.split(image_name)[-1]
out_path = os.path.join(out_dir, image_name)
out_paths.append(out_path)
if img_shape is None:
if image_anns.size is not None:
img_shape = image_anns.size
else:
raise ValueError("Image shape not defined!")
shapes.append(img_shape)
colors: List[NDArrayU8] = []
poly2ds: List[List[Poly2D]] = []
colors_list.append(colors)
poly2ds_list.append(poly2ds)
labels_ = image_anns.labels
if labels_ is None or len(labels_) == 0:
continue
            # Higher scores are rendered later.
if labels_[0].score is not None:
labels_ = sorted(labels_, key=lambda label: float(label.score))
for label in labels_:
if label.poly2d is None:
continue
if label.category not in cat_name2id:
continue
instance_id, global_instance_id = get_bdd100k_instance_id(
instance_id_maps, global_instance_id, label.id)
category_id = cat_name2id[label.category]
color = set_instance_color(label, category_id, instance_id)
colors.append(color)
poly2ds.append(label.poly2d)
logger.info("Start Conversion for SegTrack to Bitmasks")
frames_to_masks(nproc, out_paths, shapes, colors_list, poly2ds_list)
def main() -> None:
"""Main function."""
args = parse_args()
args.mode = "lane_mark"
os.environ["QT_QPA_PLATFORM"] = "offscreen" # matplotlib offscreen render
convert_funcs: Dict[str, ToMasksFunc] = dict(
sem_seg=semseg_to_masks,
drivable=drivable_to_masks,
lane_mark=lanemark_to_masks,
pan_seg=panseg_to_bitmasks,
ins_seg=insseg_to_bitmasks,
seg_track=segtrack_to_bitmasks, )
dataset = load(args.input, args.nproc)
if args.config is not None:
bdd100k_config = load_bdd100k_config(args.config)
elif dataset.config is not None:
bdd100k_config = BDD100KConfig(config=dataset.config)
else:
bdd100k_config = load_bdd100k_config(args.mode)
if args.mode in ["ins_seg", "seg_track"]:
frames = bdd100k_to_scalabel(dataset.frames, bdd100k_config)
else:
frames = dataset.frames
convert_funcs[args.mode](frames, args.output, bdd100k_config.scalabel,
args.nproc)
logger.info("Finished!")
if __name__ == "__main__":
main()
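# Hedged usage note: the CLI flags come from bdd100k.label.to_coco.parse_args
# (input/output/mode/nproc/config); the exact flag spellings below are an
# assumption, and main() overrides args.mode to "lane_mark" regardless of
# what is passed:
#
#     python to_mask.py -i lane_labels.json -o lane_masks/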


@@ -0,0 +1,80 @@
import os
import glob
import random
import fnmatch
import re
import sys
class_id = {"nofight": 0, "fight": 1}
def get_list(path, key_func=lambda x: x[-11:], rgb_prefix='img_', level=1):
if level == 1:
frame_folders = glob.glob(os.path.join(path, '*'))
elif level == 2:
frame_folders = glob.glob(os.path.join(path, '*', '*'))
else:
raise ValueError('level can be only 1 or 2')
def count_files(directory):
lst = os.listdir(directory)
cnt = len(fnmatch.filter(lst, rgb_prefix + '*'))
return cnt
# check RGB
video_dict = {}
for f in frame_folders:
cnt = count_files(f)
k = key_func(f)
if level == 2:
k = k.split("/")[0]
video_dict[f] = str(cnt) + " " + str(class_id[k])
return video_dict
def fight_splits(video_dict, train_percent=0.8):
videos = list(video_dict.keys())
train_num = int(len(videos) * train_percent)
train_list = []
val_list = []
random.shuffle(videos)
for i in range(train_num):
train_list.append(videos[i] + " " + str(video_dict[videos[i]]))
for i in range(train_num, len(videos)):
val_list.append(videos[i] + " " + str(video_dict[videos[i]]))
print("train:", len(train_list), ",val:", len(val_list))
with open("fight_train_list.txt", "w") as f:
for item in train_list:
f.write(item + "\n")
with open("fight_val_list.txt", "w") as f:
for item in val_list:
f.write(item + "\n")
if __name__ == "__main__":
frame_dir = sys.argv[1] # "rawframes"
level = sys.argv[2] # 2
train_percent = sys.argv[3] # 0.8
if level == 2:
def key_func(x):
return '/'.join(x.split('/')[-2:])
else:
def key_func(x):
return x.split('/')[-1]
video_dict = get_list(frame_dir, key_func=key_func, level=level)
print("number:", len(video_dict))
fight_splits(video_dict, train_percent)
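# Example invocation (hypothetical script name; with level 2 the frame
# directory is expected to look like rawframes/<class>/<video>/img_*.jpg):
#
#     python split_fight_train_test_dataset.py rawframes 2 0.8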