文档检测

This commit is contained in:
2024-08-20 13:18:45 +08:00
parent e6891257b9
commit 299b762cad
59 changed files with 4966 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
from .coco import COCO
# Registry mapping a dataset name to the class that implements it.
datasets = dict(COCO=COCO)

View File

@@ -0,0 +1,74 @@
import os
import numpy as np
class BASE(object):
    """Shared state and read-only accessors for dataset wrappers.

    Stores the split name, the image-id list, the index array over that
    list, three-element mean/std/eigenvalue vectors, a 3x3 eigenvector
    matrix and a configuration dictionary. ``image_path``,
    ``write_result`` and ``evaluate`` are no-op placeholders here.
    """

    def __init__(self):
        self._split = None
        self._db_inds = []
        self._image_ids = []

        self._mean = np.zeros((3,), dtype=np.float32)
        self._std = np.ones((3,), dtype=np.float32)
        self._eig_val = np.ones((3,), dtype=np.float32)
        self._eig_vec = np.zeros((3, 3), dtype=np.float32)

        self._configs = {}
        self._configs["data_aug"] = True

        # Created lazily by shuffle_inds().
        self._data_rng = None

    @property
    def configs(self):
        """Return the configuration dictionary (the live object, not a copy)."""
        return self._configs

    @property
    def mean(self):
        """Return the stored mean vector."""
        return self._mean

    @property
    def std(self):
        """Return the stored standard-deviation vector."""
        return self._std

    @property
    def eig_val(self):
        """Return the stored eigenvalue vector."""
        return self._eig_val

    @property
    def eig_vec(self):
        """Return the stored eigenvector matrix."""
        return self._eig_vec

    @property
    def db_inds(self):
        """Return the current index sequence over the image-id list."""
        return self._db_inds

    @property
    def split(self):
        """Return the split name, or ``None`` if no split was set."""
        return self._split

    def update_config(self, new):
        """Overwrite existing configuration entries with values from *new*.

        Keys of *new* that are not already present in the configuration
        are ignored, so this can never introduce a new option.
        """
        for key in new:
            if key in self._configs:
                self._configs[key] = new[key]

    def image_ids(self, ind):
        """Return the entry of the image-id list at position *ind*."""
        return self._image_ids[ind]

    def image_path(self, ind):
        """Placeholder; does nothing and returns ``None`` in this class."""
        pass

    def write_result(self, ind, all_bboxes, all_scores):
        """Placeholder; does nothing and returns ``None`` in this class."""
        pass

    def evaluate(self, name):
        """Placeholder; does nothing and returns ``None`` in this class."""
        pass

    def shuffle_inds(self, quiet=False):
        """Randomly permute the index sequence in place.

        On first use a ``numpy.random.RandomState`` seeded with the current
        process id is created and then reused by later calls. Unless
        *quiet* is true, a progress message is printed.
        """
        if self._data_rng is None:
            self._data_rng = np.random.RandomState(os.getpid())

        if not quiet:
            print("shuffling indices...")

        rand_perm = self._data_rng.permutation(len(self._db_inds))
        # The indices start out as a plain list, which cannot be indexed by
        # an integer array; coerce to an ndarray first (no copy is made when
        # it already is one).
        self._db_inds = np.asarray(self._db_inds)[rand_perm]

View File

@@ -0,0 +1,169 @@
import os
import numpy as np
from .detection import DETECTION
# COCO bounding boxes are 0-indexed
class COCO(DETECTION):
    """COCO detection dataset: annotation loading, id mapping, evaluation.

    When *split* is given, the annotation file for that split is read with
    ``pycocotools`` and every image's boxes are cached as an ``(N, 5)``
    float32 array of ``[x1, y1, x2, y2, class_index]`` rows. When *split*
    is ``None`` only the category tables and colour statistics are set up.
    """

    def __init__(self, db_config, split=None, sys_config=None):
        """Build the category tables and, if *split* is set, load annotations.

        Args:
            db_config: Mapping of configuration overrides, forwarded to the
                parent initializer.
            split: One of ``"trainval"``, ``"minival"``, ``"testdev"`` or
                ``None``; any other value raises ``KeyError``.
            sys_config: Object exposing ``data_dir``; required when *split*
                is not ``None``.
        """
        assert split is None or sys_config is not None
        super(COCO, self).__init__(db_config)

        self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
        self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
        self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
        self._eig_vec = np.array([
            [-0.58752847, -0.69563484, 0.41340352],
            [-0.5832747, 0.00994535, -0.81221408],
            [-0.56089297, 0.71832671, 0.41158938]
        ], dtype=np.float32)

        # COCO category ids are not contiguous; this lists the 80 used ids.
        self._coco_cls_ids = [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
            14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
            37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
            72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
            82, 84, 85, 86, 87, 88, 89, 90
        ]

        self._coco_cls_names = [
            'person', 'bicycle', 'car', 'motorcycle', 'airplane',
            'bus', 'train', 'truck', 'boat', 'traffic light',
            'fire hydrant', 'stop sign', 'parking meter', 'bench',
            'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
            'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
            'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
            'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard',
            'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
            'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
            'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
            'donut', 'cake', 'chair', 'couch', 'potted plant',
            'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
            'oven', 'toaster', 'sink', 'refrigerator', 'book',
            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
            'toothbrush'
        ]

        # Contiguous 1-based class index <-> COCO category id.
        self._cls2coco = {ind + 1: coco_id for ind, coco_id in enumerate(self._coco_cls_ids)}
        self._coco2cls = {coco_id: cls_id for cls_id, coco_id in self._cls2coco.items()}
        # COCO category id <-> category name.
        self._coco2name = {cls_id: cls_name for cls_id, cls_name in zip(self._coco_cls_ids, self._coco_cls_names)}
        # items() yields (coco_id, name); unpack in that order so the result
        # really is name -> coco_id rather than a copy of _coco2name.
        self._name2coco = {cls_name: cls_id for cls_id, cls_name in self._coco2name.items()}

        # Defined up front so that methods used without a split fail with a
        # clear error instead of an AttributeError.
        self._data_dir = None
        self._anno_file = None
        self._coco = None
        self._detections = {}
        self._eval_ids = {}

        if split is not None:
            coco_dir = os.path.join(sys_config.data_dir, "coco")

            self._split = {
                "trainval": "trainval2014",
                "minival": "minival2014",
                "testdev": "testdev2017"
            }[split]

            self._data_dir = os.path.join(coco_dir, "images", self._split)
            self._anno_file = os.path.join(coco_dir, "annotations", "instances_{}.json".format(self._split))

            self._detections, self._eval_ids = self._load_coco_annos()
            self._image_ids = list(self._detections.keys())
            self._db_inds = np.arange(len(self._image_ids))

    def _load_coco_annos(self):
        """Read the annotation file and return ``(detections, eval_ids)``.

        ``detections`` maps each image file name to an ``(N, 5)`` float32
        array of ``[x1, y1, x2, y2, class_index]`` rows (``(0, 5)`` when the
        image has no annotations). ``eval_ids`` maps each file name to its
        COCO image id. The loaded API object is kept on ``self._coco``.
        """
        # Aliased so it does not shadow this class inside the method.
        from pycocotools.coco import COCO as CocoApi

        coco = CocoApi(self._anno_file)
        self._coco = coco

        class_ids = coco.getCatIds()
        image_ids = coco.getImgIds()

        eval_ids = {}
        detections = {}
        for image_id in image_ids:
            image = coco.loadImgs(image_id)[0]
            file_name = image["file_name"]
            eval_ids[file_name] = image_id

            dets = []
            for class_id in class_ids:
                annotation_ids = coco.getAnnIds(imgIds=image["id"], catIds=class_id)
                annotations = coco.loadAnns(annotation_ids)
                category = self._coco2cls[class_id]
                for annotation in annotations:
                    # The annotation box is [x, y, w, h]; turn it into
                    # corner form. Concatenation builds a new list, so the
                    # annotation itself is left untouched.
                    det = annotation["bbox"] + [category]
                    det[2] += det[0]
                    det[3] += det[1]
                    dets.append(det)

            if dets:
                detections[file_name] = np.array(dets, dtype=np.float32)
            else:
                detections[file_name] = np.zeros((0, 5), dtype=np.float32)
        return detections, eval_ids

    def image_path(self, ind):
        """Return the image file path for position *ind* of the index array.

        Raises:
            ValueError: If no split was loaded, so no data directory is set.
        """
        if self._data_dir is None:
            raise ValueError("Data directory is not set")

        db_ind = self._db_inds[ind]
        file_name = self._image_ids[db_ind]
        return os.path.join(self._data_dir, file_name)

    def detections(self, ind):
        """Return a copy of the cached box array for position *ind*."""
        db_ind = self._db_inds[ind]
        file_name = self._image_ids[db_ind]
        return self._detections[file_name].copy()

    def cls2name(self, cls):
        """Return the category name for a contiguous 1-based class index."""
        coco = self._cls2coco[cls]
        return self._coco2name[coco]

    def _to_float(self, x):
        """Return *x* as a Python float rounded to two decimal places."""
        return float("{:.2f}".format(x))

    def convert_to_coco(self, all_bboxes):
        """Flatten per-image, per-class boxes into COCO result dictionaries.

        Args:
            all_bboxes: Mapping ``file_name -> {class_index -> boxes}`` where
                each box is indexable as ``[x1, y1, x2, y2, score]``.

        Returns:
            A list of dicts with ``image_id``, ``category_id``, ``bbox``
            (``[x, y, w, h]``) and ``score``; numbers are rounded to two
            decimals. The input boxes are not modified.
        """
        detections = []
        for image_id in all_bboxes:
            coco_id = self._eval_ids[image_id]
            for cls_ind in all_bboxes[image_id]:
                category_id = self._cls2coco[cls_ind]
                for bbox in all_bboxes[image_id][cls_ind]:
                    # Compute width/height into fresh values; subtracting in
                    # place would corrupt the caller's array and make a
                    # second call produce wrong boxes.
                    x1, y1, x2, y2 = bbox[0:4]
                    coco_bbox = [
                        self._to_float(value)
                        for value in (x1, y1, x2 - x1, y2 - y1)
                    ]
                    detections.append({
                        "image_id": coco_id,
                        "category_id": category_id,
                        "bbox": coco_bbox,
                        "score": self._to_float(bbox[4])
                    })
        return detections

    def evaluate(self, result_json, cls_ids, image_ids):
        """Run COCO bounding-box evaluation on a results file.

        Args:
            result_json: Results file handed to ``loadRes``.
            cls_ids: Contiguous class indices to evaluate.
            image_ids: Image file names to evaluate.

        Returns:
            ``None`` for the test-dev split; otherwise the tuple
            ``(stats[0], stats[12:])`` taken from the evaluator.

        Raises:
            ValueError: If no split was loaded, so there are no annotations.
        """
        # _split holds the mapped directory name ("testdev2017"), so a
        # comparison against the short name alone would never match.
        if self._split in ("testdev", "testdev2017"):
            return None

        from pycocotools.cocoeval import COCOeval

        coco = self._coco
        if coco is None:
            raise ValueError("Annotations are not loaded")

        eval_ids = [self._eval_ids[image_id] for image_id in image_ids]
        cat_ids = [self._cls2coco[cls_id] for cls_id in cls_ids]

        coco_dets = coco.loadRes(result_json)
        coco_eval = COCOeval(coco, coco_dets, "bbox")
        coco_eval.params.imgIds = eval_ids
        coco_eval.params.catIds = cat_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        # NOTE(review): stock pycocotools appears to produce 12 bbox stats,
        # which would make stats[12:] empty - presumably this relies on an
        # extended evaluator; confirm against the installed version.
        return coco_eval.stats[0], coco_eval.stats[12:]

View File

@@ -0,0 +1,71 @@
import numpy as np
from .base import BASE
class DETECTION(BASE):
    """Dataset base that fills in the default detection configuration.

    The defaults are installed first and then overridden by the entries
    of ``db_config`` whose keys already exist in the configuration.
    """

    def __init__(self, db_config):
        super(DETECTION, self).__init__()
        cfg = self._configs

        # Options used only while training.
        cfg.update({
            "categories": 80,
            "rand_scales": [1],
            "rand_scale_min": 0.8,
            "rand_scale_max": 1.4,
            "rand_scale_step": 0.2,
        })

        # Options shared by training and testing.
        cfg.update({
            "input_size": [383, 383],
            "output_sizes": [[96, 96], [48, 48], [24, 24], [12, 12]],
            "score_threshold": 0.05,
            "nms_threshold": 0.7,
            "max_per_set": 40,
            "max_per_image": 100,
            "top_k": 20,
            "ae_threshold": 1,
            "nms_kernel": 3,
            "num_dets": 1000,
            "nms_algorithm": "exp_soft_nms",
            "weight_exp": 8,
            "merge_bbox": False,
            "data_aug": True,
            "lighting": True,
            "border": 64,
            "gaussian_bump": False,
            "gaussian_iou": 0.7,
            "gaussian_radius": -1,
            "rand_crop": False,
            "rand_color": False,
            "rand_center": True,
            "init_sizes": [192, 255],
            "view_sizes": [],
            "min_scale": 16,
            "max_scale": 32,
            "att_sizes": [[16, 16], [32, 32], [64, 64]],
            "att_ranges": [[96, 256], [32, 96], [0, 32]],
            "att_ratios": [16, 8, 4],
            "att_scales": [1, 1.5, 2],
            "att_thresholds": [0.3, 0.3, 0.3, 0.3],
            "att_nms_ks": [3, 3, 3],
            "att_max_crops": 8,
            "ref_dets": True,
        })

        # Options used only while testing.
        cfg.update({
            "test_scales": [1],
            "test_flipped": True,
        })

        self.update_config(db_config)

        # An explicit None for "rand_scales" asks for a range generated
        # from the min / max / step options.
        if cfg["rand_scales"] is None:
            low = cfg["rand_scale_min"]
            high = cfg["rand_scale_max"]
            step = cfg["rand_scale_step"]
            cfg["rand_scales"] = np.arange(low, high, step)