更换文档检测模型 (Replace the document detection model)
This commit is contained in:
341
paddle_detection/ppdet/modeling/architectures/detr_ssod.py
Normal file
341
paddle_detection/ppdet/modeling/architectures/detr_ssod.py
Normal file
@@ -0,0 +1,341 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from ppdet.core.workspace import register, create, merge_config
|
||||
import paddle
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
import paddle.nn.functional as F
|
||||
from ppdet.core.workspace import register, create
|
||||
from ppdet.utils.logger import setup_logger
|
||||
from ppdet.modeling.ssod.utils import filter_invalid
|
||||
from .multi_stream_detector import MultiSteamDetector
|
||||
# Module-level logger (PaddleDetection convention: one logger per module).
logger = setup_logger(__name__)

# Public API of this module.
__all__ = ['DETR_SSOD']
# Config keys shared across components via the ppdet workspace mechanism.
__shared__ = ['num_classes']
|
||||
|
||||
|
||||
@register
class DETR_SSOD(MultiSteamDetector):
    """Semi-supervised DETR detector (teacher-student framework).

    Holds two sub-detectors registered with the multi-stream base class:
    a gradient-frozen ``teacher`` updated only by EMA from the ``student``,
    and a trainable ``student``. Before ``ema_start_iters`` only the
    supervised branch trains (unsupervised losses are zeroed); afterwards
    the teacher generates pseudo labels on weakly-augmented unlabeled
    images and the student is additionally trained on the
    strongly-augmented counterparts.

    Args:
        teacher: teacher detector instance (frozen, EMA-updated).
        student: student detector instance (trained by gradients).
        train_cfg (dict): must provide 'ema_start_iters', 'unsup_weight',
            'sup_weight', 'pseudo_label_initial_score_thr',
            'min_pseduo_box_size' (key spelled as in the config files).
        test_cfg (dict): inference configuration, passed to the base class.
        RTDETRTransformer: transformer config entry; injected via
            from_config but not stored here.
        num_classes (int): number of foreground classes (shared config key).
    """

    def __init__(self,
                 teacher,
                 student,
                 train_cfg=None,
                 test_cfg=None,
                 RTDETRTransformer=None,
                 num_classes=80):
        # Register both sub-detectors with the multi-stream base class.
        super(DETR_SSOD, self).__init__(
            dict(
                teacher=teacher, student=student),
            train_cfg=train_cfg,
            test_cfg=test_cfg, )
        # NOTE(review): train_cfg is subscripted before the None check below,
        # so the train_cfg=None default would raise TypeError here — confirm
        # whether eval-only construction ever passes None.
        self.ema_start_iters = train_cfg['ema_start_iters']
        # EMA decay applied to the teacher once semi-supervised training runs.
        self.momentum = 0.9996
        # Per-class thresholds; left None here — presumably reserved for the
        # dynamic-threshold path that raises NotImplementedError below.
        self.cls_thr = None
        self.cls_thr_ig = None
        self.num_classes = num_classes
        if train_cfg is not None:
            # Teacher receives no gradients; it is updated only via EMA.
            self.freeze("teacher")
            self.unsup_weight = self.train_cfg['unsup_weight']
            self.sup_weight = self.train_cfg['sup_weight']
            # NOTE(review): the three attributes below are never read in this
            # file — possibly leftovers from an earlier revision.
            self._teacher = None
            self._student = None
            self._transformer = None

    @classmethod
    def from_config(cls, cfg):
        """Build constructor kwargs from a workspace config.

        Creates the teacher first, merges the config (so the student picks
        up any overrides), then creates the student.
        """
        teacher = create(cfg['teacher'])
        merge_config(cfg)
        student = create(cfg['student'])
        train_cfg = cfg['train_cfg']
        test_cfg = cfg['test_cfg']
        RTDETRTransformer = cfg['RTDETRTransformer']
        return {
            'teacher': teacher,
            'student': student,
            'train_cfg': train_cfg,
            'test_cfg': test_cfg,
            'RTDETRTransformer': RTDETRTransformer
        }

    def forward_train(self, inputs, **kwargs):
        """One training step.

        ``inputs`` is either a dict carrying 'iter_id', or a 5-element list
        ``[data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, iter_id]``
        (weak/strong supervised batches, weak/strong unlabeled batches,
        iteration id). Returns a dict of losses whose 'loss' entry is the
        total to backpropagate.
        """
        if isinstance(inputs, dict):
            iter_id = inputs['iter_id']
        elif isinstance(inputs, list):
            iter_id = inputs[-1]
        # NOTE(review): any other input type leaves iter_id unbound
        # (NameError below) — confirm callers only pass dict or list.
        # At the switch-over iteration, copy the student into the teacher
        # (momentum=0); afterwards keep the teacher as an EMA of the student.
        if iter_id == self.ema_start_iters:
            self.update_ema_model(momentum=0)
        elif iter_id > self.ema_start_iters:
            self.update_ema_model(momentum=self.momentum)
        if iter_id > self.ema_start_iters:
            # --- semi-supervised phase: supervised + pseudo-label losses ---
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs

            # Weak/strong supervised batches must share a spatial size before
            # they can be concatenated into one batch below.
            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)

            # Unlabeled batches must not carry GT; the teacher supplies
            # pseudo labels instead.
            if 'gt_bbox' in data_unsup_s.keys():
                del data_unsup_s['gt_bbox']
            if 'gt_class' in data_unsup_s.keys():
                del data_unsup_s['gt_class']
            if 'gt_class' in data_unsup_w.keys():
                del data_unsup_w['gt_class']
            if 'gt_bbox' in data_unsup_w.keys():
                del data_unsup_w['gt_bbox']
            # Merge weak+strong supervised samples into a single batch:
            # list-valued GT fields are extended, tensors are concatenated.
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])

            loss = {}
            # Supervised forward through the student's backbone/neck/head.
            body_feats = self.student.backbone(data_sup_s)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       data_sup_s)
            sup_loss = self.student.detr_head(out_transformer, body_feats,
                                              data_sup_s)
            # Total supervised loss: sum every non-logging entry.
            sup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in sup_loss.items() if 'log' not in k])
            })
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}

            loss.update(**sup_loss)
            # Unsupervised branch: teacher pseudo-labels -> student loss.
            unsup_loss = self.foward_unsup_train(data_unsup_w, data_unsup_s)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            unsup_loss = {"unsup_" + k: v for k, v in unsup_loss.items()}
            # NOTE(review): this second 'loss' recomputation sums the already
            # prefixed entries (including 'unsup_loss'), effectively doubling
            # it; the temporary key is overwritten by the final update below,
            # so it appears harmless — confirm it is intentional.
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v for k, v in unsup_loss.items() if 'log' not in k])
            })
            loss.update(**unsup_loss)
            # Final objective: supervised + unsupervised totals.
            loss.update({'loss': loss['sup_loss'] + loss['unsup_loss']})
        else:
            # --- warm-up phase: supervised only, zeroed unsup placeholders ---
            if iter_id == self.ema_start_iters:
                # NOTE(review): "traing" typo lives in the runtime log string;
                # left untouched here.
                logger.info("start semi_supervised_traing")
            data_sup_w, data_sup_s, data_unsup_w, data_unsup_s, _ = inputs

            if data_sup_w['image'].shape != data_sup_s['image'].shape:
                data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w,
                                                                 data_sup_s)
            # Same weak+strong merge as in the semi-supervised branch.
            for k, v in data_sup_s.items():
                if k in ['epoch_id']:
                    continue
                elif k in ['gt_class', 'gt_bbox', 'is_crowd']:
                    data_sup_s[k].extend(data_sup_w[k])
                else:
                    data_sup_s[k] = paddle.concat([v, data_sup_w[k]])
            loss = {}
            sup_loss = self.student(data_sup_s)
            # Zero-valued unsup entries keep the loss-dict schema identical
            # to the semi-supervised phase (same keys, zero gradients).
            unsup_loss = {
                "unsup_" + k: v * paddle.to_tensor(0)
                for k, v in sup_loss.items()
            }
            sup_loss = {"sup_" + k: v for k, v in sup_loss.items()}
            loss.update(**sup_loss)
            unsup_loss.update({
                'loss': paddle.add_n(
                    [v * 0 for k, v in sup_loss.items() if 'log' not in k])
            })
            # NOTE(review): this re-prefixes already-prefixed keys, yielding
            # 'unsup_unsup_*' entries (plus 'unsup_loss') — looks accidental
            # but is behavior-neutral since all values are zeroed; confirm.
            unsup_loss = {"unsup_" + k: v * 0 for k, v in unsup_loss.items()}
            loss.update(**unsup_loss)
            # During warm-up only the supervised total is optimized.
            loss.update({'loss': loss['sup_loss']})
        return loss

    def foward_unsup_train(self, data_unsup_w, data_unsup_s):
        """Unsupervised step: teacher pseudo-labels -> student loss.

        (Name misspelled upstream — "foward" — kept as-is since it is the
        public method name.) Runs the teacher on the weakly-augmented batch
        under no_grad, splits/filters its detections into per-image pseudo
        boxes, labels and scores, then trains the student on the
        strongly-augmented batch against those pseudo labels.
        """

        with paddle.no_grad():
            # Teacher inference on the weak view; no gradients are needed.
            body_feats = self.teacher.backbone(data_unsup_w)
            if self.teacher.neck is not None:
                body_feats = self.teacher.neck(body_feats, is_teacher=True)
            out_transformer = self.teacher.transformer(
                body_feats, None, data_unsup_w, is_teacher=True)
            preds = self.teacher.detr_head(out_transformer, body_feats)
            # bbox: flat [N, ...] detections; bbox_num: per-image counts.
            bbox, bbox_num = self.teacher.post_process_semi(preds)
        self.place = body_feats[0].place

        # Last 4 columns are box coordinates; split into per-image chunks.
        proposal_bbox_list = bbox[:, -4:]
        proposal_bbox_list = proposal_bbox_list.split(
            tuple(np.array(bbox_num)), 0)

        # Column 0 is the class label.
        proposal_label_list = paddle.cast(bbox[:, :1], np.float32)
        proposal_label_list = proposal_label_list.split(
            tuple(np.array(bbox_num)), 0)
        # Columns 1..num_classes are per-class scores.
        proposal_score_list = paddle.cast(bbox[:, 1:self.num_classes + 1],
                                          np.float32)
        proposal_score_list = proposal_score_list.split(
            tuple(np.array(bbox_num)), 0)
        # Re-materialize the splits on the teacher's device.
        proposal_bbox_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_bbox_list
        ]
        proposal_label_list = [
            paddle.to_tensor(
                p, place=self.place) for p in proposal_label_list
        ]
        # filter invalid box roughly
        if isinstance(self.train_cfg['pseudo_label_initial_score_thr'], float):
            thr = self.train_cfg['pseudo_label_initial_score_thr']
        else:
            # TODO: use dynamic threshold
            raise NotImplementedError(
                "Dynamic Threshold is not implemented yet.")
        # Drop low-score / tiny pseudo boxes per image ('min_pseduo_box_size'
        # is the config key's upstream spelling).
        proposal_bbox_list, proposal_label_list, proposal_score_list = list(
            zip(* [
                filter_invalid(
                    proposal[:, :4],
                    proposal_label,
                    proposal_score,
                    thr=thr,
                    min_size=self.train_cfg['min_pseduo_box_size'], )
                for proposal, proposal_label, proposal_score in
                zip(proposal_bbox_list, proposal_label_list,
                    proposal_score_list)
            ]))

        teacher_bboxes = list(proposal_bbox_list)
        teacher_labels = proposal_label_list
        teacher_info = [teacher_bboxes, teacher_labels]
        student_unsup = data_unsup_s
        return self.compute_pseudo_label_loss(student_unsup, teacher_info,
                                              proposal_score_list)

    def compute_pseudo_label_loss(self, student_unsup, teacher_info,
                                  proposal_score_list):
        """Train the student on the strong view against teacher pseudo labels.

        Args:
            student_unsup (dict): strongly-augmented unlabeled batch.
            teacher_info (list): [pseudo_bboxes, pseudo_labels] per image.
            proposal_score_list: per-image class-score tensors, used as soft
                'gt_score' weights.

        Returns:
            dict of student losses; all entries are multiplied by 0 when the
            batch contains no pseudo boxes at all.
        """

        pseudo_bboxes = list(teacher_info[0])
        pseudo_labels = list(teacher_info[1])
        losses = dict()
        # Normalize empty per-image predictions to fixed-shape numpy arrays.
        for i in range(len(pseudo_bboxes)):
            if pseudo_labels[i].shape[0] == 0:
                pseudo_bboxes[i] = paddle.zeros([0, 4]).numpy()
                pseudo_labels[i] = paddle.zeros([0, 1]).numpy()
            else:
                pseudo_bboxes[i] = pseudo_bboxes[i][:, :4].numpy()
                pseudo_labels[i] = pseudo_labels[i].numpy()
        # Back to device tensors with the dtypes the detr head expects.
        for i in range(len(pseudo_bboxes)):
            pseudo_labels[i] = paddle.to_tensor(
                pseudo_labels[i], dtype=paddle.int32, place=self.place)
            pseudo_bboxes[i] = paddle.to_tensor(
                pseudo_bboxes[i], dtype=paddle.float32, place=self.place)
        student_unsup.update({
            'gt_bbox': pseudo_bboxes,
            'gt_class': pseudo_labels
        })
        # NOTE(review): emptiness is detected via sum()==0 over coordinates,
        # which would also trigger on genuinely all-zero boxes — confirm.
        pseudo_sum = 0
        for i in range(len(pseudo_bboxes)):
            pseudo_sum += pseudo_bboxes[i].sum()
        if pseudo_sum == 0:  #input fake data when there are no pseudo labels
            # Feed one dummy box so the forward pass stays well-formed, then
            # zero every loss so the step contributes no gradient signal.
            pseudo_bboxes[0] = paddle.ones([1, 4]) - 0.5
            pseudo_labels[0] = paddle.ones([1, 1]).astype('int32')
            student_unsup.update({
                'gt_bbox': pseudo_bboxes,
                'gt_class': pseudo_labels
            })
            body_feats = self.student.backbone(student_unsup)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       student_unsup)
            losses = self.student.detr_head(out_transformer, body_feats,
                                            student_unsup)
            for n, v in losses.items():
                losses[n] = v * 0
        else:
            # Keep only images that actually received pseudo labels.
            gt_bbox = []
            gt_class = []
            images = []
            proposal_score = []
            for i in range(len(pseudo_bboxes)):
                if pseudo_labels[i].shape[0] == 0:
                    continue
                else:
                    # Highest class score per pseudo box, used as soft weight.
                    proposal_score.append(proposal_score_list[i].max(-1)
                                          .unsqueeze(-1))
                    gt_class.append(pseudo_labels[i])
                    gt_bbox.append(pseudo_bboxes[i])
                    images.append(student_unsup['image'][i])
            images = paddle.stack(images)
            student_unsup.update({
                'image': images,
                'gt_bbox': gt_bbox,
                'gt_class': gt_class
            })
            body_feats = self.student.backbone(student_unsup)
            if self.student.neck is not None:
                body_feats = self.student.neck(body_feats)
            out_transformer = self.student.transformer(body_feats, None,
                                                       student_unsup)
            student_unsup.update({'gt_score': proposal_score})
            losses = self.student.detr_head(out_transformer, body_feats,
                                            student_unsup)
        return losses
|
||||
|
||||
|
||||
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to corner (x0, y0, x1, y1) format.

    Operates on the last axis of `x`; all leading axes are preserved.
    """
    cx, cy, w, h = x.unbind(-1)
    half_w = 0.5 * w
    half_h = 0.5 * h
    corners = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
    return paddle.stack(corners, axis=-1)
|
||||
|
||||
|
||||
def box_xyxy_to_cxcywh(x):
    """Convert boxes from corner (x0, y0, x1, y1) to (cx, cy, w, h) format.

    Operates on the last axis of `x`; all leading axes are preserved.
    """
    xmin, ymin, xmax, ymax = x.unbind(-1)
    width = xmax - xmin
    height = ymax - ymin
    center_x = (xmin + xmax) / 2
    center_y = (ymin + ymax) / 2
    return paddle.stack([center_x, center_y, width, height], axis=-1)
|
||||
|
||||
|
||||
def get_size_with_aspect_ratio(image_size, size, max_size=None):
    """Compute an output (width, height) that rescales the short side to `size`.

    Args:
        image_size: original (width, height) pair.
        size: target length for the shorter side.
        max_size: optional cap on the longer side; when the naive rescale
            would exceed it, `size` is shrunk so the long side fits exactly.

    Returns:
        (ow, oh) tuple preserving the original aspect ratio.
    """
    w, h = image_size

    # Shrink the target short side if the scaled long side would exceed the cap.
    if max_size is not None:
        short_side = float(min((w, h)))
        long_side = float(max((w, h)))
        if long_side / short_side * size > max_size:
            size = int(round(max_size * short_side / long_side))

    # Already at the requested scale — return the input size unchanged.
    if (w <= h and w == size) or (h <= w and h == size):
        return (w, h)

    # Scale so the shorter dimension becomes `size`.
    if w < h:
        return (size, int(size * h / w))
    return (int(size * w / h), size)
|
||||
|
||||
|
||||
def align_weak_strong_shape(data_weak, data_strong):
    """Resize the weak batch's images to match the strong batch's spatial size.

    Bilinearly interpolates `data_weak['image']` (NCHW) to the (H, W) of
    `data_strong['image']` so the two views can be batched together.
    Mutates `data_weak` in place and returns both dicts.
    """
    target_h = data_strong['image'].shape[2]
    target_w = data_strong['image'].shape[3]

    data_weak['image'] = F.interpolate(
        data_weak['image'],
        size=[target_h, target_w],
        mode='bilinear',
        align_corners=False)
    return data_weak, data_strong
|
||||
Reference in New Issue
Block a user