Replace the document detection model

2024-08-27 14:42:45 +08:00
parent aea6f19951
commit 1514e09c40
2072 changed files with 254336 additions and 4967 deletions

View File

@@ -0,0 +1,110 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import distill_loss
from . import distill_model
from . import ofa
from . import prune
from . import quant
from . import unstructured_prune
from .distill_loss import *
from .distill_model import *
from .ofa import *
from .prune import *
from .quant import *
from .unstructured_prune import *
import yaml
from ppdet.core.workspace import load_config
from ppdet.utils.checkpoint import load_pretrain_weight
def build_slim_model(cfg, slim_cfg, mode='train'):
with open(slim_cfg) as f:
slim_load_cfg = yaml.load(f, Loader=yaml.Loader)
if mode != 'train' and slim_load_cfg['slim'] == 'Distill':
return cfg
if slim_load_cfg['slim'] == 'Distill':
if "slim_method" in slim_load_cfg and slim_load_cfg[
'slim_method'] == "FGD":
model = FGDDistillModel(cfg, slim_cfg)
elif "slim_method" in slim_load_cfg and slim_load_cfg[
'slim_method'] == "LD":
model = LDDistillModel(cfg, slim_cfg)
elif "slim_method" in slim_load_cfg and slim_load_cfg[
'slim_method'] == "CWD":
model = CWDDistillModel(cfg, slim_cfg)
elif "slim_method" in slim_load_cfg and slim_load_cfg[
'slim_method'] == "PPYOLOEDistill":
model = PPYOLOEDistillModel(cfg, slim_cfg)
else:
# common distillation model
model = DistillModel(cfg, slim_cfg)
cfg['model'] = model
cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'OFA':
load_config(slim_cfg)
model = create(cfg.architecture)
load_pretrain_weight(model, cfg.weights)
slim = create(cfg.slim)
cfg['slim'] = slim
cfg['model'] = slim(model, model.state_dict())
cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'DistillPrune':
if mode == 'train':
model = DistillModel(cfg, slim_cfg)
pruner = create(cfg.pruner)
pruner(model.student_model)
else:
model = create(cfg.architecture)
weights = cfg.weights
load_config(slim_cfg)
pruner = create(cfg.pruner)
model = pruner(model)
load_pretrain_weight(model, weights)
cfg['model'] = model
cfg['slim_type'] = cfg.slim
elif slim_load_cfg['slim'] == 'PTQ':
model = create(cfg.architecture)
load_config(slim_cfg)
load_pretrain_weight(model, cfg.weights)
slim = create(cfg.slim)
cfg['slim_type'] = cfg.slim
cfg['slim'] = slim
cfg['model'] = slim(model)
elif slim_load_cfg['slim'] == 'UnstructuredPruner':
load_config(slim_cfg)
slim = create(cfg.slim)
cfg['slim_type'] = cfg.slim
cfg['slim'] = slim
cfg['unstructured_prune'] = True
else:
load_config(slim_cfg)
model = create(cfg.architecture)
if mode == 'train':
load_pretrain_weight(model, cfg.pretrain_weights)
slim = create(cfg.slim)
cfg['slim_type'] = cfg.slim
# TODO: fix quant export model in framework.
if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
slim.quant_config['activation_preprocess_type'] = None
cfg['model'] = slim(model)
cfg['slim'] = slim
if mode != 'train':
load_pretrain_weight(cfg['model'], cfg.weights)
return cfg
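
A minimal usage sketch of build_slim_model (not part of this commit): both YAML paths below are hypothetical placeholders; in practice tools/train.py supplies them through -c and --slim_config.

from ppdet.core.workspace import load_config
from ppdet.slim import build_slim_model

cfg = load_config('configs/picodet/picodet_s_416_coco.yml')            # assumed detector config path
cfg = build_slim_model(cfg, 'configs/slim/quant/picodet_s_qat.yml',    # assumed slim config path
                       mode='train')
# cfg['model'] now holds the wrapped (distilled/pruned/quantized) model and
# cfg['slim_type'] records which slim strategy was applied.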

View File

@@ -0,0 +1,919 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from ppdet.core.workspace import register
from ppdet.modeling import ops
from ppdet.modeling.losses.iou_loss import GIoULoss
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'DistillYOLOv3Loss',
'KnowledgeDistillationKLDivLoss',
'DistillPPYOLOELoss',
'FGDFeatureLoss',
'CWDFeatureLoss',
'PKDFeatureLoss',
'MGDFeatureLoss',
]
def parameter_init(mode="kaiming", value=0.):
if mode == "kaiming":
weight_attr = paddle.nn.initializer.KaimingUniform()
elif mode == "constant":
weight_attr = paddle.nn.initializer.Constant(value=value)
else:
weight_attr = paddle.nn.initializer.KaimingUniform()
weight_init = ParamAttr(initializer=weight_attr)
return weight_init
def feature_norm(feat):
    # Normalize each channel of the feature maps to zero mean and unit variance.
assert len(feat.shape) == 4
N, C, H, W = feat.shape
feat = feat.transpose([1, 0, 2, 3]).reshape([C, -1])
mean = feat.mean(axis=-1, keepdim=True)
std = feat.std(axis=-1, keepdim=True)
feat = (feat - mean) / (std + 1e-6)
return feat.reshape([C, N, H, W]).transpose([1, 0, 2, 3])
@register
class DistillYOLOv3Loss(nn.Layer):
def __init__(self, weight=1000):
super(DistillYOLOv3Loss, self).__init__()
self.loss_weight = weight
def obj_weighted_reg(self, sx, sy, sw, sh, tx, ty, tw, th, tobj):
loss_x = ops.sigmoid_cross_entropy_with_logits(sx, F.sigmoid(tx))
loss_y = ops.sigmoid_cross_entropy_with_logits(sy, F.sigmoid(ty))
loss_w = paddle.abs(sw - tw)
loss_h = paddle.abs(sh - th)
loss = paddle.add_n([loss_x, loss_y, loss_w, loss_h])
weighted_loss = paddle.mean(loss * F.sigmoid(tobj))
return weighted_loss
def obj_weighted_cls(self, scls, tcls, tobj):
loss = ops.sigmoid_cross_entropy_with_logits(scls, F.sigmoid(tcls))
weighted_loss = paddle.mean(paddle.multiply(loss, F.sigmoid(tobj)))
return weighted_loss
def obj_loss(self, sobj, tobj):
obj_mask = paddle.cast(tobj > 0., dtype="float32")
obj_mask.stop_gradient = True
loss = paddle.mean(
ops.sigmoid_cross_entropy_with_logits(sobj, obj_mask))
return loss
def forward(self, teacher_model, student_model):
teacher_distill_pairs = teacher_model.yolo_head.loss.distill_pairs
student_distill_pairs = student_model.yolo_head.loss.distill_pairs
distill_reg_loss, distill_cls_loss, distill_obj_loss = [], [], []
for s_pair, t_pair in zip(student_distill_pairs, teacher_distill_pairs):
distill_reg_loss.append(
self.obj_weighted_reg(s_pair[0], s_pair[1], s_pair[2], s_pair[
3], t_pair[0], t_pair[1], t_pair[2], t_pair[3], t_pair[4]))
distill_cls_loss.append(
self.obj_weighted_cls(s_pair[5], t_pair[5], t_pair[4]))
distill_obj_loss.append(self.obj_loss(s_pair[4], t_pair[4]))
distill_reg_loss = paddle.add_n(distill_reg_loss)
distill_cls_loss = paddle.add_n(distill_cls_loss)
distill_obj_loss = paddle.add_n(distill_obj_loss)
loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss
) * self.loss_weight
return loss
@register
class KnowledgeDistillationKLDivLoss(nn.Layer):
"""Loss function for knowledge distilling using KL divergence.
Args:
reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
loss_weight (float): Loss weight of current loss.
T (int): Temperature for distillation.
"""
def __init__(self, reduction='mean', loss_weight=1.0, T=10):
super(KnowledgeDistillationKLDivLoss, self).__init__()
assert reduction in ('none', 'mean', 'sum')
assert T >= 1
self.reduction = reduction
self.loss_weight = loss_weight
self.T = T
def knowledge_distillation_kl_div_loss(self,
pred,
soft_label,
T,
detach_target=True):
r"""Loss function for knowledge distilling using KL divergence.
Args:
pred (Tensor): Predicted logits with shape (N, n + 1).
            soft_label (Tensor): Target logits with shape (N, n + 1).
T (int): Temperature for distillation.
detach_target (bool): Remove soft_label from automatic differentiation
"""
assert pred.shape == soft_label.shape
target = F.softmax(soft_label / T, axis=1)
if detach_target:
target = target.detach()
kd_loss = F.kl_div(
F.log_softmax(
pred / T, axis=1), target, reduction='none').mean(1) * (T * T)
return kd_loss
def forward(self,
pred,
soft_label,
weight=None,
avg_factor=None,
reduction_override=None):
"""Forward function.
Args:
pred (Tensor): Predicted logits with shape (N, n + 1).
            soft_label (Tensor): Target logits with shape (N, n + 1).
weight (Tensor, optional): The weight of loss for each
prediction. Defaults to None.
avg_factor (int, optional): Average factor that is used to average
the loss. Defaults to None.
reduction_override (str, optional): The reduction method used to
override the original reduction method of the loss.
Defaults to None.
"""
assert reduction_override in (None, 'none', 'mean', 'sum')
reduction = (reduction_override
if reduction_override else self.reduction)
loss_kd_out = self.knowledge_distillation_kl_div_loss(
pred, soft_label, T=self.T)
if weight is not None:
loss_kd_out = weight * loss_kd_out
if avg_factor is None:
if reduction == 'none':
loss = loss_kd_out
elif reduction == 'mean':
loss = loss_kd_out.mean()
elif reduction == 'sum':
loss = loss_kd_out.sum()
else:
# if reduction is mean, then average the loss by avg_factor
if reduction == 'mean':
loss = loss_kd_out.sum() / avg_factor
# if reduction is 'none', then do nothing, otherwise raise an error
elif reduction != 'none':
raise ValueError(
'avg_factor can not be used with reduction="sum"')
loss_kd = self.loss_weight * loss
return loss_kd
@register
class DistillPPYOLOELoss(nn.Layer):
def __init__(
self,
loss_weight={'logits': 4.0,
'feat': 1.0},
logits_distill=True,
logits_loss_weight={'class': 1.0,
'iou': 2.5,
'dfl': 0.5},
logits_ld_distill=False,
logits_ld_params={'weight': 20000,
'T': 10},
feat_distill=True,
feat_distiller='fgd',
feat_distill_place='neck_feats',
teacher_width_mult=1.0, # L
student_width_mult=0.75, # M
feat_out_channels=[768, 384, 192]):
super(DistillPPYOLOELoss, self).__init__()
self.loss_weight_logits = loss_weight['logits']
self.loss_weight_feat = loss_weight['feat']
self.logits_distill = logits_distill
self.logits_ld_distill = logits_ld_distill
self.feat_distill = feat_distill
if logits_distill and self.loss_weight_logits > 0:
self.bbox_loss_weight = logits_loss_weight['iou']
self.dfl_loss_weight = logits_loss_weight['dfl']
self.qfl_loss_weight = logits_loss_weight['class']
self.loss_bbox = GIoULoss()
if logits_ld_distill:
self.loss_kd = KnowledgeDistillationKLDivLoss(
loss_weight=logits_ld_params['weight'], T=logits_ld_params['T'])
if feat_distill and self.loss_weight_feat > 0:
assert feat_distiller in ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
assert feat_distill_place in ['backbone_feats', 'neck_feats']
self.feat_distill_place = feat_distill_place
self.t_channel_list = [
int(c * teacher_width_mult) for c in feat_out_channels
]
self.s_channel_list = [
int(c * student_width_mult) for c in feat_out_channels
]
self.distill_feat_loss_modules = []
for i in range(len(feat_out_channels)):
if feat_distiller == 'cwd':
feat_loss_module = CWDFeatureLoss(
student_channels=self.s_channel_list[i],
teacher_channels=self.t_channel_list[i],
normalize=True)
elif feat_distiller == 'fgd':
feat_loss_module = FGDFeatureLoss(
student_channels=self.s_channel_list[i],
teacher_channels=self.t_channel_list[i],
normalize=True,
alpha_fgd=0.00001,
beta_fgd=0.000005,
gamma_fgd=0.00001,
lambda_fgd=0.00000005)
elif feat_distiller == 'pkd':
feat_loss_module = PKDFeatureLoss(
student_channels=self.s_channel_list[i],
teacher_channels=self.t_channel_list[i],
normalize=True,
resize_stu=True)
elif feat_distiller == 'mgd':
feat_loss_module = MGDFeatureLoss(
student_channels=self.s_channel_list[i],
teacher_channels=self.t_channel_list[i],
normalize=True,
loss_func='ssim')
elif feat_distiller == 'mimic':
feat_loss_module = MimicFeatureLoss(
student_channels=self.s_channel_list[i],
teacher_channels=self.t_channel_list[i],
normalize=True)
else:
raise ValueError
self.distill_feat_loss_modules.append(feat_loss_module)
def quality_focal_loss(self,
pred_logits,
soft_target_logits,
beta=2.0,
use_sigmoid=False,
num_total_pos=None):
if use_sigmoid:
func = F.binary_cross_entropy_with_logits
soft_target = F.sigmoid(soft_target_logits)
pred_sigmoid = F.sigmoid(pred_logits)
preds = pred_logits
else:
func = F.binary_cross_entropy
soft_target = soft_target_logits
pred_sigmoid = pred_logits
preds = pred_sigmoid
scale_factor = pred_sigmoid - soft_target
loss = func(
preds, soft_target, reduction='none') * scale_factor.abs().pow(beta)
loss = loss.sum(1)
if num_total_pos is not None:
loss = loss.sum() / num_total_pos
else:
loss = loss.mean()
return loss
def bbox_loss(self, s_bbox, t_bbox, weight_targets=None):
# [x,y,w,h]
if weight_targets is not None:
loss = paddle.sum(self.loss_bbox(s_bbox, t_bbox) * weight_targets)
avg_factor = weight_targets.sum()
loss = loss / avg_factor
else:
loss = paddle.mean(self.loss_bbox(s_bbox, t_bbox))
return loss
def distribution_focal_loss(self,
pred_corners,
target_corners,
weight_targets=None):
target_corners_label = F.softmax(target_corners, axis=-1)
loss_dfl = F.cross_entropy(
pred_corners,
target_corners_label,
soft_label=True,
reduction='none')
loss_dfl = loss_dfl.sum(1)
if weight_targets is not None:
loss_dfl = loss_dfl * (weight_targets.expand([-1, 4]).reshape([-1]))
loss_dfl = loss_dfl.sum(-1) / weight_targets.sum()
else:
loss_dfl = loss_dfl.mean(-1)
        return loss_dfl / 4.0  # 4 directions
def main_kd(self, mask_positive, pred_scores, soft_cls, num_classes):
num_pos = mask_positive.sum()
if num_pos > 0:
cls_mask = mask_positive.unsqueeze(-1).tile([1, 1, num_classes])
pred_scores_pos = paddle.masked_select(
pred_scores, cls_mask).reshape([-1, num_classes])
soft_cls_pos = paddle.masked_select(
soft_cls, cls_mask).reshape([-1, num_classes])
loss_kd = self.loss_kd(
pred_scores_pos, soft_cls_pos, avg_factor=num_pos)
else:
loss_kd = paddle.zeros([1])
return loss_kd
def forward(self, teacher_model, student_model):
teacher_distill_pairs = teacher_model.yolo_head.distill_pairs
student_distill_pairs = student_model.yolo_head.distill_pairs
if self.logits_distill and self.loss_weight_logits > 0:
distill_bbox_loss, distill_dfl_loss, distill_cls_loss = [], [], []
distill_cls_loss.append(
self.quality_focal_loss(
student_distill_pairs['pred_cls_scores'].reshape(
(-1, student_distill_pairs['pred_cls_scores'].shape[-1]
)),
teacher_distill_pairs['pred_cls_scores'].detach().reshape(
(-1, teacher_distill_pairs['pred_cls_scores'].shape[-1]
)),
num_total_pos=student_distill_pairs['pos_num'],
use_sigmoid=False))
distill_bbox_loss.append(
self.bbox_loss(student_distill_pairs['pred_bboxes_pos'],
teacher_distill_pairs['pred_bboxes_pos'].detach(),
weight_targets=student_distill_pairs['bbox_weight']
) if 'pred_bboxes_pos' in student_distill_pairs and \
'pred_bboxes_pos' in teacher_distill_pairs and \
'bbox_weight' in student_distill_pairs
else paddle.zeros([1]))
distill_dfl_loss.append(
self.distribution_focal_loss(
student_distill_pairs['pred_dist_pos'].reshape((-1, student_distill_pairs['pred_dist_pos'].shape[-1])),
teacher_distill_pairs['pred_dist_pos'].detach().reshape((-1, teacher_distill_pairs['pred_dist_pos'].shape[-1])), \
weight_targets=student_distill_pairs['bbox_weight']
) if 'pred_dist_pos' in student_distill_pairs and \
'pred_dist_pos' in teacher_distill_pairs and \
'bbox_weight' in student_distill_pairs
else paddle.zeros([1]))
distill_cls_loss = paddle.add_n(distill_cls_loss)
distill_bbox_loss = paddle.add_n(distill_bbox_loss)
distill_dfl_loss = paddle.add_n(distill_dfl_loss)
logits_loss = distill_bbox_loss * self.bbox_loss_weight + distill_cls_loss * self.qfl_loss_weight + distill_dfl_loss * self.dfl_loss_weight
if self.logits_ld_distill:
loss_kd = self.main_kd(
student_distill_pairs['mask_positive_select'],
student_distill_pairs['pred_cls_scores'],
teacher_distill_pairs['pred_cls_scores'],
student_model.yolo_head.num_classes, )
logits_loss += loss_kd
else:
logits_loss = paddle.zeros([1])
if self.feat_distill and self.loss_weight_feat > 0:
feat_loss_list = []
inputs = student_model.inputs
assert 'gt_bbox' in inputs
assert self.feat_distill_place in student_distill_pairs
assert self.feat_distill_place in teacher_distill_pairs
stu_feats = student_distill_pairs[self.feat_distill_place]
tea_feats = teacher_distill_pairs[self.feat_distill_place]
for i, loss_module in enumerate(self.distill_feat_loss_modules):
feat_loss_list.append(
loss_module(stu_feats[i], tea_feats[i], inputs))
feat_loss = paddle.add_n(feat_loss_list)
else:
feat_loss = paddle.zeros([1])
student_model.yolo_head.distill_pairs.clear()
teacher_model.yolo_head.distill_pairs.clear()
return logits_loss * self.loss_weight_logits, feat_loss * self.loss_weight_feat
@register
class CWDFeatureLoss(nn.Layer):
def __init__(self,
student_channels,
teacher_channels,
normalize=False,
tau=1.0,
weight=1.0):
super(CWDFeatureLoss, self).__init__()
self.normalize = normalize
self.tau = tau
self.loss_weight = weight
if student_channels != teacher_channels:
self.align = nn.Conv2D(
student_channels,
teacher_channels,
kernel_size=1,
stride=1,
padding=0)
else:
self.align = None
def distill_softmax(self, x, tau):
_, _, w, h = paddle.shape(x)
x = paddle.reshape(x, [-1, w * h])
x /= tau
return F.softmax(x, axis=1)
def forward(self, preds_s, preds_t, inputs=None):
assert preds_s.shape[-2:] == preds_t.shape[-2:]
N, C, H, W = preds_s.shape
eps = 1e-5
if self.align is not None:
preds_s = self.align(preds_s)
if self.normalize:
preds_s = feature_norm(preds_s)
preds_t = feature_norm(preds_t)
softmax_pred_s = self.distill_softmax(preds_s, self.tau)
softmax_pred_t = self.distill_softmax(preds_t, self.tau)
loss = paddle.sum(-softmax_pred_t * paddle.log(eps + softmax_pred_s) +
softmax_pred_t * paddle.log(eps + softmax_pred_t))
return self.loss_weight * loss / (C * N)
@register
class FGDFeatureLoss(nn.Layer):
"""
Focal and Global Knowledge Distillation for Detectors
    The code is referenced from https://github.com/yzd-v/FGD/blob/master/mmdet/distillation/losses/fgd.py
Args:
student_channels (int): The number of channels in the student's FPN feature map. Default to 256.
teacher_channels (int): The number of channels in the teacher's FPN feature map. Default to 256.
normalize (bool): Whether to normalize the feature maps.
temp (float, optional): The temperature coefficient. Defaults to 0.5.
alpha_fgd (float, optional): The weight of fg_loss. Defaults to 0.001
beta_fgd (float, optional): The weight of bg_loss. Defaults to 0.0005
gamma_fgd (float, optional): The weight of mask_loss. Defaults to 0.001
lambda_fgd (float, optional): The weight of relation_loss. Defaults to 0.000005
"""
def __init__(self,
student_channels,
teacher_channels,
normalize=False,
loss_weight=1.0,
temp=0.5,
alpha_fgd=0.001,
beta_fgd=0.0005,
gamma_fgd=0.001,
lambda_fgd=0.000005):
super(FGDFeatureLoss, self).__init__()
self.normalize = normalize
self.loss_weight = loss_weight
self.temp = temp
self.alpha_fgd = alpha_fgd
self.beta_fgd = beta_fgd
self.gamma_fgd = gamma_fgd
self.lambda_fgd = lambda_fgd
kaiming_init = parameter_init("kaiming")
zeros_init = parameter_init("constant", 0.0)
if student_channels != teacher_channels:
self.align = nn.Conv2D(
student_channels,
teacher_channels,
kernel_size=1,
stride=1,
padding=0,
weight_attr=kaiming_init)
student_channels = teacher_channels
else:
self.align = None
self.conv_mask_s = nn.Conv2D(
student_channels, 1, kernel_size=1, weight_attr=kaiming_init)
self.conv_mask_t = nn.Conv2D(
teacher_channels, 1, kernel_size=1, weight_attr=kaiming_init)
self.stu_conv_block = nn.Sequential(
nn.Conv2D(
student_channels,
student_channels // 2,
kernel_size=1,
weight_attr=zeros_init),
nn.LayerNorm([student_channels // 2, 1, 1]),
nn.ReLU(),
nn.Conv2D(
student_channels // 2,
student_channels,
kernel_size=1,
weight_attr=zeros_init))
self.tea_conv_block = nn.Sequential(
nn.Conv2D(
teacher_channels,
teacher_channels // 2,
kernel_size=1,
weight_attr=zeros_init),
nn.LayerNorm([teacher_channels // 2, 1, 1]),
nn.ReLU(),
nn.Conv2D(
teacher_channels // 2,
teacher_channels,
kernel_size=1,
weight_attr=zeros_init))
def spatial_channel_attention(self, x, t=0.5):
shape = paddle.shape(x)
N, C, H, W = shape
_f = paddle.abs(x)
spatial_map = paddle.reshape(
paddle.mean(
_f, axis=1, keepdim=True) / t, [N, -1])
spatial_map = F.softmax(spatial_map, axis=1, dtype="float32") * H * W
spatial_att = paddle.reshape(spatial_map, [N, H, W])
channel_map = paddle.mean(
paddle.mean(
_f, axis=2, keepdim=False), axis=2, keepdim=False)
channel_att = F.softmax(channel_map / t, axis=1, dtype="float32") * C
return [spatial_att, channel_att]
def spatial_pool(self, x, mode="teacher"):
batch, channel, width, height = x.shape
x_copy = x
x_copy = paddle.reshape(x_copy, [batch, channel, height * width])
x_copy = x_copy.unsqueeze(1)
if mode.lower() == "student":
context_mask = self.conv_mask_s(x)
else:
context_mask = self.conv_mask_t(x)
context_mask = paddle.reshape(context_mask, [batch, 1, height * width])
context_mask = F.softmax(context_mask, axis=2)
context_mask = context_mask.unsqueeze(-1)
context = paddle.matmul(x_copy, context_mask)
context = paddle.reshape(context, [batch, channel, 1, 1])
return context
def mask_loss(self, stu_channel_att, tea_channel_att, stu_spatial_att,
tea_spatial_att):
def _func(a, b):
return paddle.sum(paddle.abs(a - b)) / len(a)
mask_loss = _func(stu_channel_att, tea_channel_att) + _func(
stu_spatial_att, tea_spatial_att)
return mask_loss
def feature_loss(self, stu_feature, tea_feature, mask_fg, mask_bg,
tea_channel_att, tea_spatial_att):
mask_fg = mask_fg.unsqueeze(axis=1)
mask_bg = mask_bg.unsqueeze(axis=1)
tea_channel_att = tea_channel_att.unsqueeze(axis=-1).unsqueeze(axis=-1)
tea_spatial_att = tea_spatial_att.unsqueeze(axis=1)
fea_t = paddle.multiply(tea_feature, paddle.sqrt(tea_spatial_att))
fea_t = paddle.multiply(fea_t, paddle.sqrt(tea_channel_att))
fg_fea_t = paddle.multiply(fea_t, paddle.sqrt(mask_fg))
bg_fea_t = paddle.multiply(fea_t, paddle.sqrt(mask_bg))
fea_s = paddle.multiply(stu_feature, paddle.sqrt(tea_spatial_att))
fea_s = paddle.multiply(fea_s, paddle.sqrt(tea_channel_att))
fg_fea_s = paddle.multiply(fea_s, paddle.sqrt(mask_fg))
bg_fea_s = paddle.multiply(fea_s, paddle.sqrt(mask_bg))
fg_loss = F.mse_loss(fg_fea_s, fg_fea_t, reduction="sum") / len(mask_fg)
bg_loss = F.mse_loss(bg_fea_s, bg_fea_t, reduction="sum") / len(mask_bg)
return fg_loss, bg_loss
def relation_loss(self, stu_feature, tea_feature):
context_s = self.spatial_pool(stu_feature, "student")
context_t = self.spatial_pool(tea_feature, "teacher")
out_s = stu_feature + self.stu_conv_block(context_s)
out_t = tea_feature + self.tea_conv_block(context_t)
rela_loss = F.mse_loss(out_s, out_t, reduction="sum") / len(out_s)
return rela_loss
def mask_value(self, mask, xl, xr, yl, yr, value):
mask[xl:xr, yl:yr] = paddle.maximum(mask[xl:xr, yl:yr], value)
return mask
def forward(self, stu_feature, tea_feature, inputs):
        assert stu_feature.shape[-2:] == tea_feature.shape[-2:]
assert "gt_bbox" in inputs.keys() and "im_shape" in inputs.keys()
gt_bboxes = inputs['gt_bbox']
ins_shape = [
inputs['im_shape'][i] for i in range(inputs['im_shape'].shape[0])
]
index_gt = []
for i in range(len(gt_bboxes)):
if gt_bboxes[i].size > 2:
index_gt.append(i)
        # only distill features for images that have labeled GT boxes
if len(index_gt) != len(gt_bboxes):
index_gt_t = paddle.to_tensor(index_gt)
stu_feature = paddle.index_select(stu_feature, index_gt_t)
tea_feature = paddle.index_select(tea_feature, index_gt_t)
ins_shape = [ins_shape[c] for c in index_gt]
gt_bboxes = [gt_bboxes[c] for c in index_gt]
assert len(gt_bboxes) == tea_feature.shape[0]
if self.align is not None:
stu_feature = self.align(stu_feature)
if self.normalize:
stu_feature = feature_norm(stu_feature)
tea_feature = feature_norm(tea_feature)
tea_spatial_att, tea_channel_att = self.spatial_channel_attention(
tea_feature, self.temp)
stu_spatial_att, stu_channel_att = self.spatial_channel_attention(
stu_feature, self.temp)
mask_fg = paddle.zeros(tea_spatial_att.shape)
mask_bg = paddle.ones_like(tea_spatial_att)
one_tmp = paddle.ones([*tea_spatial_att.shape[1:]])
zero_tmp = paddle.zeros([*tea_spatial_att.shape[1:]])
mask_fg.stop_gradient = True
mask_bg.stop_gradient = True
one_tmp.stop_gradient = True
zero_tmp.stop_gradient = True
wmin, wmax, hmin, hmax = [], [], [], []
if len(gt_bboxes) == 0:
loss = self.relation_loss(stu_feature, tea_feature)
return self.lambda_fgd * loss
N, _, H, W = stu_feature.shape
for i in range(N):
tmp_box = paddle.ones_like(gt_bboxes[i])
tmp_box.stop_gradient = True
tmp_box[:, 0] = gt_bboxes[i][:, 0] / ins_shape[i][1] * W
tmp_box[:, 2] = gt_bboxes[i][:, 2] / ins_shape[i][1] * W
tmp_box[:, 1] = gt_bboxes[i][:, 1] / ins_shape[i][0] * H
tmp_box[:, 3] = gt_bboxes[i][:, 3] / ins_shape[i][0] * H
zero = paddle.zeros_like(tmp_box[:, 0], dtype="int32")
ones = paddle.ones_like(tmp_box[:, 2], dtype="int32")
zero.stop_gradient = True
ones.stop_gradient = True
wmin.append(
paddle.cast(paddle.floor(tmp_box[:, 0]), "int32").maximum(zero))
wmax.append(paddle.cast(paddle.ceil(tmp_box[:, 2]), "int32"))
hmin.append(
paddle.cast(paddle.floor(tmp_box[:, 1]), "int32").maximum(zero))
hmax.append(paddle.cast(paddle.ceil(tmp_box[:, 3]), "int32"))
area_recip = 1.0 / (
hmax[i].reshape([1, -1]) + 1 - hmin[i].reshape([1, -1])) / (
wmax[i].reshape([1, -1]) + 1 - wmin[i].reshape([1, -1]))
for j in range(len(gt_bboxes[i])):
if gt_bboxes[i][j].sum() > 0:
mask_fg[i] = self.mask_value(
mask_fg[i], hmin[i][j], hmax[i][j] + 1, wmin[i][j],
wmax[i][j] + 1, area_recip[0][j])
mask_bg[i] = paddle.where(mask_fg[i] > zero_tmp, zero_tmp, one_tmp)
if paddle.sum(mask_bg[i]):
mask_bg[i] /= paddle.sum(mask_bg[i])
fg_loss, bg_loss = self.feature_loss(stu_feature, tea_feature, mask_fg,
mask_bg, tea_channel_att,
tea_spatial_att)
mask_loss = self.mask_loss(stu_channel_att, tea_channel_att,
stu_spatial_att, tea_spatial_att)
rela_loss = self.relation_loss(stu_feature, tea_feature)
loss = self.alpha_fgd * fg_loss + self.beta_fgd * bg_loss \
+ self.gamma_fgd * mask_loss + self.lambda_fgd * rela_loss
return loss * self.loss_weight
@register
class PKDFeatureLoss(nn.Layer):
"""
PKD: General Distillation Framework for Object Detectors via Pearson Correlation Coefficient.
Args:
loss_weight (float): Weight of loss. Defaults to 1.0.
        resize_stu (bool): If True, the student features are down/up sampled to
            the teacher's spatial size when the two differ; if False, the
            teacher features are resized instead. Defaults to True.
"""
def __init__(self,
student_channels=256,
teacher_channels=256,
normalize=True,
loss_weight=1.0,
resize_stu=True):
super(PKDFeatureLoss, self).__init__()
self.normalize = normalize
self.loss_weight = loss_weight
self.resize_stu = resize_stu
def forward(self, stu_feature, tea_feature, inputs=None):
size_s, size_t = stu_feature.shape[2:], tea_feature.shape[2:]
if size_s[0] != size_t[0]:
if self.resize_stu:
stu_feature = F.interpolate(
stu_feature, size_t, mode='bilinear')
else:
tea_feature = F.interpolate(
tea_feature, size_s, mode='bilinear')
assert stu_feature.shape == tea_feature.shape
if self.normalize:
stu_feature = feature_norm(stu_feature)
tea_feature = feature_norm(tea_feature)
loss = F.mse_loss(stu_feature, tea_feature) / 2
return loss * self.loss_weight
@register
class MimicFeatureLoss(nn.Layer):
def __init__(self,
student_channels=256,
teacher_channels=256,
normalize=True,
loss_weight=1.0):
super(MimicFeatureLoss, self).__init__()
self.normalize = normalize
self.loss_weight = loss_weight
self.mse_loss = nn.MSELoss()
if student_channels != teacher_channels:
self.align = nn.Conv2D(
student_channels,
teacher_channels,
kernel_size=1,
stride=1,
padding=0)
else:
self.align = None
def forward(self, stu_feature, tea_feature, inputs=None):
if self.align is not None:
stu_feature = self.align(stu_feature)
if self.normalize:
stu_feature = feature_norm(stu_feature)
tea_feature = feature_norm(tea_feature)
loss = self.mse_loss(stu_feature, tea_feature)
return loss * self.loss_weight
@register
class MGDFeatureLoss(nn.Layer):
def __init__(self,
student_channels=256,
teacher_channels=256,
normalize=True,
loss_weight=1.0,
loss_func='mse'):
super(MGDFeatureLoss, self).__init__()
self.normalize = normalize
self.loss_weight = loss_weight
assert loss_func in ['mse', 'ssim']
self.loss_func = loss_func
self.mse_loss = nn.MSELoss(reduction='sum')
self.ssim_loss = SSIM(11)
kaiming_init = parameter_init("kaiming")
if student_channels != teacher_channels:
self.align = nn.Conv2D(
student_channels,
teacher_channels,
kernel_size=1,
stride=1,
padding=0,
weight_attr=kaiming_init,
bias_attr=False)
else:
self.align = None
self.generation = nn.Sequential(
nn.Conv2D(
teacher_channels, teacher_channels, kernel_size=3, padding=1),
nn.ReLU(),
nn.Conv2D(
teacher_channels, teacher_channels, kernel_size=3, padding=1))
def forward(self, stu_feature, tea_feature, inputs=None):
N = stu_feature.shape[0]
if self.align is not None:
stu_feature = self.align(stu_feature)
stu_feature = self.generation(stu_feature)
if self.normalize:
stu_feature = feature_norm(stu_feature)
tea_feature = feature_norm(tea_feature)
if self.loss_func == 'mse':
loss = self.mse_loss(stu_feature, tea_feature) / N
elif self.loss_func == 'ssim':
ssim_loss = self.ssim_loss(stu_feature, tea_feature)
loss = paddle.clip((1 - ssim_loss) / 2, 0, 1)
else:
raise ValueError
return loss * self.loss_weight
class SSIM(nn.Layer):
def __init__(self, window_size=11, size_average=True):
super(SSIM, self).__init__()
self.window_size = window_size
self.size_average = size_average
self.channel = 1
self.window = self.create_window(window_size, self.channel)
def gaussian(self, window_size, sigma):
gauss = paddle.to_tensor([
math.exp(-(x - window_size // 2)**2 / float(2 * sigma**2))
for x in range(window_size)
])
return gauss / gauss.sum()
def create_window(self, window_size, channel):
_1D_window = self.gaussian(window_size, 1.5).unsqueeze(1)
_2D_window = _1D_window.mm(_1D_window.t()).unsqueeze(0).unsqueeze(0)
window = _2D_window.expand([channel, 1, window_size, window_size])
return window
def _ssim(self, img1, img2, window, window_size, channel,
size_average=True):
mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
mu1_sq = mu1.pow(2)
mu2_sq = mu2.pow(2)
mu1_mu2 = mu1 * mu2
sigma1_sq = F.conv2d(
img1 * img1, window, padding=window_size // 2,
groups=channel) - mu1_sq
sigma2_sq = F.conv2d(
img2 * img2, window, padding=window_size // 2,
groups=channel) - mu2_sq
sigma12 = F.conv2d(
img1 * img2, window, padding=window_size // 2,
groups=channel) - mu1_mu2
C1 = 0.01**2
C2 = 0.03**2
ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
1e-12 + (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
if size_average:
return ssim_map.mean()
else:
return ssim_map.mean([1, 2, 3])
def forward(self, img1, img2):
channel = img1.shape[1]
if channel == self.channel and self.window.dtype == img1.dtype:
window = self.window
else:
window = self.create_window(self.window_size, channel)
self.window = window
self.channel = channel
return self._ssim(img1, img2, window, self.window_size, channel,
self.size_average)
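
A quick shape-level smoke test of the feature losses defined above (not part of this commit); channel counts are arbitrary, and FGDFeatureLoss would additionally need a batch dict carrying 'gt_bbox' and 'im_shape'.

import paddle
from ppdet.slim.distill_loss import CWDFeatureLoss

stu_feat = paddle.rand([2, 96, 20, 20])    # student feature map (N, C_s, H, W)
tea_feat = paddle.rand([2, 128, 20, 20])   # teacher feature map (N, C_t, H, W)

cwd = CWDFeatureLoss(student_channels=96, teacher_channels=128, normalize=True)
loss = cwd(stu_feat, tea_feat)             # scalar channel-wise distillation loss
# FGDFeatureLoss(stu_feat, tea_feat, inputs) additionally builds foreground and
# background masks from inputs['gt_bbox'] and inputs['im_shape'].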

View File

@@ -0,0 +1,352 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from ppdet.core.workspace import register, create, load_config
from ppdet.utils.checkpoint import load_pretrain_weight
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
__all__ = [
'DistillModel',
'FGDDistillModel',
'CWDDistillModel',
'LDDistillModel',
'PPYOLOEDistillModel',
]
@register
class DistillModel(nn.Layer):
"""
Build common distill model.
Args:
cfg: The student config.
slim_cfg: The teacher and distill config.
"""
def __init__(self, cfg, slim_cfg):
super(DistillModel, self).__init__()
self.arch = cfg.architecture
self.stu_cfg = cfg
self.student_model = create(self.stu_cfg.architecture)
if 'pretrain_weights' in self.stu_cfg and self.stu_cfg.pretrain_weights:
stu_pretrain = self.stu_cfg.pretrain_weights
else:
stu_pretrain = None
slim_cfg = load_config(slim_cfg)
self.tea_cfg = slim_cfg
self.teacher_model = create(self.tea_cfg.architecture)
if 'pretrain_weights' in self.tea_cfg and self.tea_cfg.pretrain_weights:
tea_pretrain = self.tea_cfg.pretrain_weights
else:
tea_pretrain = None
self.distill_cfg = slim_cfg
# load pretrain weights
self.is_inherit = False
if stu_pretrain:
if self.is_inherit and tea_pretrain:
load_pretrain_weight(self.student_model, tea_pretrain)
logger.debug(
"Inheriting! loading teacher weights to student model!")
load_pretrain_weight(self.student_model, stu_pretrain)
logger.info("Student model has loaded pretrain weights!")
if tea_pretrain:
load_pretrain_weight(self.teacher_model, tea_pretrain)
logger.info("Teacher model has loaded pretrain weights!")
self.teacher_model.eval()
for param in self.teacher_model.parameters():
param.trainable = False
self.distill_loss = self.build_loss(self.distill_cfg)
def build_loss(self, distill_cfg):
if 'distill_loss' in distill_cfg and distill_cfg.distill_loss:
return create(distill_cfg.distill_loss)
else:
return None
def parameters(self):
return self.student_model.parameters()
def forward(self, inputs):
if self.training:
student_loss = self.student_model(inputs)
with paddle.no_grad():
teacher_loss = self.teacher_model(inputs)
loss = self.distill_loss(self.teacher_model, self.student_model)
student_loss['distill_loss'] = loss
student_loss['teacher_loss'] = teacher_loss['loss']
student_loss['loss'] += student_loss['distill_loss']
return student_loss
else:
return self.student_model(inputs)
@register
class FGDDistillModel(DistillModel):
"""
Build FGD distill model.
Args:
cfg: The student config.
slim_cfg: The teacher and distill config.
"""
def __init__(self, cfg, slim_cfg):
super(FGDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
assert self.arch in ['RetinaNet', 'PicoDet'
], 'Unsupported arch: {}'.format(self.arch)
self.is_inherit = True
def build_loss(self, distill_cfg):
assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
loss_func = dict()
name_list = distill_cfg.distill_loss_name
for name in name_list:
loss_func[name] = create(distill_cfg.distill_loss)
return loss_func
def forward(self, inputs):
if self.training:
s_body_feats = self.student_model.backbone(inputs)
s_neck_feats = self.student_model.neck(s_body_feats)
with paddle.no_grad():
t_body_feats = self.teacher_model.backbone(inputs)
t_neck_feats = self.teacher_model.neck(t_body_feats)
loss_dict = {}
for idx, k in enumerate(self.distill_loss):
loss_dict[k] = self.distill_loss[k](s_neck_feats[idx],
t_neck_feats[idx], inputs)
if self.arch == "RetinaNet":
loss = self.student_model.head(s_neck_feats, inputs)
elif self.arch == "PicoDet":
head_outs = self.student_model.head(
s_neck_feats, self.student_model.export_post_process)
loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
total_loss = paddle.add_n(list(loss_gfl.values()))
loss = {}
loss.update(loss_gfl)
loss.update({'loss': total_loss})
else:
raise ValueError(f"Unsupported model {self.arch}")
for k in loss_dict:
loss['loss'] += loss_dict[k]
loss[k] = loss_dict[k]
return loss
else:
body_feats = self.student_model.backbone(inputs)
neck_feats = self.student_model.neck(body_feats)
head_outs = self.student_model.head(neck_feats)
if self.arch == "RetinaNet":
bbox, bbox_num = self.student_model.head.post_process(
head_outs, inputs['im_shape'], inputs['scale_factor'])
return {'bbox': bbox, 'bbox_num': bbox_num}
elif self.arch == "PicoDet":
head_outs = self.student_model.head(
neck_feats, self.student_model.export_post_process)
scale_factor = inputs['scale_factor']
bboxes, bbox_num = self.student_model.head.post_process(
head_outs,
scale_factor,
export_nms=self.student_model.export_nms)
return {'bbox': bboxes, 'bbox_num': bbox_num}
else:
raise ValueError(f"Unsupported model {self.arch}")
@register
class CWDDistillModel(DistillModel):
"""
Build CWD distill model.
Args:
cfg: The student config.
slim_cfg: The teacher and distill config.
"""
def __init__(self, cfg, slim_cfg):
super(CWDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
assert self.arch in ['GFL', 'RetinaNet'], 'Unsupported arch: {}'.format(
self.arch)
def build_loss(self, distill_cfg):
assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
loss_func = dict()
name_list = distill_cfg.distill_loss_name
for name in name_list:
loss_func[name] = create(distill_cfg.distill_loss)
return loss_func
def get_loss_retinanet(self, stu_fea_list, tea_fea_list, inputs):
loss = self.student_model.head(stu_fea_list, inputs)
loss_dict = {}
for idx, k in enumerate(self.distill_loss):
loss_dict[k] = self.distill_loss[k](stu_fea_list[idx],
tea_fea_list[idx])
loss['loss'] += loss_dict[k]
loss[k] = loss_dict[k]
return loss
def get_loss_gfl(self, stu_fea_list, tea_fea_list, inputs):
loss = {}
head_outs = self.student_model.head(stu_fea_list)
loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
loss.update(loss_gfl)
total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss})
feat_loss = {}
loss_dict = {}
s_cls_feat, t_cls_feat = [], []
for s_neck_f, t_neck_f in zip(stu_fea_list, tea_fea_list):
conv_cls_feat, _ = self.student_model.head.conv_feat(s_neck_f)
cls_score = self.student_model.head.gfl_head_cls(conv_cls_feat)
t_conv_cls_feat, _ = self.teacher_model.head.conv_feat(t_neck_f)
t_cls_score = self.teacher_model.head.gfl_head_cls(t_conv_cls_feat)
s_cls_feat.append(cls_score)
t_cls_feat.append(t_cls_score)
for idx, k in enumerate(self.distill_loss):
loss_dict[k] = self.distill_loss[k](s_cls_feat[idx],
t_cls_feat[idx])
feat_loss[f"neck_f_{idx}"] = self.distill_loss[k](stu_fea_list[idx],
tea_fea_list[idx])
for k in feat_loss:
loss['loss'] += feat_loss[k]
loss[k] = feat_loss[k]
for k in loss_dict:
loss['loss'] += loss_dict[k]
loss[k] = loss_dict[k]
return loss
def forward(self, inputs):
if self.training:
s_body_feats = self.student_model.backbone(inputs)
s_neck_feats = self.student_model.neck(s_body_feats)
with paddle.no_grad():
t_body_feats = self.teacher_model.backbone(inputs)
t_neck_feats = self.teacher_model.neck(t_body_feats)
if self.arch == "RetinaNet":
loss = self.get_loss_retinanet(s_neck_feats, t_neck_feats,
inputs)
elif self.arch == "GFL":
loss = self.get_loss_gfl(s_neck_feats, t_neck_feats, inputs)
else:
raise ValueError(f"unsupported arch {self.arch}")
return loss
else:
body_feats = self.student_model.backbone(inputs)
neck_feats = self.student_model.neck(body_feats)
head_outs = self.student_model.head(neck_feats)
if self.arch == "RetinaNet":
bbox, bbox_num = self.student_model.head.post_process(
head_outs, inputs['im_shape'], inputs['scale_factor'])
return {'bbox': bbox, 'bbox_num': bbox_num}
elif self.arch == "GFL":
bbox_pred, bbox_num = head_outs
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
else:
raise ValueError(f"unsupported arch {self.arch}")
@register
class LDDistillModel(DistillModel):
"""
Build LD distill model.
Args:
cfg: The student config.
slim_cfg: The teacher and distill config.
"""
def __init__(self, cfg, slim_cfg):
super(LDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
assert self.arch in ['GFL'], 'Unsupported arch: {}'.format(self.arch)
def forward(self, inputs):
if self.training:
s_body_feats = self.student_model.backbone(inputs)
s_neck_feats = self.student_model.neck(s_body_feats)
s_head_outs = self.student_model.head(s_neck_feats)
with paddle.no_grad():
t_body_feats = self.teacher_model.backbone(inputs)
t_neck_feats = self.teacher_model.neck(t_body_feats)
t_head_outs = self.teacher_model.head(t_neck_feats)
soft_label_list = t_head_outs[0]
soft_targets_list = t_head_outs[1]
student_loss = self.student_model.head.get_loss(
s_head_outs, inputs, soft_label_list, soft_targets_list)
total_loss = paddle.add_n(list(student_loss.values()))
student_loss['loss'] = total_loss
return student_loss
else:
return self.student_model(inputs)
@register
class PPYOLOEDistillModel(DistillModel):
"""
    Build PPYOLOE distill model (only used for the PPYOLOE architecture).
Args:
cfg: The student config.
slim_cfg: The teacher and distill config.
"""
def __init__(self, cfg, slim_cfg):
super(PPYOLOEDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
assert self.arch in ['PPYOLOE'], 'Unsupported arch: {}'.format(
self.arch)
def forward(self, inputs, alpha=0.125):
if self.training:
with paddle.no_grad():
teacher_loss = self.teacher_model(inputs)
if hasattr(self.teacher_model.yolo_head, "assigned_labels"):
                self.student_model.yolo_head.assigned_labels = \
                    self.teacher_model.yolo_head.assigned_labels
                self.student_model.yolo_head.assigned_bboxes = \
                    self.teacher_model.yolo_head.assigned_bboxes
                self.student_model.yolo_head.assigned_scores = \
                    self.teacher_model.yolo_head.assigned_scores
delattr(self.teacher_model.yolo_head, "assigned_labels")
delattr(self.teacher_model.yolo_head, "assigned_bboxes")
delattr(self.teacher_model.yolo_head, "assigned_scores")
student_loss = self.student_model(inputs)
logits_loss, feat_loss = self.distill_loss(self.teacher_model,
self.student_model)
det_total_loss = student_loss['loss']
total_loss = alpha * (det_total_loss + logits_loss + feat_loss)
student_loss['loss'] = total_loss
student_loss['det_loss'] = det_total_loss
student_loss['logits_loss'] = logits_loss
student_loss['feat_loss'] = feat_loss
return student_loss
else:
return self.student_model(inputs)
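
A hypothetical sketch of what build_slim_model does for the PPYOLOE distillation case (not part of this commit); both YAML paths are placeholders.

from ppdet.core.workspace import load_config
from ppdet.slim.distill_model import PPYOLOEDistillModel

student_cfg = load_config('configs/ppyoloe/ppyoloe_crn_m_300e_coco.yml')              # assumed path
model = PPYOLOEDistillModel(student_cfg, 'configs/slim/distill/ppyoloe_distill.yml')  # assumed path

# During training, one batch dict yields a loss dict whose 'loss' entry already
# folds in the distillation terms:
#   out = model(inputs)
#   out['loss'], out['det_loss'], out['logits_loss'], out['feat_loss']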

View File

@@ -0,0 +1,89 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import load_config, merge_config, create
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from ppdet.utils.logger import setup_logger
from ppdet.core.workspace import register, serializable
from paddle.utils import try_import
logger = setup_logger(__name__)
@register
@serializable
class OFA(object):
def __init__(self, ofa_config):
super(OFA, self).__init__()
self.ofa_config = ofa_config
def __call__(self, model, param_state_dict):
paddleslim = try_import('paddleslim')
from paddleslim.nas.ofa import OFA, RunConfig, utils
from paddleslim.nas.ofa.convert_super import Convert, supernet
task = self.ofa_config['task']
expand_ratio = self.ofa_config['expand_ratio']
skip_neck = self.ofa_config['skip_neck']
skip_head = self.ofa_config['skip_head']
run_config = self.ofa_config['RunConfig']
if 'skip_layers' in run_config:
skip_layers = run_config['skip_layers']
else:
skip_layers = []
# supernet config
sp_config = supernet(expand_ratio=expand_ratio)
# convert to supernet
model = Convert(sp_config).convert(model)
skip_names = []
if skip_neck:
skip_names.append('neck.')
if skip_head:
skip_names.append('head.')
for name, sublayer in model.named_sublayers():
for n in skip_names:
if n in name:
skip_layers.append(name)
run_config['skip_layers'] = skip_layers
run_config = RunConfig(**run_config)
# build ofa model
ofa_model = OFA(model, run_config=run_config)
ofa_model.set_epoch(0)
ofa_model.set_task(task)
input_spec = [{
"image": paddle.ones(
shape=[1, 3, 640, 640], dtype='float32'),
"im_shape": paddle.full(
[1, 2], 640, dtype='float32'),
"scale_factor": paddle.ones(
shape=[1, 2], dtype='float32')
}]
ofa_model._clear_search_space(input_spec=input_spec)
ofa_model._build_ss = True
check_ss = ofa_model._sample_config('expand_ratio', phase=None)
# tokenize the search space
ofa_model.tokenize()
# check token map, search cands and search space
logger.info('Token map is {}'.format(ofa_model.token_map))
logger.info('Search candidates is {}'.format(ofa_model.search_cands))
logger.info('The length of search_space is {}, search_space is {}'.
format(len(ofa_model._ofa_layers), ofa_model._ofa_layers))
# set model state dict into ofa model
utils.set_state_dict(ofa_model.model, param_state_dict)
return ofa_model
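
A sketch of the ofa_config dict this wrapper consumes; the key names are taken from __call__ above, while the concrete values are illustrative only.

ofa_config = {
    'task': 'expand_ratio',
    'expand_ratio': [0.5, 0.75, 1.0],   # candidate ratios for the supernet
    'skip_neck': True,                  # exclude 'neck.' sublayers from the search space
    'skip_head': True,                  # exclude 'head.' sublayers from the search space
    'RunConfig': {
        'skip_layers': [],              # optional extra layer names to skip
        # remaining fields are forwarded verbatim to paddleslim's RunConfig
    },
}
# ofa_model = OFA(ofa_config)(model, model.state_dict())   # `model`: a built detector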

View File

@@ -0,0 +1,151 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle.utils import try_import
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
def print_prune_params(model):
model_dict = model.state_dict()
for key in model_dict.keys():
weight_name = model_dict[key].name
logger.info('Parameter name: {}, shape: {}'.format(
weight_name, model_dict[key].shape))
@register
@serializable
class Pruner(object):
def __init__(self,
criterion,
pruned_params,
pruned_ratios,
print_params=False):
super(Pruner, self).__init__()
assert criterion in ['l1_norm', 'fpgm'], \
"unsupported prune criterion: {}".format(criterion)
self.criterion = criterion
self.pruned_params = pruned_params
self.pruned_ratios = pruned_ratios
self.print_params = print_params
def __call__(self, model):
        # FIXME: adapt to the training graph when the training and inference
        # graphs differ; currently only the inference graph is pruned.
model.eval()
paddleslim = try_import('paddleslim')
from paddleslim.analysis import dygraph_flops as flops
input_spec = [{
"image": paddle.ones(
shape=[1, 3, 640, 640], dtype='float32'),
"im_shape": paddle.full(
[1, 2], 640, dtype='float32'),
"scale_factor": paddle.ones(
shape=[1, 2], dtype='float32')
}]
if self.print_params:
print_prune_params(model)
ori_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
if self.criterion == 'fpgm':
pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
elif self.criterion == 'l1_norm':
pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
logger.info("pruned params: {}".format(self.pruned_params))
pruned_ratios = [float(n) for n in self.pruned_ratios]
ratios = {}
for i, param in enumerate(self.pruned_params):
ratios[param] = pruned_ratios[i]
pruner.prune_vars(ratios, [0])
pruned_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
pruned_flops, (ori_flops - pruned_flops) / ori_flops))
return model
@register
@serializable
class PrunerQAT(object):
def __init__(self, criterion, pruned_params, pruned_ratios,
print_prune_params, quant_config, print_qat_model):
super(PrunerQAT, self).__init__()
assert criterion in ['l1_norm', 'fpgm'], \
"unsupported prune criterion: {}".format(criterion)
# Pruner hyperparameter
self.criterion = criterion
self.pruned_params = pruned_params
self.pruned_ratios = pruned_ratios
self.print_prune_params = print_prune_params
# QAT hyperparameter
self.quant_config = quant_config
self.print_qat_model = print_qat_model
def __call__(self, model):
        # FIXME: adapt to the training graph when the training and inference
        # graphs differ; currently only the inference graph is pruned.
model.eval()
paddleslim = try_import('paddleslim')
from paddleslim.analysis import dygraph_flops as flops
input_spec = [{
"image": paddle.ones(
shape=[1, 3, 640, 640], dtype='float32'),
"im_shape": paddle.full(
[1, 2], 640, dtype='float32'),
"scale_factor": paddle.ones(
shape=[1, 2], dtype='float32')
}]
if self.print_prune_params:
print_prune_params(model)
        ori_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
if self.criterion == 'fpgm':
pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
elif self.criterion == 'l1_norm':
pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)
logger.info("pruned params: {}".format(self.pruned_params))
pruned_ratios = [float(n) for n in self.pruned_ratios]
ratios = {}
for i, param in enumerate(self.pruned_params):
ratios[param] = pruned_ratios[i]
pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / (1000**3)
logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
pruned_flops, (ori_flops - pruned_flops) / ori_flops))
self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
self.quanter.quantize(model)
if self.print_qat_model:
logger.info("Quantized model:")
logger.info(model)
return model
def save_quantized_model(self, layer, path, input_spec=None, **config):
self.quanter.save_quantized_model(
model=layer, path=path, input_spec=input_spec, **config)
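
An illustrative Pruner instantiation (not part of this commit); the weight names are hypothetical — real slim configs list the concrete conv weights to prune.

pruner = Pruner(
    criterion='fpgm',
    pruned_params=['conv2d_1.w_0', 'conv2d_2.w_0'],   # hypothetical parameter names
    pruned_ratios=[0.3, 0.3],                         # paired with pruned_params by index
    print_params=False)
# pruned_model = pruner(model)   # `model`: a built dygraph detector; __call__ puts it in eval mode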

View File

@@ -0,0 +1,89 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle.utils import try_import
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class QAT(object):
def __init__(self, quant_config, print_model):
super(QAT, self).__init__()
self.quant_config = quant_config
self.print_model = print_model
def __call__(self, model):
paddleslim = try_import('paddleslim')
self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
if self.print_model:
logger.info("Model before quant:")
logger.info(model)
        # For PP-YOLOE, convert the model to deploy mode first.
for layer in model.sublayers():
if hasattr(layer, 'convert_to_deploy'):
layer.convert_to_deploy()
self.quanter.quantize(model)
if self.print_model:
logger.info("Quantized model:")
logger.info(model)
return model
def save_quantized_model(self, layer, path, input_spec=None, **config):
self.quanter.save_quantized_model(
model=layer, path=path, input_spec=input_spec, **config)
@register
@serializable
class PTQ(object):
def __init__(self,
ptq_config,
quant_batch_num=10,
output_dir='output_inference',
fuse=True,
fuse_list=None):
super(PTQ, self).__init__()
self.ptq_config = ptq_config
self.quant_batch_num = quant_batch_num
self.output_dir = output_dir
self.fuse = fuse
self.fuse_list = fuse_list
def __call__(self, model):
paddleslim = try_import('paddleslim')
self.ptq = paddleslim.PTQ(**self.ptq_config)
model.eval()
quant_model = self.ptq.quantize(
model, fuse=self.fuse, fuse_list=self.fuse_list)
return quant_model
def save_quantized_model(self,
quant_model,
quantize_model_path,
input_spec=None):
self.ptq.save_quantized_model(quant_model, quantize_model_path,
input_spec)
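
A quant_config of the kind PaddleDetection's slim QAT YAMLs pass in; the exact key set depends on the installed paddleslim version, so treat this as an illustrative sketch rather than an authoritative schema.

quant_config = {
    'activation_preprocess_type': 'PACT',   # cleared at export time by build_slim_model
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
    'dtype': 'int8',
    'window_size': 10000,
    'moving_rate': 0.9,
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}
qat = QAT(quant_config=quant_config, print_model=False)
# quant_model = qat(model)   # `model`: a built detector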

View File

@@ -0,0 +1,66 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle.utils import try_import
from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
@register
@serializable
class UnstructuredPruner(object):
def __init__(self,
stable_epochs,
pruning_epochs,
tunning_epochs,
pruning_steps,
ratio,
initial_ratio,
prune_params_type=None):
self.stable_epochs = stable_epochs
self.pruning_epochs = pruning_epochs
self.tunning_epochs = tunning_epochs
self.ratio = ratio
self.prune_params_type = prune_params_type
self.initial_ratio = initial_ratio
self.pruning_steps = pruning_steps
def __call__(self, model, steps_per_epoch, skip_params_func=None):
paddleslim = try_import('paddleslim')
from paddleslim import GMPUnstructuredPruner
configs = {
'pruning_strategy': 'gmp',
'stable_iterations': self.stable_epochs * steps_per_epoch,
'pruning_iterations': self.pruning_epochs * steps_per_epoch,
'tunning_iterations': self.tunning_epochs * steps_per_epoch,
'resume_iteration': 0,
'pruning_steps': self.pruning_steps,
'initial_ratio': self.initial_ratio,
}
pruner = GMPUnstructuredPruner(
model,
ratio=self.ratio,
skip_params_func=skip_params_func,
prune_params_type=self.prune_params_type,
local_sparsity=True,
configs=configs)
return pruner
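
A sketch of how the epoch-based arguments map onto the GMP iteration counts assembled in __call__ above; the numbers are illustrative.

steps_per_epoch = 1000
unstructured = UnstructuredPruner(
    stable_epochs=5, pruning_epochs=45, tunning_epochs=50,
    pruning_steps=100, ratio=0.75, initial_ratio=0.15,
    prune_params_type='conv1x1_only')   # assumed to be a value paddleslim accepts
# pruner = unstructured(model, steps_per_epoch)
# -> stable: 5*1000 iters, pruning: 45*1000 iters, tunning: 50*1000 iters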