Replace document detection model
110
paddle_detection/ppdet/slim/__init__.py
Normal file
@@ -0,0 +1,110 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import distill_loss
from . import distill_model
from . import ofa
from . import prune
from . import quant
from . import unstructured_prune

from .distill_loss import *
from .distill_model import *
from .ofa import *
from .prune import *
from .quant import *
from .unstructured_prune import *

import yaml
from ppdet.core.workspace import load_config
from ppdet.utils.checkpoint import load_pretrain_weight


def build_slim_model(cfg, slim_cfg, mode='train'):
    with open(slim_cfg) as f:
        slim_load_cfg = yaml.load(f, Loader=yaml.Loader)

    if mode != 'train' and slim_load_cfg['slim'] == 'Distill':
        return cfg

    if slim_load_cfg['slim'] == 'Distill':
        if "slim_method" in slim_load_cfg and slim_load_cfg[
                'slim_method'] == "FGD":
            model = FGDDistillModel(cfg, slim_cfg)
        elif "slim_method" in slim_load_cfg and slim_load_cfg[
                'slim_method'] == "LD":
            model = LDDistillModel(cfg, slim_cfg)
        elif "slim_method" in slim_load_cfg and slim_load_cfg[
                'slim_method'] == "CWD":
            model = CWDDistillModel(cfg, slim_cfg)
        elif "slim_method" in slim_load_cfg and slim_load_cfg[
                'slim_method'] == "PPYOLOEDistill":
            model = PPYOLOEDistillModel(cfg, slim_cfg)
        else:
            # common distillation model
            model = DistillModel(cfg, slim_cfg)
        cfg['model'] = model
        cfg['slim_type'] = cfg.slim
    elif slim_load_cfg['slim'] == 'OFA':
        load_config(slim_cfg)
        model = create(cfg.architecture)
        load_pretrain_weight(model, cfg.weights)
        slim = create(cfg.slim)
        cfg['slim'] = slim
        cfg['model'] = slim(model, model.state_dict())
        cfg['slim_type'] = cfg.slim
    elif slim_load_cfg['slim'] == 'DistillPrune':
        if mode == 'train':
            model = DistillModel(cfg, slim_cfg)
            pruner = create(cfg.pruner)
            pruner(model.student_model)
        else:
            model = create(cfg.architecture)
            weights = cfg.weights
            load_config(slim_cfg)
            pruner = create(cfg.pruner)
            model = pruner(model)
            load_pretrain_weight(model, weights)
        cfg['model'] = model
        cfg['slim_type'] = cfg.slim
    elif slim_load_cfg['slim'] == 'PTQ':
        model = create(cfg.architecture)
        load_config(slim_cfg)
        load_pretrain_weight(model, cfg.weights)
        slim = create(cfg.slim)
        cfg['slim_type'] = cfg.slim
        cfg['slim'] = slim
        cfg['model'] = slim(model)
    elif slim_load_cfg['slim'] == 'UnstructuredPruner':
        load_config(slim_cfg)
        slim = create(cfg.slim)
        cfg['slim_type'] = cfg.slim
        cfg['slim'] = slim
        cfg['unstructured_prune'] = True
    else:
        load_config(slim_cfg)
        model = create(cfg.architecture)
        if mode == 'train':
            load_pretrain_weight(model, cfg.pretrain_weights)
        slim = create(cfg.slim)
        cfg['slim_type'] = cfg.slim
        # TODO: fix quant export model in framework.
        if mode == 'test' and 'QAT' in slim_load_cfg['slim']:
            slim.quant_config['activation_preprocess_type'] = None
        cfg['model'] = slim(model)
        cfg['slim'] = slim
        if mode != 'train':
            load_pretrain_weight(cfg['model'], cfg.weights)

    return cfg
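
For orientation only (this sketch is not part of the committed file): build_slim_model is the entry point a training script would call after loading the student config. The YAML paths below are hypothetical placeholders.

# Illustrative sketch -- config paths are hypothetical, not part of this commit.
from ppdet.core.workspace import load_config
from ppdet.slim import build_slim_model

cfg = load_config('configs/picodet/picodet_s_416_coco.yml')            # student config
cfg = build_slim_model(cfg, 'configs/slim/distill/fgd_distill.yml')    # mode='train' by default
# cfg['model'] now wraps teacher + student (FGDDistillModel for an FGD slim config),
# and cfg['slim_type'] records the slim strategy for the trainer/exporter.
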
919
paddle_detection/ppdet/slim/distill_loss.py
Normal file
@@ -0,0 +1,919 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr

from ppdet.core.workspace import register
from ppdet.modeling import ops
from ppdet.modeling.losses.iou_loss import GIoULoss
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

__all__ = [
    'DistillYOLOv3Loss',
    'KnowledgeDistillationKLDivLoss',
    'DistillPPYOLOELoss',
    'FGDFeatureLoss',
    'CWDFeatureLoss',
    'PKDFeatureLoss',
    'MGDFeatureLoss',
]


def parameter_init(mode="kaiming", value=0.):
    if mode == "kaiming":
        weight_attr = paddle.nn.initializer.KaimingUniform()
    elif mode == "constant":
        weight_attr = paddle.nn.initializer.Constant(value=value)
    else:
        weight_attr = paddle.nn.initializer.KaimingUniform()

    weight_init = ParamAttr(initializer=weight_attr)
    return weight_init


def feature_norm(feat):
    # Normalize the feature maps to have zero mean and unit variances.
    assert len(feat.shape) == 4
    N, C, H, W = feat.shape
    feat = feat.transpose([1, 0, 2, 3]).reshape([C, -1])
    mean = feat.mean(axis=-1, keepdim=True)
    std = feat.std(axis=-1, keepdim=True)
    feat = (feat - mean) / (std + 1e-6)
    return feat.reshape([C, N, H, W]).transpose([1, 0, 2, 3])


@register
class DistillYOLOv3Loss(nn.Layer):
    def __init__(self, weight=1000):
        super(DistillYOLOv3Loss, self).__init__()
        self.loss_weight = weight

    def obj_weighted_reg(self, sx, sy, sw, sh, tx, ty, tw, th, tobj):
        loss_x = ops.sigmoid_cross_entropy_with_logits(sx, F.sigmoid(tx))
        loss_y = ops.sigmoid_cross_entropy_with_logits(sy, F.sigmoid(ty))
        loss_w = paddle.abs(sw - tw)
        loss_h = paddle.abs(sh - th)
        loss = paddle.add_n([loss_x, loss_y, loss_w, loss_h])
        weighted_loss = paddle.mean(loss * F.sigmoid(tobj))
        return weighted_loss

    def obj_weighted_cls(self, scls, tcls, tobj):
        loss = ops.sigmoid_cross_entropy_with_logits(scls, F.sigmoid(tcls))
        weighted_loss = paddle.mean(paddle.multiply(loss, F.sigmoid(tobj)))
        return weighted_loss

    def obj_loss(self, sobj, tobj):
        obj_mask = paddle.cast(tobj > 0., dtype="float32")
        obj_mask.stop_gradient = True
        loss = paddle.mean(
            ops.sigmoid_cross_entropy_with_logits(sobj, obj_mask))
        return loss

    def forward(self, teacher_model, student_model):
        teacher_distill_pairs = teacher_model.yolo_head.loss.distill_pairs
        student_distill_pairs = student_model.yolo_head.loss.distill_pairs
        distill_reg_loss, distill_cls_loss, distill_obj_loss = [], [], []
        for s_pair, t_pair in zip(student_distill_pairs, teacher_distill_pairs):
            distill_reg_loss.append(
                self.obj_weighted_reg(s_pair[0], s_pair[1], s_pair[2], s_pair[
                    3], t_pair[0], t_pair[1], t_pair[2], t_pair[3], t_pair[4]))
            distill_cls_loss.append(
                self.obj_weighted_cls(s_pair[5], t_pair[5], t_pair[4]))
            distill_obj_loss.append(self.obj_loss(s_pair[4], t_pair[4]))
        distill_reg_loss = paddle.add_n(distill_reg_loss)
        distill_cls_loss = paddle.add_n(distill_cls_loss)
        distill_obj_loss = paddle.add_n(distill_obj_loss)
        loss = (distill_reg_loss + distill_cls_loss + distill_obj_loss
                ) * self.loss_weight
        return loss


@register
class KnowledgeDistillationKLDivLoss(nn.Layer):
    """Loss function for knowledge distilling using KL divergence.

    Args:
        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
        loss_weight (float): Loss weight of current loss.
        T (int): Temperature for distillation.
    """

    def __init__(self, reduction='mean', loss_weight=1.0, T=10):
        super(KnowledgeDistillationKLDivLoss, self).__init__()
        assert reduction in ('none', 'mean', 'sum')
        assert T >= 1
        self.reduction = reduction
        self.loss_weight = loss_weight
        self.T = T

    def knowledge_distillation_kl_div_loss(self,
                                           pred,
                                           soft_label,
                                           T,
                                           detach_target=True):
        r"""Loss function for knowledge distilling using KL divergence.

        Args:
            pred (Tensor): Predicted logits with shape (N, n + 1).
            soft_label (Tensor): Target logits with shape (N, N + 1).
            T (int): Temperature for distillation.
            detach_target (bool): Remove soft_label from automatic differentiation
        """
        assert pred.shape == soft_label.shape
        target = F.softmax(soft_label / T, axis=1)
        if detach_target:
            target = target.detach()

        kd_loss = F.kl_div(
            F.log_softmax(
                pred / T, axis=1), target, reduction='none').mean(1) * (T * T)

        return kd_loss

    def forward(self,
                pred,
                soft_label,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            pred (Tensor): Predicted logits with shape (N, n + 1).
            soft_label (Tensor): Target logits with shape (N, N + 1).
            weight (Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')

        reduction = (reduction_override
                     if reduction_override else self.reduction)

        loss_kd_out = self.knowledge_distillation_kl_div_loss(
            pred, soft_label, T=self.T)

        if weight is not None:
            loss_kd_out = weight * loss_kd_out

        if avg_factor is None:
            if reduction == 'none':
                loss = loss_kd_out
            elif reduction == 'mean':
                loss = loss_kd_out.mean()
            elif reduction == 'sum':
                loss = loss_kd_out.sum()
        else:
            # if reduction is mean, then average the loss by avg_factor
            if reduction == 'mean':
                loss = loss_kd_out.sum() / avg_factor
            # if reduction is 'none', then do nothing, otherwise raise an error
            elif reduction != 'none':
                raise ValueError(
                    'avg_factor can not be used with reduction="sum"')

        loss_kd = self.loss_weight * loss
        return loss_kd


@register
class DistillPPYOLOELoss(nn.Layer):
    def __init__(
            self,
            loss_weight={'logits': 4.0,
                         'feat': 1.0},
            logits_distill=True,
            logits_loss_weight={'class': 1.0,
                                'iou': 2.5,
                                'dfl': 0.5},
            logits_ld_distill=False,
            logits_ld_params={'weight': 20000,
                              'T': 10},
            feat_distill=True,
            feat_distiller='fgd',
            feat_distill_place='neck_feats',
            teacher_width_mult=1.0,  # L
            student_width_mult=0.75,  # M
            feat_out_channels=[768, 384, 192]):
        super(DistillPPYOLOELoss, self).__init__()
        self.loss_weight_logits = loss_weight['logits']
        self.loss_weight_feat = loss_weight['feat']
        self.logits_distill = logits_distill
        self.logits_ld_distill = logits_ld_distill
        self.feat_distill = feat_distill

        if logits_distill and self.loss_weight_logits > 0:
            self.bbox_loss_weight = logits_loss_weight['iou']
            self.dfl_loss_weight = logits_loss_weight['dfl']
            self.qfl_loss_weight = logits_loss_weight['class']
            self.loss_bbox = GIoULoss()

        if logits_ld_distill:
            self.loss_kd = KnowledgeDistillationKLDivLoss(
                loss_weight=logits_ld_params['weight'], T=logits_ld_params['T'])

        if feat_distill and self.loss_weight_feat > 0:
            assert feat_distiller in ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
            assert feat_distill_place in ['backbone_feats', 'neck_feats']
            self.feat_distill_place = feat_distill_place
            self.t_channel_list = [
                int(c * teacher_width_mult) for c in feat_out_channels
            ]
            self.s_channel_list = [
                int(c * student_width_mult) for c in feat_out_channels
            ]
            self.distill_feat_loss_modules = []
            for i in range(len(feat_out_channels)):
                if feat_distiller == 'cwd':
                    feat_loss_module = CWDFeatureLoss(
                        student_channels=self.s_channel_list[i],
                        teacher_channels=self.t_channel_list[i],
                        normalize=True)
                elif feat_distiller == 'fgd':
                    feat_loss_module = FGDFeatureLoss(
                        student_channels=self.s_channel_list[i],
                        teacher_channels=self.t_channel_list[i],
                        normalize=True,
                        alpha_fgd=0.00001,
                        beta_fgd=0.000005,
                        gamma_fgd=0.00001,
                        lambda_fgd=0.00000005)
                elif feat_distiller == 'pkd':
                    feat_loss_module = PKDFeatureLoss(
                        student_channels=self.s_channel_list[i],
                        teacher_channels=self.t_channel_list[i],
                        normalize=True,
                        resize_stu=True)
                elif feat_distiller == 'mgd':
                    feat_loss_module = MGDFeatureLoss(
                        student_channels=self.s_channel_list[i],
                        teacher_channels=self.t_channel_list[i],
                        normalize=True,
                        loss_func='ssim')
                elif feat_distiller == 'mimic':
                    feat_loss_module = MimicFeatureLoss(
                        student_channels=self.s_channel_list[i],
                        teacher_channels=self.t_channel_list[i],
                        normalize=True)
                else:
                    raise ValueError
                self.distill_feat_loss_modules.append(feat_loss_module)

    def quality_focal_loss(self,
                           pred_logits,
                           soft_target_logits,
                           beta=2.0,
                           use_sigmoid=False,
                           num_total_pos=None):
        if use_sigmoid:
            func = F.binary_cross_entropy_with_logits
            soft_target = F.sigmoid(soft_target_logits)
            pred_sigmoid = F.sigmoid(pred_logits)
            preds = pred_logits
        else:
            func = F.binary_cross_entropy
            soft_target = soft_target_logits
            pred_sigmoid = pred_logits
            preds = pred_sigmoid

        scale_factor = pred_sigmoid - soft_target
        loss = func(
            preds, soft_target, reduction='none') * scale_factor.abs().pow(beta)
        loss = loss.sum(1)

        if num_total_pos is not None:
            loss = loss.sum() / num_total_pos
        else:
            loss = loss.mean()
        return loss

    def bbox_loss(self, s_bbox, t_bbox, weight_targets=None):
        # [x,y,w,h]
        if weight_targets is not None:
            loss = paddle.sum(self.loss_bbox(s_bbox, t_bbox) * weight_targets)
            avg_factor = weight_targets.sum()
            loss = loss / avg_factor
        else:
            loss = paddle.mean(self.loss_bbox(s_bbox, t_bbox))
        return loss

    def distribution_focal_loss(self,
                                pred_corners,
                                target_corners,
                                weight_targets=None):
        target_corners_label = F.softmax(target_corners, axis=-1)
        loss_dfl = F.cross_entropy(
            pred_corners,
            target_corners_label,
            soft_label=True,
            reduction='none')
        loss_dfl = loss_dfl.sum(1)

        if weight_targets is not None:
            loss_dfl = loss_dfl * (weight_targets.expand([-1, 4]).reshape([-1]))
            loss_dfl = loss_dfl.sum(-1) / weight_targets.sum()
        else:
            loss_dfl = loss_dfl.mean(-1)
        return loss_dfl / 4.0  # 4 direction

    def main_kd(self, mask_positive, pred_scores, soft_cls, num_classes):
        num_pos = mask_positive.sum()
        if num_pos > 0:
            cls_mask = mask_positive.unsqueeze(-1).tile([1, 1, num_classes])
            pred_scores_pos = paddle.masked_select(
                pred_scores, cls_mask).reshape([-1, num_classes])
            soft_cls_pos = paddle.masked_select(
                soft_cls, cls_mask).reshape([-1, num_classes])
            loss_kd = self.loss_kd(
                pred_scores_pos, soft_cls_pos, avg_factor=num_pos)
        else:
            loss_kd = paddle.zeros([1])
        return loss_kd

    def forward(self, teacher_model, student_model):
        teacher_distill_pairs = teacher_model.yolo_head.distill_pairs
        student_distill_pairs = student_model.yolo_head.distill_pairs
        if self.logits_distill and self.loss_weight_logits > 0:
            distill_bbox_loss, distill_dfl_loss, distill_cls_loss = [], [], []

            distill_cls_loss.append(
                self.quality_focal_loss(
                    student_distill_pairs['pred_cls_scores'].reshape(
                        (-1, student_distill_pairs['pred_cls_scores'].shape[-1]
                         )),
                    teacher_distill_pairs['pred_cls_scores'].detach().reshape(
                        (-1, teacher_distill_pairs['pred_cls_scores'].shape[-1]
                         )),
                    num_total_pos=student_distill_pairs['pos_num'],
                    use_sigmoid=False))

            distill_bbox_loss.append(
                self.bbox_loss(student_distill_pairs['pred_bboxes_pos'],
                               teacher_distill_pairs['pred_bboxes_pos'].detach(),
                               weight_targets=student_distill_pairs['bbox_weight']
                               ) if 'pred_bboxes_pos' in student_distill_pairs and \
                    'pred_bboxes_pos' in teacher_distill_pairs and \
                    'bbox_weight' in student_distill_pairs
                else paddle.zeros([1]))

            distill_dfl_loss.append(
                self.distribution_focal_loss(
                    student_distill_pairs['pred_dist_pos'].reshape((-1, student_distill_pairs['pred_dist_pos'].shape[-1])),
                    teacher_distill_pairs['pred_dist_pos'].detach().reshape((-1, teacher_distill_pairs['pred_dist_pos'].shape[-1])), \
                    weight_targets=student_distill_pairs['bbox_weight']
                ) if 'pred_dist_pos' in student_distill_pairs and \
                    'pred_dist_pos' in teacher_distill_pairs and \
                    'bbox_weight' in student_distill_pairs
                else paddle.zeros([1]))

            distill_cls_loss = paddle.add_n(distill_cls_loss)
            distill_bbox_loss = paddle.add_n(distill_bbox_loss)
            distill_dfl_loss = paddle.add_n(distill_dfl_loss)
            logits_loss = distill_bbox_loss * self.bbox_loss_weight + distill_cls_loss * self.qfl_loss_weight + distill_dfl_loss * self.dfl_loss_weight

            if self.logits_ld_distill:
                loss_kd = self.main_kd(
                    student_distill_pairs['mask_positive_select'],
                    student_distill_pairs['pred_cls_scores'],
                    teacher_distill_pairs['pred_cls_scores'],
                    student_model.yolo_head.num_classes, )
                logits_loss += loss_kd
        else:
            logits_loss = paddle.zeros([1])

        if self.feat_distill and self.loss_weight_feat > 0:
            feat_loss_list = []
            inputs = student_model.inputs
            assert 'gt_bbox' in inputs
            assert self.feat_distill_place in student_distill_pairs
            assert self.feat_distill_place in teacher_distill_pairs
            stu_feats = student_distill_pairs[self.feat_distill_place]
            tea_feats = teacher_distill_pairs[self.feat_distill_place]
            for i, loss_module in enumerate(self.distill_feat_loss_modules):
                feat_loss_list.append(
                    loss_module(stu_feats[i], tea_feats[i], inputs))
            feat_loss = paddle.add_n(feat_loss_list)
        else:
            feat_loss = paddle.zeros([1])

        student_model.yolo_head.distill_pairs.clear()
        teacher_model.yolo_head.distill_pairs.clear()
        return logits_loss * self.loss_weight_logits, feat_loss * self.loss_weight_feat


@register
class CWDFeatureLoss(nn.Layer):
    def __init__(self,
                 student_channels,
                 teacher_channels,
                 normalize=False,
                 tau=1.0,
                 weight=1.0):
        super(CWDFeatureLoss, self).__init__()
        self.normalize = normalize
        self.tau = tau
        self.loss_weight = weight

        if student_channels != teacher_channels:
            self.align = nn.Conv2D(
                student_channels,
                teacher_channels,
                kernel_size=1,
                stride=1,
                padding=0)
        else:
            self.align = None

    def distill_softmax(self, x, tau):
        _, _, w, h = paddle.shape(x)
        x = paddle.reshape(x, [-1, w * h])
        x /= tau
        return F.softmax(x, axis=1)

    def forward(self, preds_s, preds_t, inputs=None):
        assert preds_s.shape[-2:] == preds_t.shape[-2:]
        N, C, H, W = preds_s.shape
        eps = 1e-5
        if self.align is not None:
            preds_s = self.align(preds_s)

        if self.normalize:
            preds_s = feature_norm(preds_s)
            preds_t = feature_norm(preds_t)

        softmax_pred_s = self.distill_softmax(preds_s, self.tau)
        softmax_pred_t = self.distill_softmax(preds_t, self.tau)

        loss = paddle.sum(-softmax_pred_t * paddle.log(eps + softmax_pred_s) +
                          softmax_pred_t * paddle.log(eps + softmax_pred_t))
        return self.loss_weight * loss / (C * N)


@register
class FGDFeatureLoss(nn.Layer):
    """
    Focal and Global Knowledge Distillation for Detectors
    The code is reference from https://github.com/yzd-v/FGD/blob/master/mmdet/distillation/losses/fgd.py

    Args:
        student_channels (int): The number of channels in the student's FPN feature map. Default to 256.
        teacher_channels (int): The number of channels in the teacher's FPN feature map. Default to 256.
        normalize (bool): Whether to normalize the feature maps.
        temp (float, optional): The temperature coefficient. Defaults to 0.5.
        alpha_fgd (float, optional): The weight of fg_loss. Defaults to 0.001
        beta_fgd (float, optional): The weight of bg_loss. Defaults to 0.0005
        gamma_fgd (float, optional): The weight of mask_loss. Defaults to 0.001
        lambda_fgd (float, optional): The weight of relation_loss. Defaults to 0.000005
    """

    def __init__(self,
                 student_channels,
                 teacher_channels,
                 normalize=False,
                 loss_weight=1.0,
                 temp=0.5,
                 alpha_fgd=0.001,
                 beta_fgd=0.0005,
                 gamma_fgd=0.001,
                 lambda_fgd=0.000005):
        super(FGDFeatureLoss, self).__init__()
        self.normalize = normalize
        self.loss_weight = loss_weight
        self.temp = temp
        self.alpha_fgd = alpha_fgd
        self.beta_fgd = beta_fgd
        self.gamma_fgd = gamma_fgd
        self.lambda_fgd = lambda_fgd
        kaiming_init = parameter_init("kaiming")
        zeros_init = parameter_init("constant", 0.0)

        if student_channels != teacher_channels:
            self.align = nn.Conv2D(
                student_channels,
                teacher_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                weight_attr=kaiming_init)
            student_channels = teacher_channels
        else:
            self.align = None

        self.conv_mask_s = nn.Conv2D(
            student_channels, 1, kernel_size=1, weight_attr=kaiming_init)
        self.conv_mask_t = nn.Conv2D(
            teacher_channels, 1, kernel_size=1, weight_attr=kaiming_init)

        self.stu_conv_block = nn.Sequential(
            nn.Conv2D(
                student_channels,
                student_channels // 2,
                kernel_size=1,
                weight_attr=zeros_init),
            nn.LayerNorm([student_channels // 2, 1, 1]),
            nn.ReLU(),
            nn.Conv2D(
                student_channels // 2,
                student_channels,
                kernel_size=1,
                weight_attr=zeros_init))
        self.tea_conv_block = nn.Sequential(
            nn.Conv2D(
                teacher_channels,
                teacher_channels // 2,
                kernel_size=1,
                weight_attr=zeros_init),
            nn.LayerNorm([teacher_channels // 2, 1, 1]),
            nn.ReLU(),
            nn.Conv2D(
                teacher_channels // 2,
                teacher_channels,
                kernel_size=1,
                weight_attr=zeros_init))

    def spatial_channel_attention(self, x, t=0.5):
        shape = paddle.shape(x)
        N, C, H, W = shape
        _f = paddle.abs(x)
        spatial_map = paddle.reshape(
            paddle.mean(
                _f, axis=1, keepdim=True) / t, [N, -1])
        spatial_map = F.softmax(spatial_map, axis=1, dtype="float32") * H * W
        spatial_att = paddle.reshape(spatial_map, [N, H, W])

        channel_map = paddle.mean(
            paddle.mean(
                _f, axis=2, keepdim=False), axis=2, keepdim=False)
        channel_att = F.softmax(channel_map / t, axis=1, dtype="float32") * C
        return [spatial_att, channel_att]

    def spatial_pool(self, x, mode="teacher"):
        batch, channel, width, height = x.shape
        x_copy = x
        x_copy = paddle.reshape(x_copy, [batch, channel, height * width])
        x_copy = x_copy.unsqueeze(1)
        if mode.lower() == "student":
            context_mask = self.conv_mask_s(x)
        else:
            context_mask = self.conv_mask_t(x)

        context_mask = paddle.reshape(context_mask, [batch, 1, height * width])
        context_mask = F.softmax(context_mask, axis=2)
        context_mask = context_mask.unsqueeze(-1)
        context = paddle.matmul(x_copy, context_mask)
        context = paddle.reshape(context, [batch, channel, 1, 1])
        return context

    def mask_loss(self, stu_channel_att, tea_channel_att, stu_spatial_att,
                  tea_spatial_att):
        def _func(a, b):
            return paddle.sum(paddle.abs(a - b)) / len(a)

        mask_loss = _func(stu_channel_att, tea_channel_att) + _func(
            stu_spatial_att, tea_spatial_att)
        return mask_loss

    def feature_loss(self, stu_feature, tea_feature, mask_fg, mask_bg,
                     tea_channel_att, tea_spatial_att):
        mask_fg = mask_fg.unsqueeze(axis=1)
        mask_bg = mask_bg.unsqueeze(axis=1)
        tea_channel_att = tea_channel_att.unsqueeze(axis=-1).unsqueeze(axis=-1)
        tea_spatial_att = tea_spatial_att.unsqueeze(axis=1)

        fea_t = paddle.multiply(tea_feature, paddle.sqrt(tea_spatial_att))
        fea_t = paddle.multiply(fea_t, paddle.sqrt(tea_channel_att))
        fg_fea_t = paddle.multiply(fea_t, paddle.sqrt(mask_fg))
        bg_fea_t = paddle.multiply(fea_t, paddle.sqrt(mask_bg))

        fea_s = paddle.multiply(stu_feature, paddle.sqrt(tea_spatial_att))
        fea_s = paddle.multiply(fea_s, paddle.sqrt(tea_channel_att))
        fg_fea_s = paddle.multiply(fea_s, paddle.sqrt(mask_fg))
        bg_fea_s = paddle.multiply(fea_s, paddle.sqrt(mask_bg))

        fg_loss = F.mse_loss(fg_fea_s, fg_fea_t, reduction="sum") / len(mask_fg)
        bg_loss = F.mse_loss(bg_fea_s, bg_fea_t, reduction="sum") / len(mask_bg)
        return fg_loss, bg_loss

    def relation_loss(self, stu_feature, tea_feature):
        context_s = self.spatial_pool(stu_feature, "student")
        context_t = self.spatial_pool(tea_feature, "teacher")
        out_s = stu_feature + self.stu_conv_block(context_s)
        out_t = tea_feature + self.tea_conv_block(context_t)
        rela_loss = F.mse_loss(out_s, out_t, reduction="sum") / len(out_s)
        return rela_loss

    def mask_value(self, mask, xl, xr, yl, yr, value):
        mask[xl:xr, yl:yr] = paddle.maximum(mask[xl:xr, yl:yr], value)
        return mask

    def forward(self, stu_feature, tea_feature, inputs):
        assert stu_feature.shape[-2:] == tea_feature.shape[-2:]
assert "gt_bbox" in inputs.keys() and "im_shape" in inputs.keys()
|
||||
gt_bboxes = inputs['gt_bbox']
|
||||
ins_shape = [
|
||||
inputs['im_shape'][i] for i in range(inputs['im_shape'].shape[0])
|
||||
]
|
||||
index_gt = []
|
||||
for i in range(len(gt_bboxes)):
|
||||
if gt_bboxes[i].size > 2:
|
||||
index_gt.append(i)
|
||||
# only distill feature with labeled GTbox
|
||||
if len(index_gt) != len(gt_bboxes):
|
||||
index_gt_t = paddle.to_tensor(index_gt)
|
||||
stu_feature = paddle.index_select(stu_feature, index_gt_t)
|
||||
tea_feature = paddle.index_select(tea_feature, index_gt_t)
|
||||
|
||||
ins_shape = [ins_shape[c] for c in index_gt]
|
||||
gt_bboxes = [gt_bboxes[c] for c in index_gt]
|
||||
assert len(gt_bboxes) == tea_feature.shape[0]
|
||||
|
||||
if self.align is not None:
|
||||
stu_feature = self.align(stu_feature)
|
||||
|
||||
if self.normalize:
|
||||
stu_feature = feature_norm(stu_feature)
|
||||
tea_feature = feature_norm(tea_feature)
|
||||
|
||||
tea_spatial_att, tea_channel_att = self.spatial_channel_attention(
|
||||
tea_feature, self.temp)
|
||||
stu_spatial_att, stu_channel_att = self.spatial_channel_attention(
|
||||
stu_feature, self.temp)
|
||||
|
||||
mask_fg = paddle.zeros(tea_spatial_att.shape)
|
||||
mask_bg = paddle.ones_like(tea_spatial_att)
|
||||
one_tmp = paddle.ones([*tea_spatial_att.shape[1:]])
|
||||
zero_tmp = paddle.zeros([*tea_spatial_att.shape[1:]])
|
||||
mask_fg.stop_gradient = True
|
||||
mask_bg.stop_gradient = True
|
||||
one_tmp.stop_gradient = True
|
||||
zero_tmp.stop_gradient = True
|
||||
|
||||
wmin, wmax, hmin, hmax = [], [], [], []
|
||||
|
||||
if len(gt_bboxes) == 0:
|
||||
loss = self.relation_loss(stu_feature, tea_feature)
|
||||
return self.lambda_fgd * loss
|
||||
|
||||
N, _, H, W = stu_feature.shape
|
||||
for i in range(N):
|
||||
tmp_box = paddle.ones_like(gt_bboxes[i])
|
||||
tmp_box.stop_gradient = True
|
||||
tmp_box[:, 0] = gt_bboxes[i][:, 0] / ins_shape[i][1] * W
|
||||
tmp_box[:, 2] = gt_bboxes[i][:, 2] / ins_shape[i][1] * W
|
||||
tmp_box[:, 1] = gt_bboxes[i][:, 1] / ins_shape[i][0] * H
|
||||
tmp_box[:, 3] = gt_bboxes[i][:, 3] / ins_shape[i][0] * H
|
||||
|
||||
zero = paddle.zeros_like(tmp_box[:, 0], dtype="int32")
|
||||
ones = paddle.ones_like(tmp_box[:, 2], dtype="int32")
|
||||
zero.stop_gradient = True
|
||||
ones.stop_gradient = True
|
||||
wmin.append(
|
||||
paddle.cast(paddle.floor(tmp_box[:, 0]), "int32").maximum(zero))
|
||||
wmax.append(paddle.cast(paddle.ceil(tmp_box[:, 2]), "int32"))
|
||||
hmin.append(
|
||||
paddle.cast(paddle.floor(tmp_box[:, 1]), "int32").maximum(zero))
|
||||
hmax.append(paddle.cast(paddle.ceil(tmp_box[:, 3]), "int32"))
|
||||
|
||||
area_recip = 1.0 / (
|
||||
hmax[i].reshape([1, -1]) + 1 - hmin[i].reshape([1, -1])) / (
|
||||
wmax[i].reshape([1, -1]) + 1 - wmin[i].reshape([1, -1]))
|
||||
|
||||
for j in range(len(gt_bboxes[i])):
|
||||
if gt_bboxes[i][j].sum() > 0:
|
||||
mask_fg[i] = self.mask_value(
|
||||
mask_fg[i], hmin[i][j], hmax[i][j] + 1, wmin[i][j],
|
||||
wmax[i][j] + 1, area_recip[0][j])
|
||||
|
||||
mask_bg[i] = paddle.where(mask_fg[i] > zero_tmp, zero_tmp, one_tmp)
|
||||
|
||||
if paddle.sum(mask_bg[i]):
|
||||
mask_bg[i] /= paddle.sum(mask_bg[i])
|
||||
|
||||
fg_loss, bg_loss = self.feature_loss(stu_feature, tea_feature, mask_fg,
|
||||
mask_bg, tea_channel_att,
|
||||
tea_spatial_att)
|
||||
mask_loss = self.mask_loss(stu_channel_att, tea_channel_att,
|
||||
stu_spatial_att, tea_spatial_att)
|
||||
rela_loss = self.relation_loss(stu_feature, tea_feature)
|
||||
loss = self.alpha_fgd * fg_loss + self.beta_fgd * bg_loss \
|
||||
+ self.gamma_fgd * mask_loss + self.lambda_fgd * rela_loss
|
||||
return loss * self.loss_weight
|
||||
|
||||
|
||||
@register
|
||||
class PKDFeatureLoss(nn.Layer):
|
||||
"""
|
||||
PKD: General Distillation Framework for Object Detectors via Pearson Correlation Coefficient.
|
||||
|
||||
Args:
|
||||
loss_weight (float): Weight of loss. Defaults to 1.0.
|
||||
resize_stu (bool): If True, we'll down/up sample the features of the
|
||||
student model to the spatial size of those of the teacher model if
|
||||
their spatial sizes are different. And vice versa. Defaults to
|
||||
True.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
student_channels=256,
|
||||
teacher_channels=256,
|
||||
normalize=True,
|
||||
loss_weight=1.0,
|
||||
resize_stu=True):
|
||||
super(PKDFeatureLoss, self).__init__()
|
||||
self.normalize = normalize
|
||||
self.loss_weight = loss_weight
|
||||
self.resize_stu = resize_stu
|
||||
|
||||
def forward(self, stu_feature, tea_feature, inputs=None):
|
||||
size_s, size_t = stu_feature.shape[2:], tea_feature.shape[2:]
|
||||
if size_s[0] != size_t[0]:
|
||||
if self.resize_stu:
|
||||
stu_feature = F.interpolate(
|
||||
stu_feature, size_t, mode='bilinear')
|
||||
else:
|
||||
tea_feature = F.interpolate(
|
||||
tea_feature, size_s, mode='bilinear')
|
||||
assert stu_feature.shape == tea_feature.shape
|
||||
|
||||
if self.normalize:
|
||||
stu_feature = feature_norm(stu_feature)
|
||||
tea_feature = feature_norm(tea_feature)
|
||||
|
||||
loss = F.mse_loss(stu_feature, tea_feature) / 2
|
||||
return loss * self.loss_weight
|
||||
|
||||
|
||||
@register
|
||||
class MimicFeatureLoss(nn.Layer):
|
||||
def __init__(self,
|
||||
student_channels=256,
|
||||
teacher_channels=256,
|
||||
normalize=True,
|
||||
loss_weight=1.0):
|
||||
super(MimicFeatureLoss, self).__init__()
|
||||
self.normalize = normalize
|
||||
self.loss_weight = loss_weight
|
||||
self.mse_loss = nn.MSELoss()
|
||||
|
||||
if student_channels != teacher_channels:
|
||||
self.align = nn.Conv2D(
|
||||
student_channels,
|
||||
teacher_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0)
|
||||
else:
|
||||
self.align = None
|
||||
|
||||
def forward(self, stu_feature, tea_feature, inputs=None):
|
||||
if self.align is not None:
|
||||
stu_feature = self.align(stu_feature)
|
||||
|
||||
if self.normalize:
|
||||
stu_feature = feature_norm(stu_feature)
|
||||
tea_feature = feature_norm(tea_feature)
|
||||
|
||||
loss = self.mse_loss(stu_feature, tea_feature)
|
||||
return loss * self.loss_weight
|
||||
|
||||
|
||||
@register
|
||||
class MGDFeatureLoss(nn.Layer):
|
||||
def __init__(self,
|
||||
student_channels=256,
|
||||
teacher_channels=256,
|
||||
normalize=True,
|
||||
loss_weight=1.0,
|
||||
loss_func='mse'):
|
||||
super(MGDFeatureLoss, self).__init__()
|
||||
self.normalize = normalize
|
||||
self.loss_weight = loss_weight
|
||||
assert loss_func in ['mse', 'ssim']
|
||||
self.loss_func = loss_func
|
||||
self.mse_loss = nn.MSELoss(reduction='sum')
|
||||
self.ssim_loss = SSIM(11)
|
||||
|
||||
kaiming_init = parameter_init("kaiming")
|
||||
if student_channels != teacher_channels:
|
||||
self.align = nn.Conv2D(
|
||||
student_channels,
|
||||
teacher_channels,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
weight_attr=kaiming_init,
|
||||
bias_attr=False)
|
||||
else:
|
||||
self.align = None
|
||||
|
||||
self.generation = nn.Sequential(
|
||||
nn.Conv2D(
|
||||
teacher_channels, teacher_channels, kernel_size=3, padding=1),
|
||||
nn.ReLU(),
|
||||
nn.Conv2D(
|
||||
teacher_channels, teacher_channels, kernel_size=3, padding=1))
|
||||
|
||||
def forward(self, stu_feature, tea_feature, inputs=None):
|
||||
N = stu_feature.shape[0]
|
||||
if self.align is not None:
|
||||
stu_feature = self.align(stu_feature)
|
||||
stu_feature = self.generation(stu_feature)
|
||||
|
||||
if self.normalize:
|
||||
stu_feature = feature_norm(stu_feature)
|
||||
tea_feature = feature_norm(tea_feature)
|
||||
|
||||
if self.loss_func == 'mse':
|
||||
loss = self.mse_loss(stu_feature, tea_feature) / N
|
||||
elif self.loss_func == 'ssim':
|
||||
ssim_loss = self.ssim_loss(stu_feature, tea_feature)
|
||||
loss = paddle.clip((1 - ssim_loss) / 2, 0, 1)
|
||||
else:
|
||||
raise ValueError
|
||||
return loss * self.loss_weight
|
||||
|
||||
|
||||
class SSIM(nn.Layer):
|
||||
def __init__(self, window_size=11, size_average=True):
|
||||
super(SSIM, self).__init__()
|
||||
self.window_size = window_size
|
||||
self.size_average = size_average
|
||||
self.channel = 1
|
||||
self.window = self.create_window(window_size, self.channel)
|
||||
|
||||
def gaussian(self, window_size, sigma):
|
||||
gauss = paddle.to_tensor([
|
||||
math.exp(-(x - window_size // 2)**2 / float(2 * sigma**2))
|
||||
for x in range(window_size)
|
||||
])
|
||||
return gauss / gauss.sum()
|
||||
|
||||
def create_window(self, window_size, channel):
|
||||
_1D_window = self.gaussian(window_size, 1.5).unsqueeze(1)
|
||||
_2D_window = _1D_window.mm(_1D_window.t()).unsqueeze(0).unsqueeze(0)
|
||||
window = _2D_window.expand([channel, 1, window_size, window_size])
|
||||
return window
|
||||
|
||||
def _ssim(self, img1, img2, window, window_size, channel,
|
||||
size_average=True):
|
||||
mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
|
||||
mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
|
||||
mu1_sq = mu1.pow(2)
|
||||
mu2_sq = mu2.pow(2)
|
||||
mu1_mu2 = mu1 * mu2
|
||||
|
||||
sigma1_sq = F.conv2d(
|
||||
img1 * img1, window, padding=window_size // 2,
|
||||
groups=channel) - mu1_sq
|
||||
sigma2_sq = F.conv2d(
|
||||
img2 * img2, window, padding=window_size // 2,
|
||||
groups=channel) - mu2_sq
|
||||
sigma12 = F.conv2d(
|
||||
img1 * img2, window, padding=window_size // 2,
|
||||
groups=channel) - mu1_mu2
|
||||
|
||||
C1 = 0.01**2
|
||||
C2 = 0.03**2
|
||||
ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
|
||||
1e-12 + (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
|
||||
|
||||
if size_average:
|
||||
return ssim_map.mean()
|
||||
else:
|
||||
return ssim_map.mean([1, 2, 3])
|
||||
|
||||
def forward(self, img1, img2):
|
||||
channel = img1.shape[1]
|
||||
if channel == self.channel and self.window.dtype == img1.dtype:
|
||||
window = self.window
|
||||
else:
|
||||
window = self.create_window(self.window_size, channel)
|
||||
self.window = window
|
||||
self.channel = channel
|
||||
|
||||
return self._ssim(img1, img2, window, self.window_size, channel,
|
||||
self.size_average)
|
||||
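
A quick way to sanity-check one of these feature-distillation losses in isolation (illustration only, not part of the commit; the shapes and channel counts below are arbitrary):

import paddle
from ppdet.slim.distill_loss import CWDFeatureLoss

stu = paddle.rand([2, 96, 20, 20])    # student neck feature, NCHW
tea = paddle.rand([2, 128, 20, 20])   # teacher neck feature, wider channels
cwd = CWDFeatureLoss(student_channels=96, teacher_channels=128, normalize=True)
print(cwd(stu, tea))                  # scalar channel-wise distillation loss
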
352
paddle_detection/ppdet/slim/distill_model.py
Normal file
@@ -0,0 +1,352 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn

from ppdet.core.workspace import register, create, load_config
from ppdet.utils.checkpoint import load_pretrain_weight
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)

__all__ = [
    'DistillModel',
    'FGDDistillModel',
    'CWDDistillModel',
    'LDDistillModel',
    'PPYOLOEDistillModel',
]


@register
class DistillModel(nn.Layer):
    """
    Build common distill model.
    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(DistillModel, self).__init__()
        self.arch = cfg.architecture

        self.stu_cfg = cfg
        self.student_model = create(self.stu_cfg.architecture)
        if 'pretrain_weights' in self.stu_cfg and self.stu_cfg.pretrain_weights:
            stu_pretrain = self.stu_cfg.pretrain_weights
        else:
            stu_pretrain = None

        slim_cfg = load_config(slim_cfg)
        self.tea_cfg = slim_cfg
        self.teacher_model = create(self.tea_cfg.architecture)
        if 'pretrain_weights' in self.tea_cfg and self.tea_cfg.pretrain_weights:
            tea_pretrain = self.tea_cfg.pretrain_weights
        else:
            tea_pretrain = None
        self.distill_cfg = slim_cfg

        # load pretrain weights
        self.is_inherit = False
        if stu_pretrain:
            if self.is_inherit and tea_pretrain:
                load_pretrain_weight(self.student_model, tea_pretrain)
                logger.debug(
                    "Inheriting! loading teacher weights to student model!")
            load_pretrain_weight(self.student_model, stu_pretrain)
            logger.info("Student model has loaded pretrain weights!")
        if tea_pretrain:
            load_pretrain_weight(self.teacher_model, tea_pretrain)
            logger.info("Teacher model has loaded pretrain weights!")

        self.teacher_model.eval()
        for param in self.teacher_model.parameters():
            param.trainable = False

        self.distill_loss = self.build_loss(self.distill_cfg)

    def build_loss(self, distill_cfg):
        if 'distill_loss' in distill_cfg and distill_cfg.distill_loss:
            return create(distill_cfg.distill_loss)
        else:
            return None

    def parameters(self):
        return self.student_model.parameters()

    def forward(self, inputs):
        if self.training:
            student_loss = self.student_model(inputs)
            with paddle.no_grad():
                teacher_loss = self.teacher_model(inputs)

            loss = self.distill_loss(self.teacher_model, self.student_model)
            student_loss['distill_loss'] = loss
            student_loss['teacher_loss'] = teacher_loss['loss']
            student_loss['loss'] += student_loss['distill_loss']
            return student_loss
        else:
            return self.student_model(inputs)


@register
class FGDDistillModel(DistillModel):
    """
    Build FGD distill model.
    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(FGDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['RetinaNet', 'PicoDet'
                             ], 'Unsupported arch: {}'.format(self.arch)
        self.is_inherit = True

    def build_loss(self, distill_cfg):
        assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
        assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
        loss_func = dict()
        name_list = distill_cfg.distill_loss_name
        for name in name_list:
            loss_func[name] = create(distill_cfg.distill_loss)
        return loss_func

    def forward(self, inputs):
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)

            loss_dict = {}
            for idx, k in enumerate(self.distill_loss):
                loss_dict[k] = self.distill_loss[k](s_neck_feats[idx],
                                                    t_neck_feats[idx], inputs)
            if self.arch == "RetinaNet":
                loss = self.student_model.head(s_neck_feats, inputs)
            elif self.arch == "PicoDet":
                head_outs = self.student_model.head(
                    s_neck_feats, self.student_model.export_post_process)
                loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
                total_loss = paddle.add_n(list(loss_gfl.values()))
                loss = {}
                loss.update(loss_gfl)
                loss.update({'loss': total_loss})
            else:
                raise ValueError(f"Unsupported model {self.arch}")

            for k in loss_dict:
                loss['loss'] += loss_dict[k]
                loss[k] = loss_dict[k]
            return loss
        else:
            body_feats = self.student_model.backbone(inputs)
            neck_feats = self.student_model.neck(body_feats)
            head_outs = self.student_model.head(neck_feats)
            if self.arch == "RetinaNet":
                bbox, bbox_num = self.student_model.head.post_process(
                    head_outs, inputs['im_shape'], inputs['scale_factor'])
                return {'bbox': bbox, 'bbox_num': bbox_num}
            elif self.arch == "PicoDet":
                head_outs = self.student_model.head(
                    neck_feats, self.student_model.export_post_process)
                scale_factor = inputs['scale_factor']
                bboxes, bbox_num = self.student_model.head.post_process(
                    head_outs,
                    scale_factor,
                    export_nms=self.student_model.export_nms)
                return {'bbox': bboxes, 'bbox_num': bbox_num}
            else:
                raise ValueError(f"Unsupported model {self.arch}")


@register
class CWDDistillModel(DistillModel):
    """
    Build CWD distill model.
    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(CWDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['GFL', 'RetinaNet'], 'Unsupported arch: {}'.format(
            self.arch)

    def build_loss(self, distill_cfg):
        assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
        assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
        loss_func = dict()
        name_list = distill_cfg.distill_loss_name
        for name in name_list:
            loss_func[name] = create(distill_cfg.distill_loss)
        return loss_func

    def get_loss_retinanet(self, stu_fea_list, tea_fea_list, inputs):
        loss = self.student_model.head(stu_fea_list, inputs)
        loss_dict = {}
        for idx, k in enumerate(self.distill_loss):
            loss_dict[k] = self.distill_loss[k](stu_fea_list[idx],
                                                tea_fea_list[idx])

            loss['loss'] += loss_dict[k]
            loss[k] = loss_dict[k]
        return loss

    def get_loss_gfl(self, stu_fea_list, tea_fea_list, inputs):
        loss = {}
        head_outs = self.student_model.head(stu_fea_list)
        loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
        loss.update(loss_gfl)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})

        feat_loss = {}
        loss_dict = {}
        s_cls_feat, t_cls_feat = [], []
        for s_neck_f, t_neck_f in zip(stu_fea_list, tea_fea_list):
            conv_cls_feat, _ = self.student_model.head.conv_feat(s_neck_f)
            cls_score = self.student_model.head.gfl_head_cls(conv_cls_feat)
            t_conv_cls_feat, _ = self.teacher_model.head.conv_feat(t_neck_f)
            t_cls_score = self.teacher_model.head.gfl_head_cls(t_conv_cls_feat)
            s_cls_feat.append(cls_score)
            t_cls_feat.append(t_cls_score)

        for idx, k in enumerate(self.distill_loss):
            loss_dict[k] = self.distill_loss[k](s_cls_feat[idx],
                                                t_cls_feat[idx])
            feat_loss[f"neck_f_{idx}"] = self.distill_loss[k](stu_fea_list[idx],
                                                              tea_fea_list[idx])

        for k in feat_loss:
            loss['loss'] += feat_loss[k]
            loss[k] = feat_loss[k]

        for k in loss_dict:
            loss['loss'] += loss_dict[k]
            loss[k] = loss_dict[k]
        return loss

    def forward(self, inputs):
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)

            if self.arch == "RetinaNet":
                loss = self.get_loss_retinanet(s_neck_feats, t_neck_feats,
                                               inputs)
            elif self.arch == "GFL":
                loss = self.get_loss_gfl(s_neck_feats, t_neck_feats, inputs)
            else:
                raise ValueError(f"unsupported arch {self.arch}")
            return loss
        else:
            body_feats = self.student_model.backbone(inputs)
            neck_feats = self.student_model.neck(body_feats)
            head_outs = self.student_model.head(neck_feats)
            if self.arch == "RetinaNet":
                bbox, bbox_num = self.student_model.head.post_process(
                    head_outs, inputs['im_shape'], inputs['scale_factor'])
                return {'bbox': bbox, 'bbox_num': bbox_num}
            elif self.arch == "GFL":
                bbox_pred, bbox_num = head_outs
                output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
                return output
            else:
                raise ValueError(f"unsupported arch {self.arch}")


@register
class LDDistillModel(DistillModel):
    """
    Build LD distill model.
    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(LDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['GFL'], 'Unsupported arch: {}'.format(self.arch)

    def forward(self, inputs):
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            s_head_outs = self.student_model.head(s_neck_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)
                t_head_outs = self.teacher_model.head(t_neck_feats)

            soft_label_list = t_head_outs[0]
            soft_targets_list = t_head_outs[1]
            student_loss = self.student_model.head.get_loss(
                s_head_outs, inputs, soft_label_list, soft_targets_list)
            total_loss = paddle.add_n(list(student_loss.values()))
            student_loss['loss'] = total_loss
            return student_loss
        else:
            return self.student_model(inputs)


@register
class PPYOLOEDistillModel(DistillModel):
    """
    Build PPYOLOE distill model, only used in PPYOLOE
    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(PPYOLOEDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['PPYOLOE'], 'Unsupported arch: {}'.format(
            self.arch)

    def forward(self, inputs, alpha=0.125):
        if self.training:
            with paddle.no_grad():
                teacher_loss = self.teacher_model(inputs)
            if hasattr(self.teacher_model.yolo_head, "assigned_labels"):
                self.student_model.yolo_head.assigned_labels, self.student_model.yolo_head.assigned_bboxes, self.student_model.yolo_head.assigned_scores = \
                    self.teacher_model.yolo_head.assigned_labels, self.teacher_model.yolo_head.assigned_bboxes, self.teacher_model.yolo_head.assigned_scores
                delattr(self.teacher_model.yolo_head, "assigned_labels")
                delattr(self.teacher_model.yolo_head, "assigned_bboxes")
                delattr(self.teacher_model.yolo_head, "assigned_scores")
            student_loss = self.student_model(inputs)

            logits_loss, feat_loss = self.distill_loss(self.teacher_model,
                                                       self.student_model)
            det_total_loss = student_loss['loss']
            total_loss = alpha * (det_total_loss + logits_loss + feat_loss)
            student_loss['loss'] = total_loss
            student_loss['det_loss'] = det_total_loss
            student_loss['logits_loss'] = logits_loss
            student_loss['feat_loss'] = feat_loss
            return student_loss
        else:
            return self.student_model(inputs)
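
Sketch of the training-time contract these wrappers share (illustrative only; the YAML paths are hypothetical and the exact keys required in the slim config depend on the chosen distill method): the wrapper exposes only the student's parameters to the optimizer, and its forward pass returns the student loss dict augmented with distillation terms.

# Hypothetical usage; the config files below are placeholders.
import paddle
from ppdet.core.workspace import load_config
from ppdet.slim.distill_model import DistillModel

stu_cfg = load_config('configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml')
model = DistillModel(cfg=stu_cfg, slim_cfg='configs/slim/distill/yolov3_distill.yml')
opt = paddle.optimizer.SGD(parameters=model.parameters())  # student params only
# In train mode, model(inputs) returns the student loss dict plus
# 'distill_loss' and 'teacher_loss'; in eval mode it simply runs the student.
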
89
paddle_detection/ppdet/slim/ofa.py
Normal file
@@ -0,0 +1,89 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ppdet.core.workspace import load_config, merge_config, create
from ppdet.utils.checkpoint import load_weight, load_pretrain_weight
from ppdet.utils.logger import setup_logger
from ppdet.core.workspace import register, serializable

from paddle.utils import try_import

logger = setup_logger(__name__)


@register
@serializable
class OFA(object):
    def __init__(self, ofa_config):
        super(OFA, self).__init__()
        self.ofa_config = ofa_config

    def __call__(self, model, param_state_dict):

        paddleslim = try_import('paddleslim')
        from paddleslim.nas.ofa import OFA, RunConfig, utils
        from paddleslim.nas.ofa.convert_super import Convert, supernet
        task = self.ofa_config['task']
        expand_ratio = self.ofa_config['expand_ratio']

        skip_neck = self.ofa_config['skip_neck']
        skip_head = self.ofa_config['skip_head']

        run_config = self.ofa_config['RunConfig']
        if 'skip_layers' in run_config:
            skip_layers = run_config['skip_layers']
        else:
            skip_layers = []

        # supernet config
        sp_config = supernet(expand_ratio=expand_ratio)
        # convert to supernet
        model = Convert(sp_config).convert(model)

        skip_names = []
        if skip_neck:
            skip_names.append('neck.')
        if skip_head:
            skip_names.append('head.')

        for name, sublayer in model.named_sublayers():
            for n in skip_names:
                if n in name:
                    skip_layers.append(name)

        run_config['skip_layers'] = skip_layers
        run_config = RunConfig(**run_config)

        # build ofa model
        ofa_model = OFA(model, run_config=run_config)

        ofa_model.set_epoch(0)
        ofa_model.set_task(task)

        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]

        ofa_model._clear_search_space(input_spec=input_spec)
        ofa_model._build_ss = True
        check_ss = ofa_model._sample_config('expand_ratio', phase=None)
        # tokenize the search space
        ofa_model.tokenize()
        # check token map, search cands and search space
        logger.info('Token map is {}'.format(ofa_model.token_map))
        logger.info('Search candidates is {}'.format(ofa_model.search_cands))
        logger.info('The length of search_space is {}, search_space is {}'.
                    format(len(ofa_model._ofa_layers), ofa_model._ofa_layers))
        # set model state dict into ofa model
        utils.set_state_dict(ofa_model.model, param_state_dict)
        return ofa_model
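
For reference (not part of the commit), the configuration dictionary this OFA wrapper expects mirrors the keys read in __call__ above; the concrete values here are illustrative assumptions only.

ofa_slim_cfg = {
    'task': 'expand_ratio',            # passed to ofa_model.set_task()
    'expand_ratio': [0.5, 0.75, 1.0],  # supernet expansion candidates
    'skip_neck': True,                 # exclude 'neck.' sublayers from the search space
    'skip_head': True,                 # exclude 'head.' sublayers from the search space
    'RunConfig': {'skip_layers': []},  # extra layer names to skip, forwarded to paddleslim RunConfig
}
# OFA(ofa_slim_cfg)(model, model.state_dict()) converts `model` into a
# paddleslim supernet and returns the OFA wrapper with the original weights restored.
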
151
paddle_detection/ppdet/slim/prune.py
Normal file
@@ -0,0 +1,151 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle.utils import try_import

from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


def print_prune_params(model):
    model_dict = model.state_dict()
    for key in model_dict.keys():
        weight_name = model_dict[key].name
        logger.info('Parameter name: {}, shape: {}'.format(
            weight_name, model_dict[key].shape))


@register
@serializable
class Pruner(object):
    def __init__(self,
                 criterion,
                 pruned_params,
                 pruned_ratios,
                 print_params=False):
        super(Pruner, self).__init__()
        assert criterion in ['l1_norm', 'fpgm'], \
            "unsupported prune criterion: {}".format(criterion)
        self.criterion = criterion
        self.pruned_params = pruned_params
        self.pruned_ratios = pruned_ratios
        self.print_params = print_params

    def __call__(self, model):
        # FIXME: adapt to network graph when Training and inference are
        # inconsistent, now only supports prune inference network graph.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]
        if self.print_params:
            print_prune_params(model)

        ori_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        return model

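Pruner is registered in the ppdet workspace and is normally created from a slim config, but it can also be driven by hand. A minimal hedged sketch follows; the parameter name and ratio are purely illustrative, and `model` is assumed to be an already-built, weight-loaded detection model:

pruner = Pruner(
    criterion='l1_norm',               # or 'fpgm'
    pruned_params=['conv2d_27.w_0'],   # hypothetical filter weight name
    pruned_ratios=[0.3],               # prune 30% of that layer's output channels
    print_params=False)
model = pruner(model)                  # logs FLOPs before/after and returns the pruned model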
@register
@serializable
class PrunerQAT(object):
    def __init__(self, criterion, pruned_params, pruned_ratios,
                 print_prune_params, quant_config, print_qat_model):
        super(PrunerQAT, self).__init__()
        assert criterion in ['l1_norm', 'fpgm'], \
            "unsupported prune criterion: {}".format(criterion)
        # Pruner hyperparameter
        self.criterion = criterion
        self.pruned_params = pruned_params
        self.pruned_ratios = pruned_ratios
        self.print_prune_params = print_prune_params
        # QAT hyperparameter
        self.quant_config = quant_config
        self.print_qat_model = print_qat_model

    def __call__(self, model):
        # FIXME: adapt to network graph when Training and inference are
        # inconsistent, now only supports prune inference network graph.
        model.eval()
        paddleslim = try_import('paddleslim')
        from paddleslim.analysis import dygraph_flops as flops
        input_spec = [{
            "image": paddle.ones(
                shape=[1, 3, 640, 640], dtype='float32'),
            "im_shape": paddle.full(
                [1, 2], 640, dtype='float32'),
            "scale_factor": paddle.ones(
                shape=[1, 2], dtype='float32')
        }]
        if self.print_prune_params:
            print_prune_params(model)

        # report in GFLOPs (divide by 1000**3), consistent with Pruner above
        ori_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops))
        if self.criterion == 'fpgm':
            pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec)
        elif self.criterion == 'l1_norm':
            pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec)

        logger.info("pruned params: {}".format(self.pruned_params))
        pruned_ratios = [float(n) for n in self.pruned_ratios]
        ratios = {}
        for i, param in enumerate(self.pruned_params):
            ratios[param] = pruned_ratios[i]
        pruner.prune_vars(ratios, [0])
        pruned_flops = flops(model, input_spec) / (1000**3)
        logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format(
            pruned_flops, (ori_flops - pruned_flops) / ori_flops))

        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)

        self.quanter.quantize(model)

        if self.print_qat_model:
            logger.info("Quantized model:")
            logger.info(model)

        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)

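Once training with the pruned-and-quantized model finishes, the quantized weights are exported through the quanter held by PrunerQAT. A hedged sketch of that last step; the export path is a placeholder and `pruner_qat` denotes the PrunerQAT instance that was applied to `model`:

import paddle

pruner_qat.save_quantized_model(
    layer=model,
    path='output_inference/model',   # hypothetical export location
    input_spec=[{
        'image': paddle.static.InputSpec(
            shape=[None, 3, 640, 640], dtype='float32'),
        'im_shape': paddle.static.InputSpec(
            shape=[None, 2], dtype='float32'),
        'scale_factor': paddle.static.InputSpec(
            shape=[None, 2], dtype='float32')
    }])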
89
paddle_detection/ppdet/slim/quant.py
Normal file
@@ -0,0 +1,89 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle.utils import try_import

from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


@register
@serializable
class QAT(object):
    def __init__(self, quant_config, print_model):
        super(QAT, self).__init__()
        self.quant_config = quant_config
        self.print_model = print_model

    def __call__(self, model):
        paddleslim = try_import('paddleslim')
        self.quanter = paddleslim.dygraph.quant.QAT(config=self.quant_config)
        if self.print_model:
            logger.info("Model before quant:")
            logger.info(model)

        # For PP-YOLOE, convert model to deploy firstly.
        for layer in model.sublayers():
            if hasattr(layer, 'convert_to_deploy'):
                layer.convert_to_deploy()

        self.quanter.quantize(model)

        if self.print_model:
            logger.info("Quantized model:")
            logger.info(model)

        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)

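The quant_config dict is handed straight to paddleslim's dygraph QAT, so its keys are whatever that API understands. A hedged sketch with commonly used options; the values are placeholders, not taken from a particular shipped config:

quant_config = {
    'activation_preprocess_type': 'PACT',            # PACT clipping before fake quantization
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
    'dtype': 'int8',
    'window_size': 10000,
    'moving_rate': 0.9,
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}
qat = QAT(quant_config=quant_config, print_model=False)
model = qat(model)   # inserts fake-quant ops in place and returns the same model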
@register
@serializable
class PTQ(object):
    def __init__(self,
                 ptq_config,
                 quant_batch_num=10,
                 output_dir='output_inference',
                 fuse=True,
                 fuse_list=None):
        super(PTQ, self).__init__()
        self.ptq_config = ptq_config
        self.quant_batch_num = quant_batch_num
        self.output_dir = output_dir
        self.fuse = fuse
        self.fuse_list = fuse_list

    def __call__(self, model):
        paddleslim = try_import('paddleslim')
        self.ptq = paddleslim.PTQ(**self.ptq_config)
        model.eval()
        quant_model = self.ptq.quantize(
            model, fuse=self.fuse, fuse_list=self.fuse_list)

        return quant_model

    def save_quantized_model(self,
                             quant_model,
                             quantize_model_path,
                             input_spec=None):
        self.ptq.save_quantized_model(quant_model, quantize_model_path,
                                      input_spec)

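Unlike QAT, PTQ only needs a short calibration pass over real data before export. A hedged sketch of that flow; the calibration loader, batch handling and export path are assumptions about how a caller might drive this class, not code from this commit:

ptq = PTQ(ptq_config={}, quant_batch_num=10)   # empty ptq_config falls back to paddleslim defaults
quant_model = ptq(model)                       # wraps layers with observers, model switched to eval
for idx, data in enumerate(calib_loader):      # feed a few calibration batches
    quant_model(data)
    if idx + 1 >= ptq.quant_batch_num:
        break
ptq.save_quantized_model(quant_model, 'output_inference/model')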
66
paddle_detection/ppdet/slim/unstructured_prune.py
Normal file
@@ -0,0 +1,66 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle.utils import try_import

from ppdet.core.workspace import register, serializable
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)


@register
@serializable
class UnstructuredPruner(object):
    def __init__(self,
                 stable_epochs,
                 pruning_epochs,
                 tunning_epochs,
                 pruning_steps,
                 ratio,
                 initial_ratio,
                 prune_params_type=None):
        self.stable_epochs = stable_epochs
        self.pruning_epochs = pruning_epochs
        self.tunning_epochs = tunning_epochs
        self.ratio = ratio
        self.prune_params_type = prune_params_type
        self.initial_ratio = initial_ratio
        self.pruning_steps = pruning_steps

    def __call__(self, model, steps_per_epoch, skip_params_func=None):
        paddleslim = try_import('paddleslim')
        from paddleslim import GMPUnstructuredPruner
        configs = {
            'pruning_strategy': 'gmp',
            'stable_iterations': self.stable_epochs * steps_per_epoch,
            'pruning_iterations': self.pruning_epochs * steps_per_epoch,
            'tunning_iterations': self.tunning_epochs * steps_per_epoch,
            'resume_iteration': 0,
            'pruning_steps': self.pruning_steps,
            'initial_ratio': self.initial_ratio,
        }

        pruner = GMPUnstructuredPruner(
            model,
            ratio=self.ratio,
            skip_params_func=skip_params_func,
            prune_params_type=self.prune_params_type,
            local_sparsity=True,
            configs=configs)

        return pruner
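The object returned here is paddleslim's GMP unstructured pruner, which the training loop is expected to drive. A hedged sketch of how it might be wired up; all numbers and the loader are placeholders:

up = UnstructuredPruner(
    stable_epochs=1,
    pruning_epochs=8,
    tunning_epochs=3,
    pruning_steps=100,
    ratio=0.75,                        # target global sparsity
    initial_ratio=0.15,
    prune_params_type='conv1x1_only')
pruner = up(model, steps_per_epoch=len(train_loader))
# inside the training loop, typically once per iteration:
#   pruner.step()
# and before evaluation or saving, to apply the masks to the weights:
#   pruner.update_params()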