更换文档检测模型
This commit is contained in:
693
paddle_detection/ppdet/engine/callbacks.py
Normal file
693
paddle_detection/ppdet/engine/callbacks.py
Normal file
@@ -0,0 +1,693 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import six
|
||||
import copy
|
||||
import json
|
||||
|
||||
import paddle
|
||||
import paddle.distributed as dist
|
||||
|
||||
from ppdet.utils.checkpoint import save_model, save_semi_model
|
||||
from ppdet.metrics import get_infer_results
|
||||
|
||||
from ppdet.utils.logger import setup_logger
|
||||
logger = setup_logger('ppdet.engine')
|
||||
|
||||
__all__ = [
|
||||
'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer',
|
||||
'VisualDLWriter', 'SniperProposalsGenerator'
|
||||
]
|
||||
|
||||
|
||||
class Callback(object):
|
||||
def __init__(self, model):
|
||||
self.model = model
|
||||
|
||||
def on_step_begin(self, status):
|
||||
pass
|
||||
|
||||
def on_step_end(self, status):
|
||||
pass
|
||||
|
||||
def on_epoch_begin(self, status):
|
||||
pass
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
pass
|
||||
|
||||
def on_train_begin(self, status):
|
||||
pass
|
||||
|
||||
def on_train_end(self, status):
|
||||
pass
|
||||
|
||||
|
||||
class ComposeCallback(object):
|
||||
def __init__(self, callbacks):
|
||||
callbacks = [c for c in list(callbacks) if c is not None]
|
||||
for c in callbacks:
|
||||
assert isinstance(
|
||||
c, Callback), "callback should be subclass of Callback"
|
||||
self._callbacks = callbacks
|
||||
|
||||
def on_step_begin(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_step_begin(status)
|
||||
|
||||
def on_step_end(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_step_end(status)
|
||||
|
||||
def on_epoch_begin(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_epoch_begin(status)
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_epoch_end(status)
|
||||
|
||||
def on_train_begin(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_train_begin(status)
|
||||
|
||||
def on_train_end(self, status):
|
||||
for c in self._callbacks:
|
||||
c.on_train_end(status)
|
||||
|
||||
|
||||
class LogPrinter(Callback):
|
||||
def __init__(self, model):
|
||||
super(LogPrinter, self).__init__(model)
|
||||
|
||||
def on_step_end(self, status):
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
mode = status['mode']
|
||||
if mode == 'train':
|
||||
epoch_id = status['epoch_id']
|
||||
step_id = status['step_id']
|
||||
steps_per_epoch = status['steps_per_epoch']
|
||||
training_staus = status['training_staus']
|
||||
batch_time = status['batch_time']
|
||||
data_time = status['data_time']
|
||||
|
||||
epoches = self.model.cfg.epoch
|
||||
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
|
||||
))]['batch_size']
|
||||
|
||||
logs = training_staus.log()
|
||||
space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
|
||||
if step_id % self.model.cfg.log_iter == 0:
|
||||
eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
|
||||
eta_sec = eta_steps * batch_time.global_avg
|
||||
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
|
||||
ips = float(batch_size) / batch_time.avg
|
||||
fmt = ' '.join([
|
||||
'Epoch: [{}]',
|
||||
'[{' + space_fmt + '}/{}]',
|
||||
'learning_rate: {lr:.6f}',
|
||||
'{meters}',
|
||||
'eta: {eta}',
|
||||
'batch_cost: {btime}',
|
||||
'data_cost: {dtime}',
|
||||
'ips: {ips:.4f} images/s',
|
||||
])
|
||||
fmt = fmt.format(
|
||||
epoch_id,
|
||||
step_id,
|
||||
steps_per_epoch,
|
||||
lr=status['learning_rate'],
|
||||
meters=logs,
|
||||
eta=eta_str,
|
||||
btime=str(batch_time),
|
||||
dtime=str(data_time),
|
||||
ips=ips)
|
||||
logger.info(fmt)
|
||||
if mode == 'eval':
|
||||
step_id = status['step_id']
|
||||
if step_id % 100 == 0:
|
||||
logger.info("Eval iter: {}".format(step_id))
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
mode = status['mode']
|
||||
if mode == 'eval':
|
||||
sample_num = status['sample_num']
|
||||
cost_time = status['cost_time']
|
||||
logger.info('Total sample number: {}, average FPS: {}'.format(
|
||||
sample_num, sample_num / cost_time))
|
||||
|
||||
|
||||
class Checkpointer(Callback):
|
||||
def __init__(self, model):
|
||||
super(Checkpointer, self).__init__(model)
|
||||
self.best_ap = -1000.
|
||||
self.save_dir = self.model.cfg.save_dir
|
||||
if hasattr(self.model.model, 'student_model'):
|
||||
self.weight = self.model.model.student_model
|
||||
else:
|
||||
self.weight = self.model.model
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
# Checkpointer only performed during training
|
||||
mode = status['mode']
|
||||
epoch_id = status['epoch_id']
|
||||
weight = None
|
||||
save_name = None
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if mode == 'train':
|
||||
end_epoch = self.model.cfg.epoch
|
||||
if (
|
||||
epoch_id + 1
|
||||
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
|
||||
save_name = str(
|
||||
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
|
||||
weight = self.weight.state_dict()
|
||||
elif mode == 'eval':
|
||||
if 'save_best_model' in status and status['save_best_model']:
|
||||
for metric in self.model._metrics:
|
||||
map_res = metric.get_results()
|
||||
eval_func = "ap"
|
||||
if 'pose3d' in map_res:
|
||||
key = 'pose3d'
|
||||
eval_func = "mpjpe"
|
||||
elif 'bbox' in map_res:
|
||||
key = 'bbox'
|
||||
elif 'keypoint' in map_res:
|
||||
key = 'keypoint'
|
||||
else:
|
||||
key = 'mask'
|
||||
if key not in map_res:
|
||||
logger.warning("Evaluation results empty, this may be due to " \
|
||||
"training iterations being too few or not " \
|
||||
"loading the correct weights.")
|
||||
return
|
||||
if map_res[key][0] >= self.best_ap:
|
||||
self.best_ap = map_res[key][0]
|
||||
save_name = 'best_model'
|
||||
weight = self.weight.state_dict()
|
||||
logger.info("Best test {} {} is {:0.3f}.".format(
|
||||
key, eval_func, abs(self.best_ap)))
|
||||
if weight:
|
||||
if self.model.use_ema:
|
||||
exchange_save_model = status.get('exchange_save_model',
|
||||
False)
|
||||
if not exchange_save_model:
|
||||
# save model and ema_model
|
||||
save_model(
|
||||
status['weight'],
|
||||
self.model.optimizer,
|
||||
self.save_dir,
|
||||
save_name,
|
||||
epoch_id + 1,
|
||||
ema_model=weight)
|
||||
else:
|
||||
# save model(student model) and ema_model(teacher model)
|
||||
# in DenseTeacher SSOD, the teacher model will be higher,
|
||||
# so exchange when saving pdparams
|
||||
student_model = status['weight'] # model
|
||||
teacher_model = weight # ema_model
|
||||
save_model(
|
||||
teacher_model,
|
||||
self.model.optimizer,
|
||||
self.save_dir,
|
||||
save_name,
|
||||
epoch_id + 1,
|
||||
ema_model=student_model)
|
||||
del teacher_model
|
||||
del student_model
|
||||
else:
|
||||
save_model(weight, self.model.optimizer, self.save_dir,
|
||||
save_name, epoch_id + 1)
|
||||
|
||||
|
||||
class WiferFaceEval(Callback):
|
||||
def __init__(self, model):
|
||||
super(WiferFaceEval, self).__init__(model)
|
||||
|
||||
def on_epoch_begin(self, status):
|
||||
assert self.model.mode == 'eval', \
|
||||
"WiferFaceEval can only be set during evaluation"
|
||||
for metric in self.model._metrics:
|
||||
metric.update(self.model.model)
|
||||
sys.exit()
|
||||
|
||||
|
||||
class VisualDLWriter(Callback):
|
||||
"""
|
||||
Use VisualDL to log data or image
|
||||
"""
|
||||
|
||||
def __init__(self, model):
|
||||
super(VisualDLWriter, self).__init__(model)
|
||||
|
||||
assert six.PY3, "VisualDL requires Python >= 3.5"
|
||||
try:
|
||||
from visualdl import LogWriter
|
||||
except Exception as e:
|
||||
logger.error('visualdl not found, plaese install visualdl. '
|
||||
'for example: `pip install visualdl`.')
|
||||
raise e
|
||||
self.vdl_writer = LogWriter(
|
||||
model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
|
||||
self.vdl_loss_step = 0
|
||||
self.vdl_mAP_step = 0
|
||||
self.vdl_image_step = 0
|
||||
self.vdl_image_frame = 0
|
||||
|
||||
def on_step_end(self, status):
|
||||
mode = status['mode']
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if mode == 'train':
|
||||
training_staus = status['training_staus']
|
||||
for loss_name, loss_value in training_staus.get().items():
|
||||
self.vdl_writer.add_scalar(loss_name, loss_value,
|
||||
self.vdl_loss_step)
|
||||
self.vdl_loss_step += 1
|
||||
elif mode == 'test':
|
||||
ori_image = status['original_image']
|
||||
result_image = status['result_image']
|
||||
self.vdl_writer.add_image(
|
||||
"original/frame_{}".format(self.vdl_image_frame), ori_image,
|
||||
self.vdl_image_step)
|
||||
self.vdl_writer.add_image(
|
||||
"result/frame_{}".format(self.vdl_image_frame),
|
||||
result_image, self.vdl_image_step)
|
||||
self.vdl_image_step += 1
|
||||
# each frame can display ten pictures at most.
|
||||
if self.vdl_image_step % 10 == 0:
|
||||
self.vdl_image_step = 0
|
||||
self.vdl_image_frame += 1
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
mode = status['mode']
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if mode == 'eval':
|
||||
for metric in self.model._metrics:
|
||||
for key, map_value in metric.get_results().items():
|
||||
self.vdl_writer.add_scalar("{}-mAP".format(key),
|
||||
map_value[0],
|
||||
self.vdl_mAP_step)
|
||||
self.vdl_mAP_step += 1
|
||||
|
||||
|
||||
class WandbCallback(Callback):
|
||||
def __init__(self, model):
|
||||
super(WandbCallback, self).__init__(model)
|
||||
|
||||
try:
|
||||
import wandb
|
||||
self.wandb = wandb
|
||||
except Exception as e:
|
||||
logger.error('wandb not found, please install wandb. '
|
||||
'Use: `pip install wandb`.')
|
||||
raise e
|
||||
|
||||
self.wandb_params = model.cfg.get('wandb', None)
|
||||
self.save_dir = self.model.cfg.save_dir
|
||||
if self.wandb_params is None:
|
||||
self.wandb_params = {}
|
||||
for k, v in model.cfg.items():
|
||||
if k.startswith("wandb_"):
|
||||
self.wandb_params.update({k.lstrip("wandb_"): v})
|
||||
|
||||
self._run = None
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
_ = self.run
|
||||
self.run.config.update(self.model.cfg)
|
||||
self.run.define_metric("epoch")
|
||||
self.run.define_metric("eval/*", step_metric="epoch")
|
||||
|
||||
self.best_ap = -1000.
|
||||
self.fps = []
|
||||
|
||||
@property
|
||||
def run(self):
|
||||
if self._run is None:
|
||||
if self.wandb.run is not None:
|
||||
logger.info(
|
||||
"There is an ongoing wandb run which will be used"
|
||||
"for logging. Please use `wandb.finish()` to end that"
|
||||
"if the behaviour is not intended")
|
||||
self._run = self.wandb.run
|
||||
else:
|
||||
self._run = self.wandb.init(**self.wandb_params)
|
||||
return self._run
|
||||
|
||||
def save_model(self,
|
||||
optimizer,
|
||||
save_dir,
|
||||
save_name,
|
||||
last_epoch,
|
||||
ema_model=None,
|
||||
ap=None,
|
||||
fps=None,
|
||||
tags=None):
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
model_path = os.path.join(save_dir, save_name)
|
||||
metadata = {}
|
||||
metadata["last_epoch"] = last_epoch
|
||||
if ap:
|
||||
metadata["ap"] = ap
|
||||
|
||||
if fps:
|
||||
metadata["fps"] = fps
|
||||
|
||||
if ema_model is None:
|
||||
ema_artifact = self.wandb.Artifact(
|
||||
name="ema_model-{}".format(self.run.id),
|
||||
type="model",
|
||||
metadata=metadata)
|
||||
model_artifact = self.wandb.Artifact(
|
||||
name="model-{}".format(self.run.id),
|
||||
type="model",
|
||||
metadata=metadata)
|
||||
|
||||
ema_artifact.add_file(model_path + ".pdema", name="model_ema")
|
||||
model_artifact.add_file(model_path + ".pdparams", name="model")
|
||||
|
||||
self.run.log_artifact(ema_artifact, aliases=tags)
|
||||
self.run.log_artfact(model_artifact, aliases=tags)
|
||||
else:
|
||||
model_artifact = self.wandb.Artifact(
|
||||
name="model-{}".format(self.run.id),
|
||||
type="model",
|
||||
metadata=metadata)
|
||||
model_artifact.add_file(model_path + ".pdparams", name="model")
|
||||
self.run.log_artifact(model_artifact, aliases=tags)
|
||||
|
||||
def on_step_end(self, status):
|
||||
|
||||
mode = status['mode']
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if mode == 'train':
|
||||
training_status = status['training_staus'].get()
|
||||
for k, v in training_status.items():
|
||||
training_status[k] = float(v)
|
||||
|
||||
# calculate ips, data_cost, batch_cost
|
||||
batch_time = status['batch_time']
|
||||
data_time = status['data_time']
|
||||
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
|
||||
))]['batch_size']
|
||||
|
||||
ips = float(batch_size) / float(batch_time.avg)
|
||||
data_cost = float(data_time.avg)
|
||||
batch_cost = float(batch_time.avg)
|
||||
|
||||
metrics = {"train/" + k: v for k, v in training_status.items()}
|
||||
|
||||
metrics["train/ips"] = ips
|
||||
metrics["train/data_cost"] = data_cost
|
||||
metrics["train/batch_cost"] = batch_cost
|
||||
|
||||
self.fps.append(ips)
|
||||
self.run.log(metrics)
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
mode = status['mode']
|
||||
epoch_id = status['epoch_id']
|
||||
save_name = None
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if mode == 'train':
|
||||
fps = sum(self.fps) / len(self.fps)
|
||||
self.fps = []
|
||||
|
||||
end_epoch = self.model.cfg.epoch
|
||||
if (
|
||||
epoch_id + 1
|
||||
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
|
||||
save_name = str(
|
||||
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
|
||||
tags = ["latest", "epoch_{}".format(epoch_id)]
|
||||
self.save_model(
|
||||
self.model.optimizer,
|
||||
self.save_dir,
|
||||
save_name,
|
||||
epoch_id + 1,
|
||||
self.model.use_ema,
|
||||
fps=fps,
|
||||
tags=tags)
|
||||
if mode == 'eval':
|
||||
sample_num = status['sample_num']
|
||||
cost_time = status['cost_time']
|
||||
|
||||
fps = sample_num / cost_time
|
||||
|
||||
merged_dict = {}
|
||||
for metric in self.model._metrics:
|
||||
for key, map_value in metric.get_results().items():
|
||||
merged_dict["eval/{}-mAP".format(key)] = map_value[0]
|
||||
merged_dict["epoch"] = status["epoch_id"]
|
||||
merged_dict["eval/fps"] = sample_num / cost_time
|
||||
|
||||
self.run.log(merged_dict)
|
||||
|
||||
if 'save_best_model' in status and status['save_best_model']:
|
||||
for metric in self.model._metrics:
|
||||
map_res = metric.get_results()
|
||||
if 'pose3d' in map_res:
|
||||
key = 'pose3d'
|
||||
elif 'bbox' in map_res:
|
||||
key = 'bbox'
|
||||
elif 'keypoint' in map_res:
|
||||
key = 'keypoint'
|
||||
else:
|
||||
key = 'mask'
|
||||
if key not in map_res:
|
||||
logger.warning("Evaluation results empty, this may be due to " \
|
||||
"training iterations being too few or not " \
|
||||
"loading the correct weights.")
|
||||
return
|
||||
if map_res[key][0] >= self.best_ap:
|
||||
self.best_ap = map_res[key][0]
|
||||
save_name = 'best_model'
|
||||
tags = ["best", "epoch_{}".format(epoch_id)]
|
||||
|
||||
self.save_model(
|
||||
self.model.optimizer,
|
||||
self.save_dir,
|
||||
save_name,
|
||||
last_epoch=epoch_id + 1,
|
||||
ema_model=self.model.use_ema,
|
||||
ap=abs(self.best_ap),
|
||||
fps=fps,
|
||||
tags=tags)
|
||||
|
||||
def on_train_end(self, status):
|
||||
self.run.finish()
|
||||
|
||||
|
||||
class SniperProposalsGenerator(Callback):
|
||||
def __init__(self, model):
|
||||
super(SniperProposalsGenerator, self).__init__(model)
|
||||
ori_dataset = self.model.dataset
|
||||
self.dataset = self._create_new_dataset(ori_dataset)
|
||||
self.loader = self.model.loader
|
||||
self.cfg = self.model.cfg
|
||||
self.infer_model = self.model.model
|
||||
|
||||
def _create_new_dataset(self, ori_dataset):
|
||||
dataset = copy.deepcopy(ori_dataset)
|
||||
# init anno_cropper
|
||||
dataset.init_anno_cropper()
|
||||
# generate infer roidbs
|
||||
ori_roidbs = dataset.get_ori_roidbs()
|
||||
roidbs = dataset.anno_cropper.crop_infer_anno_records(ori_roidbs)
|
||||
# set new roidbs
|
||||
dataset.set_roidbs(roidbs)
|
||||
|
||||
return dataset
|
||||
|
||||
def _eval_with_loader(self, loader):
|
||||
results = []
|
||||
with paddle.no_grad():
|
||||
self.infer_model.eval()
|
||||
for step_id, data in enumerate(loader):
|
||||
outs = self.infer_model(data)
|
||||
for key in ['im_shape', 'scale_factor', 'im_id']:
|
||||
outs[key] = data[key]
|
||||
for key, value in outs.items():
|
||||
if hasattr(value, 'numpy'):
|
||||
outs[key] = value.numpy()
|
||||
|
||||
results.append(outs)
|
||||
|
||||
return results
|
||||
|
||||
def on_train_end(self, status):
|
||||
self.loader.dataset = self.dataset
|
||||
results = self._eval_with_loader(self.loader)
|
||||
results = self.dataset.anno_cropper.aggregate_chips_detections(results)
|
||||
# sniper
|
||||
proposals = []
|
||||
clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()}
|
||||
for outs in results:
|
||||
batch_res = get_infer_results(outs, clsid2catid)
|
||||
start = 0
|
||||
for i, im_id in enumerate(outs['im_id']):
|
||||
bbox_num = outs['bbox_num']
|
||||
end = start + bbox_num[i]
|
||||
bbox_res = batch_res['bbox'][start:end] \
|
||||
if 'bbox' in batch_res else None
|
||||
if bbox_res:
|
||||
proposals += bbox_res
|
||||
logger.info("save proposals in {}".format(self.cfg.proposals_path))
|
||||
with open(self.cfg.proposals_path, 'w') as f:
|
||||
json.dump(proposals, f)
|
||||
|
||||
|
||||
class SemiLogPrinter(LogPrinter):
|
||||
def __init__(self, model):
|
||||
super(SemiLogPrinter, self).__init__(model)
|
||||
|
||||
def on_step_end(self, status):
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
mode = status['mode']
|
||||
if mode == 'train':
|
||||
epoch_id = status['epoch_id']
|
||||
step_id = status['step_id']
|
||||
iter_id = status['iter_id']
|
||||
steps_per_epoch = status['steps_per_epoch']
|
||||
training_staus = status['training_staus']
|
||||
batch_time = status['batch_time']
|
||||
data_time = status['data_time']
|
||||
|
||||
epoches = self.model.cfg.epoch
|
||||
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
|
||||
))]['batch_size']
|
||||
iters = epoches * steps_per_epoch
|
||||
logs = training_staus.log()
|
||||
iter_space_fmt = ':' + str(len(str(iters))) + 'd'
|
||||
space_fmt = ':' + str(len(str(iters))) + 'd'
|
||||
if step_id % self.model.cfg.log_iter == 0:
|
||||
eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
|
||||
eta_sec = eta_steps * batch_time.global_avg
|
||||
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
|
||||
ips = float(batch_size) / batch_time.avg
|
||||
fmt = ' '.join([
|
||||
'{' + iter_space_fmt + '}/{} iters',
|
||||
'Epoch: [{}]',
|
||||
'[{' + space_fmt + '}/{}]',
|
||||
'learning_rate: {lr:.6f}',
|
||||
'{meters}',
|
||||
'eta: {eta}',
|
||||
'batch_cost: {btime}',
|
||||
'data_cost: {dtime}',
|
||||
'ips: {ips:.4f} images/s',
|
||||
])
|
||||
fmt = fmt.format(
|
||||
iter_id,
|
||||
iters,
|
||||
epoch_id,
|
||||
step_id,
|
||||
steps_per_epoch,
|
||||
lr=status['learning_rate'],
|
||||
meters=logs,
|
||||
eta=eta_str,
|
||||
btime=str(batch_time),
|
||||
dtime=str(data_time),
|
||||
ips=ips)
|
||||
logger.info(fmt)
|
||||
if mode == 'eval':
|
||||
step_id = status['step_id']
|
||||
if step_id % 100 == 0:
|
||||
logger.info("Eval iter: {}".format(step_id))
|
||||
|
||||
|
||||
class SemiCheckpointer(Checkpointer):
|
||||
def __init__(self, model):
|
||||
super(SemiCheckpointer, self).__init__(model)
|
||||
cfg = self.model.cfg
|
||||
self.best_ap = 0.
|
||||
self.save_dir = os.path.join(self.model.cfg.save_dir,
|
||||
self.model.cfg.filename)
|
||||
if hasattr(self.model.model, 'student') and hasattr(self.model.model,
|
||||
'teacher'):
|
||||
self.weight = (self.model.model.teacher, self.model.model.student)
|
||||
elif hasattr(self.model.model, 'student') or hasattr(self.model.model,
|
||||
'teacher'):
|
||||
raise AttributeError(
|
||||
"model has no attribute 'student' or 'teacher'")
|
||||
else:
|
||||
raise AttributeError(
|
||||
"model has no attribute 'student' and 'teacher'")
|
||||
|
||||
def every_n_iters(self, iter_id, n):
|
||||
return (iter_id + 1) % n == 0 if n > 0 else False
|
||||
|
||||
def on_step_end(self, status):
|
||||
# Checkpointer only performed during training
|
||||
mode = status['mode']
|
||||
eval_interval = status['eval_interval']
|
||||
save_interval = status['save_interval']
|
||||
iter_id = status['iter_id']
|
||||
epoch_id = status['epoch_id']
|
||||
t_weight = None
|
||||
s_weight = None
|
||||
save_name = None
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if self.every_n_iters(iter_id, save_interval) and mode == 'train':
|
||||
save_name = "last_epoch"
|
||||
# save_name = str(iter_id + 1)
|
||||
t_weight = self.weight[0].state_dict()
|
||||
s_weight = self.weight[1].state_dict()
|
||||
save_semi_model(t_weight, s_weight, self.model.optimizer,
|
||||
self.save_dir, save_name, epoch_id + 1,
|
||||
iter_id + 1)
|
||||
|
||||
def on_epoch_end(self, status):
|
||||
# Checkpointer only performed during training
|
||||
mode = status['mode']
|
||||
eval_interval = status['eval_interval']
|
||||
save_interval = status['save_interval']
|
||||
iter_id = status['iter_id']
|
||||
epoch_id = status['epoch_id']
|
||||
t_weight = None
|
||||
s_weight = None
|
||||
save_name = None
|
||||
if dist.get_world_size() < 2 or dist.get_rank() == 0:
|
||||
if self.every_n_iters(iter_id, eval_interval) and mode == 'eval':
|
||||
if 'save_best_model' in status and status['save_best_model']:
|
||||
for metric in self.model._metrics:
|
||||
map_res = metric.get_results()
|
||||
if 'bbox' in map_res:
|
||||
key = 'bbox'
|
||||
elif 'keypoint' in map_res:
|
||||
key = 'keypoint'
|
||||
else:
|
||||
key = 'mask'
|
||||
if key not in map_res:
|
||||
logger.warning("Evaluation results empty, this may be due to " \
|
||||
"training iterations being too few or not " \
|
||||
"loading the correct weights.")
|
||||
return
|
||||
if map_res[key][0] > self.best_ap:
|
||||
self.best_ap = map_res[key][0]
|
||||
save_name = 'best_model'
|
||||
t_weight = self.weight[0].state_dict()
|
||||
s_weight = self.weight[1].state_dict()
|
||||
logger.info("Best teacher test {} ap is {:0.3f}.".
|
||||
format(key, self.best_ap))
|
||||
if t_weight and s_weight:
|
||||
save_semi_model(t_weight, s_weight,
|
||||
self.model.optimizer, self.save_dir,
|
||||
save_name, epoch_id + 1, iter_id + 1)
|
||||
Reference in New Issue
Block a user