Newer
Older
import torch.distributed as dist
from mmcv.runner import DistEvalHook as _DistEvalHook
from mmcv.runner import EvalHook as _EvalHook
from torch.nn.modules.batchnorm import _BatchNorm
class EvalHook(_EvalHook):
"""Single GPU EvalHook, with efficient test support.
Args:
by_epoch (bool): Determine perform evaluation by epoch or by iteration.
If set to True, it will perform by epoch. Otherwise, by iteration.
Default: False.
efficient_test (bool): Whether save the results as local numpy files to
save CPU memory during evaluation. Default: False.
Returns:
list: The prediction results.
greater_keys = ['mIoU', 'mAcc', 'aAcc']
def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
super().__init__(*args, by_epoch=by_epoch, **kwargs)
self.efficient_test = efficient_test
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
if not self._should_evaluate(runner):
from mmseg.apis import single_gpu_test
results = single_gpu_test(runner.model, self.dataloader, show=False)
runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
key_score = self.evaluate(runner, results)
if self.save_best:
self._save_ckpt(runner, key_score)
class DistEvalHook(_DistEvalHook):
"""Distributed EvalHook, with efficient test support.
Args:
by_epoch (bool): Determine perform evaluation by epoch or by iteration.
If set to True, it will perform by epoch. Otherwise, by iteration.
efficient_test (bool): Whether save the results as local numpy files to
save CPU memory during evaluation. Default: False.
Returns:
list: The prediction results.
greater_keys = ['mIoU', 'mAcc', 'aAcc']
def __init__(self, *args, by_epoch=False, efficient_test=False, **kwargs):
super().__init__(*args, by_epoch=by_epoch, **kwargs)
self.efficient_test = efficient_test
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
# Synchronization of BatchNorm's buffer (running_mean
# and running_var) is not supported in the DDP of pytorch,
# which may cause the inconsistent performance of models in
# different ranks, so we broadcast BatchNorm's buffers
# of rank 0 to other ranks to avoid this.
if self.broadcast_bn_buffer:
model = runner.model
for name, module in model.named_modules():
if isinstance(module,
_BatchNorm) and module.track_running_stats:
dist.broadcast(module.running_var, 0)
dist.broadcast(module.running_mean, 0)
if not self._should_evaluate(runner):
tmpdir = self.tmpdir
if tmpdir is None:
tmpdir = osp.join(runner.work_dir, '.eval_hook')
from mmseg.apis import multi_gpu_test
results = multi_gpu_test(
runner.model,
self.dataloader,
gpu_collect=self.gpu_collect)
if runner.rank == 0:
print('\n')