Source code for mmflow.datasets.dataset_wrappers
# Copyright (c) OpenMMLab. All rights reserved.
from logging import Logger
from typing import Any, Optional, Sequence, Union
from mmcv.utils import print_log
from torch.utils.data import Dataset
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
from .builder import DATASETS
@DATASETS.register_module()
class ConcatDataset(_ConcatDataset):
    """A wrapper of concatenated dataset.

    Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but adds an
    ``evaluate`` method that forwards the results to the wrapped datasets.

    Args:
        datasets (list[:obj:`Dataset`]): A list of datasets.
        separate_eval (bool): Whether to evaluate the results
            separately if it is used as validation dataset.
            Defaults to True.

    Raises:
        NotImplementedError: If ``separate_eval`` is False and the datasets
            are not all of the same type.
    """

    def __init__(self,
                 datasets: Sequence[Dataset],
                 separate_eval: bool = True) -> None:
        super().__init__(datasets)
        self.separate_eval = separate_eval
        if not separate_eval:
            self._check_same_type()

    def _check_same_type(self) -> None:
        """Raise if the wrapped datasets are not all of one type."""
        if len({type(ds) for ds in self.datasets}) != 1:
            raise NotImplementedError(
                'All the datasets should have same types')

    def evaluate(self,
                 results: Sequence[Any],
                 logger: Optional[Union[str, Logger]] = None,
                 **kwargs: Any) -> dict:
        """Evaluate the results.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
                Its length must equal the total length of all datasets.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                ``evaluate`` method of the wrapped dataset(s).

        Returns:
            dict: If ``self.separate_eval=True``, the evaluation results of
            every dataset merged into one dict, each key prefixed with the
            position of its dataset (``'{dataset_idx}_{key}'``). Otherwise
            whatever the first dataset's ``evaluate`` returns for all the
            results at once.

        Raises:
            AssertionError: If ``results`` has a different size from the
                concatenated dataset, or a dataset has no ``evaluate``.
            NotImplementedError: If ``self.separate_eval`` is False and the
                datasets are not all of the same type.
        """
        assert len(results) == self.cumulative_sizes[-1], \
            ('Dataset and results have different sizes: '
             f'{self.cumulative_sizes[-1]} v.s. {len(results)}')

        # Check whether all the datasets support evaluation
        for dataset in self.datasets:
            assert hasattr(dataset, 'evaluate'), \
                f'{type(dataset)} does not implement evaluate function'

        if self.separate_eval:
            total_eval_results = dict()
            # Dataset i owns results[start_offsets[i]:cumulative_sizes[i]].
            start_offsets = [0] + list(self.cumulative_sizes[:-1])
            for dataset_idx, (start_idx, end_idx, dataset) in enumerate(
                    zip(start_offsets, self.cumulative_sizes,
                        self.datasets)):
                results_per_dataset = results[start_idx:end_idx]
                print_log(
                    f'\nEvaluating {dataset.__class__.__name__} with '
                    f'{len(results_per_dataset)} images now',
                    logger=logger)

                eval_results_per_dataset = dataset.evaluate(
                    results_per_dataset, logger=logger, **kwargs)
                for k, v in eval_results_per_dataset.items():
                    total_eval_results[f'{dataset_idx}_{k}'] = v
            return total_eval_results

        # `separate_eval` may have been changed after construction, so the
        # type constraint is verified again before the joint evaluation.
        self._check_same_type()

        # Joint evaluation: temporarily give the first dataset the
        # `data_infos` of all datasets and let it evaluate everything.
        first_dataset = self.datasets[0]
        original_data_infos = first_dataset.data_infos
        first_dataset.data_infos = [
            info for dataset in self.datasets for info in dataset.data_infos
        ]
        try:
            return first_dataset.evaluate(results, logger=logger, **kwargs)
        finally:
            # Restore even when evaluation fails, otherwise the first
            # dataset would keep the merged `data_infos`.
            first_dataset.data_infos = original_data_infos
@DATASETS.register_module()
class RepeatDataset:
    """A wrapper of repeated dataset.

    The length of repeated dataset will be `times` larger than the original
    dataset. This is useful when the data loading time is long but the dataset
    is small. Using RepeatDataset can reduce the data loading time between
    epochs.

    Args:
        dataset (:obj:`Dataset`): The dataset to be repeated.
        times (int): Repeat times.
    """

    def __init__(self, dataset: Dataset, times: int) -> None:
        self.dataset = dataset
        self.times = times
        # Cached once; later changes to the wrapped dataset's length are
        # not reflected here.
        self._ori_len = len(self.dataset)

    def __getitem__(self, idx: int) -> Any:
        """Get item from original dataset.

        Args:
            idx (int): Index into the repeated dataset. Negative indices
                count from the end.

        Returns:
            The item of the original dataset at ``idx % len(dataset)``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        total_len = len(self)
        # Without this check no index ever fails, so plain iteration
        # (`for item in repeat_dataset`) would never terminate, and an
        # empty dataset would surface as ZeroDivisionError.
        if not -total_len <= idx < total_len:
            raise IndexError(
                f'index {idx} is out of range for RepeatDataset of length '
                f'{total_len}')
        return self.dataset[idx % self._ori_len]

    def __len__(self) -> int:
        """The length is multiplied by ``times``"""
        return self.times * self._ori_len