Shortcuts

Source code for mmflow.datasets.dataset_wrappers

# Copyright (c) OpenMMLab. All rights reserved.
from logging import Logger
from typing import Any, Optional, Sequence, Union

from mmcv.utils import print_log
from torch.utils.data import Dataset
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset

from .builder import DATASETS


[docs]@DATASETS.register_module() class ConcatDataset(_ConcatDataset): """A wrapper of concatenated dataset. Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but concat the group flag for image aspect ratio. Args: datasets (list[:obj:`Dataset`]): A list of datasets. separate_eval (bool): Whether to evaluate the results separately if it is used as validation dataset. Defaults to True. """ def __init__(self, datasets: Sequence[Dataset], separate_eval: bool = True) -> None: super(ConcatDataset, self).__init__(datasets) self.separate_eval = separate_eval if not separate_eval: if len(set([type(ds) for ds in datasets])) != 1: raise NotImplementedError( 'All the datasets should have same types')
[docs] def evaluate(self, results: dict, logger: Optional[Union[str, Logger]] = None, **kwargs: Any): """Evaluate the results. Args: results (list[list | tuple]): Testing results of the dataset. logger (logging.Logger | str | None): Logger used for printing related information during evaluation. Default: None. Returns: dict[str: float]: AP results of the total dataset or each separate dataset if `self.separate_eval=True`. """ assert len(results) == self.cumulative_sizes[-1], \ ('Dataset and results have different sizes: ' f'{self.cumulative_sizes[-1]} v.s. {len(results)}') # Check whether all the datasets support evaluation for dataset in self.datasets: assert hasattr(dataset, 'evaluate'), \ f'{type(dataset)} does not implement evaluate function' if self.separate_eval: dataset_idx = -1 total_eval_results = dict() for size, dataset in zip(self.cumulative_sizes, self.datasets): start_idx = 0 if dataset_idx == -1 else \ self.cumulative_sizes[dataset_idx] end_idx = self.cumulative_sizes[dataset_idx + 1] results_per_dataset = results[start_idx:end_idx] print_log( f'\nEvaluateing {dataset.__class__.__name__} with ' f'{len(results_per_dataset)} images now', logger=logger) eval_results_per_dataset = dataset.evaluate( results_per_dataset, logger=logger, **kwargs) dataset_idx += 1 for k, v in eval_results_per_dataset.items(): total_eval_results.update({f'{dataset_idx}_{k}': v}) return total_eval_results elif len(set([type(ds) for ds in self.datasets])) != 1: raise NotImplementedError( 'All the datasets should have same types') else: original_data_infos = self.datasets[0].data_infos self.datasets[0].data_infos = sum( [dataset.data_infos for dataset in self.datasets], []) eval_results = self.datasets[0].evaluate( results, logger=logger, **kwargs) self.datasets[0].data_infos = original_data_infos return eval_results
[docs]@DATASETS.register_module() class RepeatDataset: """A wrapper of repeated dataset. The length of repeated dataset will be `times` larger than the original dataset. This is useful when the data loading time is long but the dataset is small. Using RepeatDataset can reduce the data loading time between epochs. Args: dataset (:obj:`Dataset`): The dataset to be repeated. times (int): Repeat times. """ def __init__(self, dataset, times): self.dataset = dataset self.times = times self._ori_len = len(self.dataset) def __getitem__(self, idx): """Get item from original dataset.""" return self.dataset[idx % self._ori_len] def __len__(self): """The length is multiplied by ``times``""" return self.times * self._ori_len
Read the Docs v: latest
Versions
latest
stable
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.