ContinualAI / avalanche, build 5268393053 (pending completion)
Pull Request #1397: Specialize benchmark creation helpers
Merge 60d244754 into e91562200

417 of 538 new or added lines in 30 files covered (77.51%)
43 existing lines in 5 files now uncovered
16586 of 22630 relevant lines covered (73.29%), 2.93 hits per line

Source file: /avalanche/benchmarks/utils/utils.py (73.2% covered)

################################################################################
# Copyright (c) 2021 ContinualAI.                                              #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 12-05-2020                                                             #
# Author(s): Lorenzo Pellegrini                                                #
# E-mail: contact@continualai.org                                              #
# Website: avalanche.continualai.org                                           #
################################################################################

""" Common benchmarks/environments utils. """

from collections import OrderedDict, defaultdict, deque
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Generic,
    Iterator,
    List,
    Iterable,
    Mapping,
    Optional,
    Sequence,
    TypeVar,
    Union,
    Dict,
    SupportsInt,
)
import warnings
import numpy as np

import torch
from torch import Tensor
from torch.utils.data import Subset, ConcatDataset, TensorDataset

from avalanche.benchmarks.utils.data import AvalancheDataset
from avalanche.benchmarks.utils.data_attribute import DataAttribute
from avalanche.benchmarks.utils.dataset_definitions import (
    ISupportedClassificationDataset,
)
from avalanche.benchmarks.utils.dataset_utils import (
    SubSequence,
    find_list_from_index,
)
from avalanche.benchmarks.utils.flat_data import ConstantSequence
from avalanche.benchmarks.utils.transform_groups import (
    TransformGroupDef,
    TransformGroups,
    XTransform,
    YTransform
)

if TYPE_CHECKING:
    from avalanche.benchmarks.utils.classification_dataset import (
        ClassificationDataset
    )

T_co = TypeVar("T_co", covariant=True)
TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset")


def tensor_as_list(sequence) -> List:
    # Numpy: list(np.array([1, 2, 3])) returns [1, 2, 3]
    # whereas: list(torch.tensor([1, 2, 3])) returns ->
    # -> [tensor(1), tensor(2), tensor(3)]
    #
    # This is why we have to handle Tensor in a different way
    if isinstance(sequence, list):
        return sequence
    if not isinstance(sequence, Iterable):
        return [sequence]
    if isinstance(sequence, Tensor):
        return sequence.tolist()
    return list(sequence)

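# Example (illustrative sketch, not taken from the module's tests): the helper
# flattens tensors and wraps scalars so that the result is a plain Python list.
#
#   >>> tensor_as_list(torch.tensor([1, 2, 3]))
#   [1, 2, 3]
#   >>> tensor_as_list(5)
#   [5]
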
def _indexes_grouped_by_classes(
    targets: Sequence[int],
    patterns_indexes: Union[None, Sequence[int]],
    sort_indexes: bool = True,
    sort_classes: bool = True,
) -> Union[List[int], None]:
    result_per_class: Dict[int, List[int]] = OrderedDict()
    result: List[int] = []

    indexes_was_none = patterns_indexes is None

    if patterns_indexes is not None:
        patterns_indexes = tensor_as_list(patterns_indexes)
    else:
        patterns_indexes = list(range(len(targets)))

    targets = tensor_as_list(targets)

    # Consider that result_per_class is an OrderedDict
    # This means that, if sort_classes is True, the next for statement
    # will initialize "result_per_class" in sorted order which in turn means
    # that patterns will be ordered by ascending class ID.
    classes = torch.unique(
        torch.as_tensor(targets), sorted=sort_classes
    ).tolist()

    for class_id in classes:
        result_per_class[class_id] = []

    # Stores each pattern index in the appropriate class list
    for idx in patterns_indexes:
        result_per_class[targets[idx]].append(idx)

    # Concatenate all the pattern indexes
    for class_id in classes:
        if sort_indexes:
            result_per_class[class_id].sort()
        result.extend(result_per_class[class_id])

    if result == patterns_indexes and indexes_was_none:
        # Result is [0, 1, 2, ..., N] and patterns_indexes was originally None
        # This means that the user tried to obtain a full Dataset
        # (indexes_was_none) only ordered according to the sort_indexes and
        # sort_classes parameters. However, sort_indexes+sort_classes returned
        # the plain pattern sequence as it already is. So the original Dataset
        # already satisfies the sort_indexes+sort_classes constraints.
        # By returning None, we communicate that the Dataset can be taken as-is.
        return None

    return result


def grouped_and_ordered_indexes(
    targets: Sequence[int],
    patterns_indexes: Union[None, Sequence[int]],
    bucket_classes: bool = True,
    sort_classes: bool = False,
    sort_indexes: bool = False,
) -> Union[List[int], None]:
    """
    Given the targets list of a dataset and the patterns to include, returns
    the pattern indexes sorted according to the ``bucket_classes``,
    ``sort_classes`` and ``sort_indexes`` parameters.

    :param targets: The list of pattern targets.
    :param patterns_indexes: A list of pattern indexes to include in the set.
        If None, all patterns will be included.
    :param bucket_classes: If True, pattern indexes will be returned so that
        patterns will be grouped by class. Defaults to True.
    :param sort_classes: If both ``bucket_classes`` and ``sort_classes`` are
        True, class groups will be sorted by class index. Ignored if
        ``bucket_classes`` is False. Defaults to False.
    :param sort_indexes: If True, pattern indexes will be sorted. When
        bucketing by class, patterns will be sorted inside their buckets.
        Defaults to False.

    :returns: The list of pattern indexes sorted according to the
        ``bucket_classes``, ``sort_classes`` and ``sort_indexes`` parameters,
        or None if ``patterns_indexes`` is None and the whole dataset can be
        taken using the existing patterns order.
    """
    if bucket_classes:
        return _indexes_grouped_by_classes(
            targets,
            patterns_indexes,
            sort_indexes=sort_indexes,
            sort_classes=sort_classes,
        )

    if patterns_indexes is None:
        # No grouping and sub-set creation required... just return None
        return None
    if not sort_indexes:
        # No sorting required, just return patterns_indexes
        return tensor_as_list(patterns_indexes)

    # We are here only because patterns_indexes != None and sort_indexes is True
    patterns_indexes = tensor_as_list(patterns_indexes)
    result = list(patterns_indexes)  # Make sure we're working on a copy
    result.sort()
    return result

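# Example (illustrative sketch): with bucketing, class-sorting and
# index-sorting enabled, indexes are grouped per class (class 0 first).
#
#   >>> grouped_and_ordered_indexes(
#   ...     targets=[1, 0, 1, 0], patterns_indexes=None,
#   ...     bucket_classes=True, sort_classes=True, sort_indexes=True)
#   [1, 3, 0, 2]
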
def as_avalanche_dataset(
    dataset: ISupportedClassificationDataset[T_co],
) -> AvalancheDataset:
    if isinstance(dataset, AvalancheDataset):
        return dataset
    return AvalancheDataset([dataset])


def as_classification_dataset(
    dataset: ISupportedClassificationDataset[T_co],
) -> 'ClassificationDataset':
    from avalanche.benchmarks.utils.classification_dataset import (
        ClassificationDataset
    )

    if isinstance(dataset, ClassificationDataset):
        return dataset
    return ClassificationDataset([dataset])


def _count_unique(*sequences: Sequence[SupportsInt]):
    uniques = set()

    for seq in sequences:
        for x in seq:
            uniques.add(int(x))

    return len(uniques)


def concat_datasets(datasets):
    """Concatenates a list of datasets."""
    if len(datasets) == 0:
        return AvalancheDataset([])
    res = datasets[0]
    if not isinstance(res, AvalancheDataset):
        res = AvalancheDataset([res])

    for d in datasets[1:]:
        if not isinstance(d, AvalancheDataset):
            d = AvalancheDataset([d])
        res = res.concat(d)
    return res

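# Example (illustrative sketch): plain PyTorch datasets are wrapped into
# AvalancheDataset instances before concatenation, so the result below should
# be an AvalancheDataset holding all 5 examples.
#
#   >>> d1 = TensorDataset(torch.zeros(3, 2), torch.zeros(3))
#   >>> d2 = TensorDataset(torch.ones(2, 2), torch.ones(2))
#   >>> len(concat_datasets([d1, d2]))
#   5
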
def find_common_transforms_group(
        datasets: Iterable[Any],
        default_group: str = "train") -> str:
    """
    Utility used to find the common transformations group across multiple
    datasets.

    To compute the common group, the current group of each dataset is
    considered. Objects which are not instances of :class:`AvalancheDataset`
    are ignored. If no common group is found, then the default one is
    returned.

    :param datasets: The list of datasets.
    :param default_group: The name of the default group.
    :returns: The name of the common group.
    """
    # Find common "current_group" or use "train"
    uniform_group: Optional[str] = None
    for d_set in datasets:
        if isinstance(d_set, AvalancheDataset):
            if uniform_group is None:
                uniform_group = d_set._flat_data._transform_groups.current_group
            else:
                if (
                    uniform_group
                    != d_set._flat_data._transform_groups.current_group
                ):
                    uniform_group = None
                    break

    if uniform_group is None:
        initial_transform_group = default_group
    else:
        initial_transform_group = uniform_group

    return initial_transform_group

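# Example (illustrative sketch): objects that are not AvalancheDataset
# instances are skipped, so when no AvalancheDataset is present the default
# group name is returned.
#
#   >>> find_common_transforms_group(["not a dataset", 42])
#   'train'
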
Y = TypeVar('Y')
T = TypeVar('T')


def _traverse_supported_dataset(
    dataset: Y,
    values_selector: Callable[[Y, Optional[List[int]]], Optional[Sequence[T]]],
    indices: Optional[List[int]] = None
) -> Sequence[T]:
    """
    Traverse the given dataset by gathering required info.

    The given dataset is traversed by covering all sub-datasets
    contained in PyTorch :class:`Subset` and :class:`ConcatDataset` objects.
    Beware that instances of :class:`AvalancheDataset` will not
    be traversed as those objects already have the proper data
    attribute fields populated with data from leaf datasets.

    For each dataset, the `values_selector` will be called to gather
    the required information. The values returned by the given selector
    are then concatenated to create a final list of values.

    :param dataset: The dataset to traverse.
    :param values_selector: A function that, given the dataset
        and the indices to consider (which may be None if the entire
        dataset must be considered), returns a list of selected values.
    :param indices: The indices of the examples to consider, or None to
        consider the entire dataset.
    :returns: The list of selected values.
    """
    initial_error = None
    try:
        result = values_selector(dataset, indices)
        if result is not None:
            return result
    except BaseException as e:
        initial_error = e

    if isinstance(dataset, Subset):
        if indices is None:
            indices = [dataset.indices[x] for x in range(len(dataset))]
        else:
            indices = [dataset.indices[x] for x in indices]

        return list(
            _traverse_supported_dataset(
                dataset.dataset, values_selector, indices
            )
        )

    if isinstance(dataset, ConcatDataset):
        result = []
        if indices is None:
            for c_dataset in dataset.datasets:
                result += list(
                    _traverse_supported_dataset(
                        c_dataset, values_selector, indices
                    )
                )
            return result

        datasets_to_indexes = defaultdict(list)
        indexes_to_dataset = []
        datasets_len = []
        recursion_result = []

        all_size = 0
        for c_dataset in dataset.datasets:
            len_dataset = len(c_dataset)
            datasets_len.append(len_dataset)
            all_size += len_dataset

        for subset_idx in indices:
            dataset_idx, pattern_idx = find_list_from_index(
                subset_idx, datasets_len, all_size
            )
            datasets_to_indexes[dataset_idx].append(pattern_idx)
            indexes_to_dataset.append(dataset_idx)

        for dataset_idx, c_dataset in enumerate(dataset.datasets):
            recursion_result.append(
                deque(
                    _traverse_supported_dataset(
                        c_dataset,
                        values_selector,
                        datasets_to_indexes[dataset_idx],
                    )
                )
            )

        result = []
        for idx in range(len(indices)):
            dataset_idx = indexes_to_dataset[idx]
            result.append(recursion_result[dataset_idx].popleft())

        return result

    if initial_error is not None:
        raise initial_error

    raise ValueError("Error: can't find the needed data in the given dataset")

def _init_task_labels(dataset, task_labels, check_shape=True) -> \
        Optional[DataAttribute[int]]:
    """
    Initializes the task label list (one for each pattern in the dataset).

    Precedence is given to the values contained in `task_labels`, if passed.
    Otherwise, the elements will be retrieved from the dataset itself by
    traversing it and looking at the `targets_task_labels` field.

    :param dataset: The dataset for which the task labels list must be
        initialized. Ignored if `task_labels` is passed, but it may still be
        used if `check_shape` is true.
    :param task_labels: The task labels to use. May be None, in which case
        the labels will be retrieved from the dataset.
    :param check_shape: If True, will check if the length of the task labels
        list matches the dataset size. Ignored if the labels are retrieved
        from the dataset.
    :returns: A data attribute containing the task labels. May be None to
        signal that the dataset's `targets_task_labels` field should be used
        (because the dataset is an :class:`AvalancheDataset`).
    """
    if task_labels is not None:
        # task_labels has priority over the dataset fields
        if isinstance(task_labels, int):
            task_labels = ConstantSequence(task_labels, len(dataset))
        elif len(task_labels) != len(dataset) and check_shape:
            raise ValueError(
                "Invalid amount of task labels. It must be equal to the "
                "number of patterns in the dataset. Got {}, expected "
                "{}!".format(len(task_labels), len(dataset))
            )

        if isinstance(task_labels, ConstantSequence):
            tls = task_labels
        elif isinstance(task_labels, DataAttribute):
            tls = task_labels.data
        else:
            tls = SubSequence(task_labels, converter=int)
    else:
        task_labels = _traverse_supported_dataset(
            dataset, _select_task_labels
        )

        if task_labels is None:
            tls = None
        elif isinstance(task_labels, ConstantSequence):
            tls = task_labels
        elif isinstance(task_labels, DataAttribute):
            return DataAttribute(
                task_labels.data, "targets_task_labels",
                use_in_getitem=True)
        else:
            tls = SubSequence(task_labels, converter=int)

    if tls is None:
        return None
    return DataAttribute(tls, "targets_task_labels", use_in_getitem=True)

def _select_task_labels(dataset: Any, indices: Optional[List[int]]) -> \
        Optional[Sequence[SupportsInt]]:
    """
    Selector function to be passed to :func:`_traverse_supported_dataset`
    to obtain the `targets_task_labels` for the given dataset.

    :param dataset: the traversed dataset.
    :param indices: the indices describing the subset to consider.
    :returns: The list of task labels or None if not found.
    """
    found_task_labels: Optional[Sequence[SupportsInt]] = None
    if hasattr(dataset, "targets_task_labels"):
        found_task_labels = dataset.targets_task_labels

    if found_task_labels is None:
        if isinstance(dataset, (Subset, ConcatDataset)):
            return None  # Continue traversing

    if found_task_labels is None:
        if indices is None:
            return ConstantSequence(0, len(dataset))
        return ConstantSequence(0, len(indices))

    if indices is not None:
        found_task_labels = SubSequence(found_task_labels, indices=indices)

    return found_task_labels

def _init_transform_groups(
    transform_groups: Optional[Mapping[str, TransformGroupDef]],
    transform: Optional[XTransform],
    target_transform: Optional[YTransform],
    initial_transform_group: Optional[str],
    dataset,
) -> Optional[TransformGroups]:
    """
    Initializes the transform groups for the given dataset.

    This internal utility is commonly used to manage the transformation
    definitions coming from the user-facing API. The user may want to
    define transformations in a more classic (and simple) way by
    passing a single `transform`, or in a more elaborate way by
    passing a dictionary of groups (`transform_groups`).

    :param transform_groups: The transform groups to use as a dictionary
        (group_name -> group). Can be None. Mutually exclusive with
        `transform` and `target_transform`.
    :param transform: The transformation for the X value. Can be None.
    :param target_transform: The transformation for the Y value. Can be None.
    :param initial_transform_group: The name of the initial group.
        If None, 'train' will be used.
    :param dataset: The avalanche dataset, used only to obtain the name of
        the initial transformations group if `initial_transform_group` is
        None.
    :returns: a :class:`TransformGroups` instance if any transformation
        was passed, else None.
    """
    if transform_groups is not None and (
        transform is not None or target_transform is not None
    ):
        raise ValueError(
            "transform_groups can't be used with transform "
            "and target_transform values"
        )

    if transform_groups is not None:
        _check_groups_dict_format(transform_groups)

    if initial_transform_group is None:
        # Detect from the input dataset. If not an AvalancheDataset then
        # use 'train' as the initial transform group
        if (
            isinstance(dataset, AvalancheDataset)
            and dataset._flat_data._transform_groups is not None
        ):
            tgs = dataset._flat_data._transform_groups
            initial_transform_group = tgs.current_group
        else:
            initial_transform_group = "train"

    if transform_groups is None:
        if target_transform is None and transform is None:
            tgs = None
        else:
            tgs = TransformGroups(
                {
                    "train": (transform, target_transform),
                    "eval": (transform, target_transform),
                },
                current_group=initial_transform_group,
            )
    else:
        tgs = TransformGroups(
            transform_groups, current_group=initial_transform_group
        )
    return tgs

def _check_groups_dict_format(groups_dict):
    # The original groups_dict must be convertible to native Python dict
    groups_dict = dict(groups_dict)

    # Check if the format of the groups is correct
    for map_key in groups_dict:
        if not isinstance(map_key, str):
            raise ValueError(
                "Every group must be identified by a string. "
                'Wrong key was: "' + str(map_key) + '"'
            )

    if "test" in groups_dict:
        warnings.warn(
            'A transformation group named "test" has been found. Beware '
            "that by default AvalancheDataset supports test transformations"
            ' through the "eval" group. Consider using that one!'
        )

def _split_user_def_task_label(
    datasets,
    task_labels: Optional[Union[int,
                                Sequence[int],
                                Sequence[Sequence[int]]]]) -> \
        List[Optional[Union[int, Sequence[int]]]]:
    """
    Given a datasets list and the user-defined list of task labels,
    returns the task labels list of each dataset.

    This internal utility is mainly used to manage the different ways
    in which the user can define the task labels:
    - As a single task label for all exemplars of all datasets
    - A single list of length equal to the sum of the lengths of all datasets
    - A list containing, for each dataset, one of:
        - a list, defining the task labels of each exemplar of that dataset
        - an int, defining the task label of all exemplars of that dataset

    :param datasets: The list of datasets.
    :param task_labels: The user-defined task labels. Can be None, in which
        case a list of None will be returned.
    :returns: A list containing as many elements as the input `datasets`.
        Each element is either a list of task labels or None. If None
        (because `task_labels` is None), this means that the task labels
        should be retrieved by traversing each dataset.
    """
    t_labels = []
    idx_start = 0
    for dd_idx, dd in enumerate(datasets):
        end_idx = idx_start + len(dd)
        dataset_t_label: Optional[Union[int, Sequence[int]]]
        if task_labels is None:
            # No task label set
            dataset_t_label = None
        elif isinstance(task_labels, int):
            # Single integer (same label for all instances)
            dataset_t_label = task_labels
        elif isinstance(task_labels[0], int):
            # Single task labels sequence
            # (to be split across concatenated datasets)
            dataset_t_label = task_labels[idx_start:end_idx]  # type: ignore
        elif len(task_labels[dd_idx]) == len(dd):  # type: ignore
            # One sequence per dataset
            dataset_t_label = task_labels[dd_idx]
        else:
            raise ValueError(
                'The task_labels parameter has an invalid format.'
            )
        t_labels.append(dataset_t_label)

        idx_start = end_idx
    return t_labels

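# Example (illustrative sketch): a single flat sequence of task labels is
# split according to the lengths of the datasets, while a single int is
# simply replicated for each dataset.
#
#   >>> _split_user_def_task_label(
#   ...     [list(range(2)), list(range(3))], [1, 1, 1, 2, 2])
#   [[1, 1], [1, 2, 2]]
#   >>> _split_user_def_task_label([list(range(2)), list(range(3))], 0)
#   [0, 0]
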
def _split_user_def_targets(
        datasets,
        targets: Optional[Union[Sequence[T], Sequence[Sequence[T]]]],
        single_element_checker: Callable[[Any], bool]) -> \
            List[Optional[Sequence[T]]]:
    """
    Given a datasets list and the user-defined list of targets,
    returns the targets list of each dataset.

    This internal utility is mainly used to manage the different ways
    in which the user can define the targets:
    - A single list of length equal to the sum of the lengths of all datasets
    - A list containing, for each dataset, a list defining the targets
        of each exemplar of that dataset

    :param datasets: The list of datasets.
    :param targets: The user-defined targets. Can be None, in which
        case a list of None will be returned.
    :param single_element_checker: A function that returns True if the given
        element is a single target (and not a sequence of targets).
    :returns: A list containing as many elements as the input `datasets`.
        Each element is either a list of targets or None. If None
        (because `targets` is None), this means that the targets
        should be retrieved by traversing each dataset.
    """
    t_labels = []
    idx_start = 0
    for dd_idx, dd in enumerate(datasets):
        end_idx = idx_start + len(dd)
        dataset_t_label: Optional[Sequence[T]]
        if targets is None:
            # No targets set
            dataset_t_label = None
        elif single_element_checker(targets[0]):
            # Single targets sequence
            # (to be split across concatenated datasets)
            dataset_t_label = targets[idx_start:end_idx]  # type: ignore
        elif len(targets[dd_idx]) == len(dd):  # type: ignore
            # One sequence per dataset
            dataset_t_label = targets[dd_idx]  # type: ignore
        else:
            raise ValueError(
                'The targets parameter has an invalid format.'
            )
        t_labels.append(dataset_t_label)

        idx_start = end_idx
    return t_labels

class TaskSet(Mapping[int, TAvalancheDataset], Generic[TAvalancheDataset]):
    """A lazy mapping for <task-label -> task dataset>.

    Given an `AvalancheClassificationDataset`, this class provides an
    iterator that splits the data into task subsets, returning tuples
    `<task_id, task_dataset>`.

    Usage:

    .. code-block:: python

        tset = TaskSet(data)
        for tid, tdata in tset:
            print(f"task {tid} has {len(tdata)} examples.")

    """

    def __init__(self, data: TAvalancheDataset):
        """Constructor.

        :param data: original data
        """
        super().__init__()
        self.data: TAvalancheDataset = data

    def __iter__(self) -> Iterator[int]:
        t_labels = self._get_task_labels_field()
        return iter(t_labels.uniques)

    def __getitem__(self, task_label: int):
        t_labels = self._get_task_labels_field()
        tl_idx = t_labels.val_to_idx[task_label]
        return self.data.subset(
            tl_idx
        )

    def __len__(self) -> int:
        t_labels = self._get_task_labels_field()
        return len(t_labels.uniques)

    def _get_task_labels_field(self) -> DataAttribute[int]:
        return self.data.targets_task_labels  # type: ignore

def _numpy_is_sequence_int(numpy_tensor: np.ndarray) -> bool:
    return issubclass(numpy_tensor.dtype.type, np.integer)


def _numpy_is_single_int(numpy_tensor: np.ndarray) -> bool:
    try:
        single_value = numpy_tensor.item()
        return isinstance(single_value, int)
    except ValueError:
        return False


def _torch_is_sequence_int(torch_tensor: Tensor) -> bool:
    return not torch.is_floating_point(torch_tensor) and \
        not torch.is_complex(torch_tensor)


def _torch_is_single_int(torch_tensor: Tensor) -> bool:
    try:
        single_value = torch_tensor.item()
        return isinstance(single_value, int)
    except ValueError:
        return False


def _element_is_single_int(element: Any):
    if isinstance(element, (int, np.integer)):
        return True
    if isinstance(element, Tensor):
        return _torch_is_single_int(element)
    else:
        return False


def _is_int_iterable(iterable: Iterable[Any]):
    if isinstance(iterable, torch.Tensor):
        return _torch_is_sequence_int(iterable)
    elif isinstance(iterable, np.ndarray):
        return _numpy_is_sequence_int(iterable)
    else:
        for t in iterable:
            if not _element_is_single_int(t):
                return False
        return True


AnyT = TypeVar('AnyT', bound=Iterable)


def _to_int_list(iterable: AnyT, force: bool = True) -> Union[AnyT, List[int]]:
    if isinstance(iterable, torch.Tensor):
        if _torch_is_sequence_int(iterable):
            return iterable.tolist()
        elif force:
            raise ValueError('Cannot convert PyTorch Tensor to int list')
        else:
            return iterable
    elif isinstance(iterable, np.ndarray):
        if _numpy_is_sequence_int(iterable):
            return iterable.tolist()
        elif force:
            raise ValueError('Cannot convert NumPy array to int list')
        else:
            return iterable  # type: ignore
    else:
        int_list = []
        for t in iterable:
            if _element_is_single_int(t):
                int_list.append(t)
            elif force:
                raise ValueError('Cannot convert sequence to int list')
            else:
                return iterable
        return int_list

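# Example (illustrative sketch): integer tensors become plain int lists,
# while non-integer content is returned unchanged when force=False.
#
#   >>> _to_int_list(torch.tensor([1, 2, 3]))
#   [1, 2, 3]
#   >>> _to_int_list(["a", "b"], force=False)
#   ['a', 'b']
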
def _smart_init_targets(
    dataset,
    targets,
    check_shape=True
):
    """
    Initializes the targets for a given dataset.

    To support backwards compatibility for when
    :func:`create_multi_dataset_generic_benchmark` was
    used to manage classification benchmarks only, this function will try to
    mimic the steps taken in :func:`make_classification_dataset`, that is:

    - will try to check if the input dataset has classification
        targets (integer tensors / ndarray) and will cast them to
        a list of native ints, as expected by other parts
        of Avalanche.
    - accepts passing an int for the targets field. The given int
        will be applied to all exemplars in the dataset.
    - supports PyTorch TensorDataset, by taking the second tensor as targets.

    If targets are not of type int, then they will be returned as-is,
    so that other types of datasets (regression, detection, ...) are
    supported without issues.

    :param dataset: The input dataset. If the `targets` parameter is
        None, then targets will be retrieved from the dataset.
    :param targets: The targets to use. Can be None, in which case
        targets will be retrieved from the dataset.
    :param check_shape: If True, will check if the number of exemplars
        in the dataset matches the length of the obtained targets sequence.
    :return: The targets, as a DataAttribute of elements whose type depends
        on the input dataset.
    """
    if targets is not None:
        # User defined targets always take precedence
        if isinstance(targets, int):
            # Classification targets
            targets = ConstantSequence(targets, len(dataset))
        elif len(targets) != len(dataset) and check_shape:
            raise ValueError(
                "Invalid number of target labels. It must be equal to the "
                "number of patterns in the dataset. Got {}, expected "
                "{}!".format(len(targets), len(dataset))
            )
        return DataAttribute(targets, "targets")

    targets = _traverse_supported_dataset(
        dataset, _smart_select_targets_opt)

    if targets is not None:
        # Classification targets
        targets = _to_int_list(targets, force=False)

    if targets is None:
        return None

    return DataAttribute(targets, "targets")


def _smart_select_targets_opt(
        dataset: Any,
        indices: Optional[List[int]]) -> Optional[Sequence[Any]]:
    if hasattr(dataset, "targets"):
        # Standard supported dataset
        found_targets = dataset.targets
    elif hasattr(dataset, "tensors") and len(dataset.tensors) >= 2:
        # Support for PyTorch TensorDataset
        found_targets = dataset.tensors[1]
    else:
        return None

    if indices is not None:
        found_targets = SubSequence(found_targets, indices=indices)

    return found_targets

def make_generic_dataset(
    dataset: Any,
    *,
    transform: Optional[XTransform] = None,
    target_transform: Optional[YTransform] = None,
    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
    initial_transform_group: Optional[str] = None,
    task_labels: Optional[Union[int, Sequence[int]]] = None,
    targets: Optional[Any] = None,
    collate_fn: Optional[Callable[[List], Any]] = None
) -> AvalancheDataset:
    """
    Helper function that creates an :class:`AvalancheDataset` with
    supervision fields `targets` and `targets_task_labels` (if given or found
    in the input dataset).

    :param dataset: The dataset to wrap in the AvalancheDataset. If it contains
        `targets` and/or `targets_task_labels` fields, then those fields will
        be inherited by the resulting dataset (if not given by the `targets`
        or `task_labels` parameters). This will also check if the input dataset
        is a :class:`TensorDataset` and, in that case, will try to use the
        second tensor as the `targets` field.
    :param transform: The transformation to apply to X values.
        Mutually exclusive with `transform_groups`.
    :param target_transform: The transformation to apply to Y values.
        Mutually exclusive with `transform_groups`.
    :param transform_groups: The transformations groups to add to the dataset.
        Mutually exclusive with `transform` and `target_transform`.
    :param initial_transform_group: The name of the transformations group to
        make current. If None, the current group of the input dataset (or
        'train') will be used.
    :param task_labels: A list containing a task label for each example. Can
        also be a plain `int`, in which case it will be applied to all
        examples. If not None, shadows the `targets_task_labels` field from
        the input dataset.
    :param targets: A list containing a target for each example. If not None,
        shadows the `targets` field from the input dataset.
    :param collate_fn: The collate function to use when loading this dataset.

    :returns: An :class:`AvalancheDataset`.
    """
    if isinstance(dataset, AvalancheDataset):
        return dataset

    transform_gs = _init_transform_groups(
        transform_groups=transform_groups,
        transform=transform,
        target_transform=target_transform,
        initial_transform_group=initial_transform_group,
        dataset=dataset,
    )

    targets_data: Optional[DataAttribute[Any]] = \
        _smart_init_targets(dataset, targets)
    task_labels_data: Optional[DataAttribute[int]] = \
        _init_task_labels(dataset, task_labels)

    das: List[DataAttribute] = []
    if targets_data is not None:
        das.append(targets_data)
    if task_labels_data is not None:
        das.append(task_labels_data)

    data = AvalancheDataset(
        [dataset],
        data_attributes=das if len(das) > 0 else None,
        transform_groups=transform_gs,
        collate_fn=collate_fn,
    )

    if initial_transform_group is not None:
        return data.with_transforms(initial_transform_group)
    else:
        return data

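# Example (illustrative sketch): wrapping a plain TensorDataset. The second
# tensor should be picked up as the `targets` field, while `task_labels=0`
# assigns task 0 to every example.
#
#   >>> xs = torch.randn(10, 3)
#   >>> ys = torch.randint(0, 5, (10,))
#   >>> data = make_generic_dataset(TensorDataset(xs, ys), task_labels=0)
#   >>> len(data)
#   10
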
def make_generic_tensor_dataset(
    dataset_tensors: Sequence,
    *,
    transform: Optional[XTransform] = None,
    target_transform: Optional[YTransform] = None,
    transform_groups: Optional[Mapping[str, TransformGroupDef]] = None,
    initial_transform_group: Optional[str] = None,
    task_labels: Optional[Union[int, Sequence[int]]] = None,
    targets: Optional[Any] = None,
    collate_fn: Optional[Callable[[List], Any]] = None
) -> AvalancheDataset:
    """
    Creates an :class:`AvalancheDataset` from raw tensors (or sequences that
    can be converted to tensors).

    The parameters follow :func:`make_generic_dataset`, with the exception of
    `dataset_tensors` (the sequences used to build the internal
    :class:`TensorDataset`) and `targets`, which can also be an int, in which
    case it is interpreted as the index of the sequence to use as the
    `targets` field.
    """
    if len(dataset_tensors) < 1:
        raise ValueError("At least one sequence must be passed")

    if isinstance(targets, int):
        targets = dataset_tensors[targets]
    tts = []
    for tt in dataset_tensors:  # TensorDataset requires PyTorch tensors
        if not hasattr(tt, 'size'):
            tt = torch.tensor(tt)
        tts.append(tt)
    dataset = TensorDataset(*tts)

    transform_gs = _init_transform_groups(
        transform_groups,
        transform,
        target_transform,
        initial_transform_group,
        dataset,
    )
    targets_data = _smart_init_targets(dataset, targets)
    task_labels_data = _init_task_labels(dataset, task_labels)

    das: List[DataAttribute] = []
    if targets_data is not None:
        das.append(targets_data)
    if task_labels_data is not None:
        das.append(task_labels_data)

    data = AvalancheDataset(
        [dataset],
        data_attributes=das if len(das) > 0 else None,
        transform_groups=transform_gs,
        collate_fn=collate_fn,
    )

    if initial_transform_group is not None:
        return data.with_transforms(initial_transform_group)
    else:
        return data

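# Example (illustrative sketch): raw sequences are converted to tensors and
# wrapped in a TensorDataset; with targets=1 the second sequence should be
# used as the targets field.
#
#   >>> data = make_generic_tensor_dataset(
#   ...     [torch.randn(4, 2), torch.tensor([0, 1, 0, 1])],
#   ...     targets=1, task_labels=0)
#   >>> len(data)
#   4
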
__all__ = [
    "tensor_as_list",
    "grouped_and_ordered_indexes",
    "as_avalanche_dataset",
    "as_classification_dataset",
    "concat_datasets",
    "find_common_transforms_group",
    "TaskSet",
    "make_generic_dataset",
    "make_generic_tensor_dataset"
]