• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

adaptive-machine-learning / CapyMOA / 24541350644

17 Apr 2026 12:31AM UTC coverage: 74.415% (-0.1%) from 74.535%
24541350644

push

github

web-flow
feat: add domain incremental ocl datasets (#344)

227 of 300 new or added lines in 9 files covered. (75.67%)

6995 of 9400 relevant lines covered (74.41%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

53.73
/src/capymoa/ocl/datasets/_vit.py
1
from pathlib import Path
1✔
2
from typing import Any, Callable, List, Optional, Sequence, Tuple, cast
1✔
3

4
import torch
1✔
5
from torch import Tensor
1✔
6
from torch.utils.data import ConcatDataset, Dataset, Subset
1✔
7

8
from capymoa.datasets import get_download_dir
1✔
9
from capymoa.datasets._utils import TensorDatasetWithTransform, download_numpy_dataset
1✔
10
from capymoa.ocl.util.data import group_indicies
1✔
11
from capymoa.stream import TorchStream
1✔
12

13
from ._base import _BuiltInCIScenario
1✔
14
from ._constants import _SOURCES
1✔
15
from ._vision import DomainCIFAR100
1✔
16

17

18
class SplitCIFAR100ViT(_BuiltInCIScenario):
    """CIFAR100 represented as Vision Transformer (ViT) feature vectors.

    * Features come from the ``vit_base_patch16_224_augreg_in21k``
      pre-trained backbone [1]_.
    * Each sample is a 768 dimensional vector (taken from the last layer of
      the ViT).
    * 100 classes.
    * 50,000 training samples
    * 10,000 testing samples
    * Handy when developing and evaluating prototype based continual
      learning algorithms.

    ..  [1] Model card for ``vit_base_patch16_224.augreg_in21k``
        https://huggingface.co/timm/vit_base_patch16_224.augreg_in21k
    """

    # Key used both as the dataset name and to look up the URL in ``_SOURCES``.
    _dataset_key = "CIFAR100_vit_base_patch16_224_augreg_in21k"

    # Scenario description.
    num_classes = 100
    default_task_count = 10
    shape = [768]

    # No transforms are applied unless the caller supplies them.
    default_train_transform = None
    default_test_transform = None

    @classmethod
    def _download_dataset(
        cls,
        train: bool,
        directory: Path,
        auto_download: bool,
        transform: Optional[Any],
        target_transform: Optional[Callable[[Any], Any]] = None,
    ) -> Dataset[Tuple[Tensor, Tensor]]:
        """Load one split of the pre-encoded dataset as a torch dataset.

        The arrays are obtained through ``download_numpy_dataset`` using
        ``cls._dataset_key`` as the dataset name and ``_SOURCES`` for the URL.

        :param train: When true the training split is returned, otherwise the
            testing split.
        :param directory: Forwarded to ``download_numpy_dataset`` as
            ``downloads``.
        :param auto_download: Forwarded to ``download_numpy_dataset``.
        :param transform: Feature transform handed to
            ``TensorDatasetWithTransform``.
        :param target_transform: Label transform handed to
            ``TensorDatasetWithTransform``.
        :return: A ``TensorDatasetWithTransform`` holding the features cast to
            float and the labels cast to long.
        """
        key = cls._dataset_key
        (train_x, train_y), (test_x, test_y) = download_numpy_dataset(
            dataset_name=key,
            url=_SOURCES[key],
            auto_download=auto_download,
            downloads=directory,
        )

        # Select the requested split once so the wrapping code is not repeated.
        features, labels = (train_x, train_y) if train else (test_x, test_y)
        return TensorDatasetWithTransform(
            torch.from_numpy(features).float(),
            torch.from_numpy(labels).long(),
            transform=transform,
            target_transform=target_transform,
        )
69

70

71
class SplitCIFAR10ViT(SplitCIFAR100ViT):
    """CIFAR10 represented as Vision Transformer (ViT) feature vectors.

    * Features come from the ``vit_base_patch16_224_augreg_in21k``
      pre-trained backbone [1]_.
    * Each sample is a 768 dimensional vector (taken from the last layer of
      the ViT).
    * 10 classes.
    * 50,000 training samples
    * 10,000 testing samples
    * Handy when developing and evaluating prototype based continual learning
      algorithms.

    ..  [1] Model card for ``vit_base_patch16_224.augreg_in21k``
        https://huggingface.co/timm/vit_base_patch16_224.augreg_in21k
    """

    # Only the attributes below differ from SplitCIFAR100ViT; the loading
    # logic is inherited and picks up this key through ``cls._dataset_key``.
    _dataset_key = "CIFAR10_vit_base_patch16_224_augreg_in21k"

    # Scenario description.
    shape = [768]
    num_classes = 10
    default_task_count = 5
92

93

94
class DomainCIFAR100ViT(SplitCIFAR100ViT):
    """Domain incremental CIFAR-100 ViT variant with 20 classes per task.

    The scenario always consists of 5 tasks. Every task holds one fine-grained
    class from each CIFAR-100 superclass (so 20 classes per task), and the
    labels are remapped to the 20 superclass IDs.

    Note that the groupings are subjective based on the original CIFAR-100's
    coarse labels.

    **References:**

    #. Krizhevsky, A. (2009). Learning Multiple Layers of Features from Tiny Images.
    """

    # Lookup tables and class names are reused from the image based scenario.
    _CIFAR100_CLASS_TO_SUPERCLASS: List[int] = (
        DomainCIFAR100._CIFAR100_CLASS_TO_SUPERCLASS
    )
    _CIFAR100_SUPERCLASS_CLASSES: List[List[int]] = (
        DomainCIFAR100._CIFAR100_SUPERCLASS_CLASSES
    )
    classes = DomainCIFAR100.classes

    # Scenario description.
    num_classes = 20
    default_task_count = 5
    shape = [768]

    def __init__(
        self,
        shuffle_data: bool = True,
        seed: int = 0,
        directory: Path = get_download_dir(),
        auto_download: bool = True,
        train_transform: Optional[Callable[[Any], Tensor]] = None,
        test_transform: Optional[Callable[[Any], Tensor]] = None,
    ):
        """Create the DomainCIFAR100ViT scenario.

        :param shuffle_data: When true, the order of fine classes inside each
            superclass is permuted and ``shuffle=True`` is passed on when
            grouping the training data. The test data is never shuffled.
        :param seed: Seed for the ``torch.Generator`` driving all shuffling.
        :param directory: Passed to ``_download_dataset``. The default is
            evaluated once, when this method is defined.
        :param auto_download: Passed to ``_download_dataset``.
        :param train_transform: Transform for training features, falling back
            to ``default_train_transform`` when ``None``.
        :param test_transform: Transform for testing features, falling back to
            ``default_test_transform`` when ``None``.
        """
        train_transform = (
            self.default_train_transform if train_transform is None else train_transform
        )
        test_transform = (
            self.default_test_transform if test_transform is None else test_transform
        )

        # Every task is scheduled with the complete set of superclass labels.
        self.num_tasks = self.default_task_count
        self.task_schedule = [
            set(range(self.num_classes)) for _ in range(self.num_tasks)
        ]

        rng = torch.Generator().manual_seed(seed)

        def to_superclass(label: Any) -> int:
            # Translate a fine-grained label into its superclass ID.
            return self._CIFAR100_CLASS_TO_SUPERCLASS[int(label)]

        train_dataset = self._download_dataset(
            True,
            directory,
            auto_download,
            train_transform,
            target_transform=to_superclass,
        )
        test_dataset = self._download_dataset(
            False,
            directory,
            auto_download,
            test_transform,
            target_transform=to_superclass,
        )

        # One row per superclass, one column per task.
        superclass_classes = torch.asarray(self._CIFAR100_SUPERCLASS_CLASSES)
        # Shuffle each superclass class order so tasks change with seed.
        if shuffle_data:
            for row in range(superclass_classes.size(0)):
                order = torch.randperm(superclass_classes.size(1), generator=rng)
                superclass_classes[row] = superclass_classes[row][order]

        # Transposing yields, for every task, its list of fine classes. The
        # training tasks are built first so the generator is consumed in a
        # fixed order.
        fine_classes_by_task = superclass_classes.T
        self.train_tasks = self._build_domain_tasks(
            train_dataset,
            fine_classes_by_task.tolist(),
            shuffle_data,
            rng,
        )
        self.test_tasks = self._build_domain_tasks(
            test_dataset,
            fine_classes_by_task.tolist(),
            False,
            rng,
        )

        self.stream = TorchStream.from_classification(
            ConcatDataset(self.train_tasks),
            num_classes=self.num_classes,
            shuffle=False,
            dataset_name=str(self),
            shape=self.shape,
            class_names=self.classes,
        )
        self.schema = self.stream.get_schema()
        # All entries are true: no class is masked out for any task.
        self.task_mask = torch.ones(
            (self.num_tasks, self.num_classes), dtype=torch.bool
        )

    @staticmethod
    def _build_domain_tasks(
        dataset: Dataset[Tuple[Tensor, Tensor]],
        task_fine_classes: Sequence[Sequence[int]],
        shuffle_data: bool,
        generator: torch.Generator,
    ) -> List[Dataset[Tuple[Tensor, Tensor]]]:
        """Split ``dataset`` into one ``Subset`` per group of fine classes.

        :param dataset: Source dataset; it is expected to expose a ``targets``
            attribute, which is what gets grouped.
        :param task_fine_classes: For each task, the fine-grained class IDs
            passed to ``group_indicies``.
        :param shuffle_data: Forwarded to ``group_indicies`` as ``shuffle``.
        :param generator: Forwarded to ``group_indicies`` as ``rng``.
        :return: One ``Subset`` of ``dataset`` for each index group produced by
            ``group_indicies``, in the order they are produced.
        """
        labels = cast(torch.LongTensor, torch.asarray(dataset.targets))  # type: ignore
        index_groups = group_indicies(
            labels, task_fine_classes, shuffle=shuffle_data, rng=generator
        )
        return [
            cast(
                Dataset[Tuple[Tensor, Tensor]],
                Subset(dataset, group.tolist()),
            )
            for group in index_groups
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc