# /avalanche/benchmarks/generators/benchmark_generators.py
################################################################################
# Copyright (c) 2021 ContinualAI.                                              #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 16-04-2021                                                             #
# Author(s): Lorenzo Pellegrini                                                #
# E-mail: contact@continualai.org                                              #
# Website: avalanche.continualai.org                                           #
################################################################################

""" In this module the high-level benchmark generators are listed. They are
based on the methods already implemented in the "scenario" module. For the
specific generators we have: "New Classes" (NC) and "New Instances" (NI); for
the generic ones: filelist_benchmark, tensors_benchmark, dataset_benchmark
and paths_benchmark.
"""
from itertools import tee
from typing import (
    Sequence,
    Optional,
    Dict,
    TypeVar,
    Union,
    Any,
    List,
    Callable,
    Set,
    Tuple,
    Iterable,
    Generator,
)

import torch
from avalanche.benchmarks.scenarios.classification_benchmark_creation import (
    create_classification_benchmark_from_filelists,
    create_classification_benchmark_from_paths,
    create_classification_benchmark_from_tensor_lists,
    create_lazy_classification_benchmark,
    create_multi_dataset_classification_benchmark,
)
from avalanche.benchmarks.scenarios.classification_scenario import \
    ClassificationScenario

from avalanche.benchmarks.scenarios.dataset_scenario import (
    DatasetScenario,
    DatasetStream,
    FactoryBasedStream,
    StreamDef,
    TStreamsUserDict,
)
from avalanche.benchmarks.scenarios.detection_scenario import DetectionScenario
from avalanche.benchmarks.scenarios.generic_benchmark_creation import *
from avalanche.benchmarks.scenarios import (
    StreamUserDef,
)
from avalanche.benchmarks.scenarios.generic_scenario import (
    CLStream,
    DatasetExperience,
    SizedCLStream,
)
from avalanche.benchmarks.scenarios.lazy_dataset_sequence import (
    LazyDatasetSequence,
)
from avalanche.benchmarks.scenarios.new_classes.nc_scenario import NCScenario
from avalanche.benchmarks.scenarios.new_instances.ni_scenario import NIScenario
from avalanche.benchmarks.utils.classification_dataset import (
    ClassificationDataset,
    SupportedDataset,
    make_classification_dataset,
    concat_classification_datasets_sequentially
)
from avalanche.benchmarks.utils.data import AvalancheDataset
from avalanche.benchmarks.scenarios.detection_benchmark_creation import (
    create_multi_dataset_detection_benchmark,
)


TDatasetScenario = TypeVar(
    'TDatasetScenario',
    bound='DatasetScenario')
TCLStream = TypeVar(
    'TCLStream',
    bound='CLStream')
TSizedCLStream = TypeVar(
    'TSizedCLStream',
    bound='SizedCLStream')
TDatasetExperience = TypeVar(
    'TDatasetExperience',
    bound='DatasetExperience')
TCLDataset = TypeVar(
    'TCLDataset',
    bound='AvalancheDataset')


def nc_benchmark(
    train_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
    test_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
    n_experiences: int,
    task_labels: bool,
    *,
    shuffle: bool = True,
    seed: Optional[int] = None,
    fixed_class_order: Optional[Sequence[int]] = None,
    per_exp_classes: Optional[Dict[int, int]] = None,
    class_ids_from_zero_from_first_exp: bool = False,
    class_ids_from_zero_in_each_exp: bool = False,
    one_dataset_per_exp: bool = False,
    train_transform=None,
    eval_transform=None,
    reproducibility_data: Optional[Dict[str, Any]] = None
) -> NCScenario:
    """
    This is the high-level benchmark instances generator for the
    "New Classes" (NC) case. Given a sequence of train and test datasets, it
    creates the continual stream of data as a series of experiences. Each
    experience will contain all the instances belonging to a certain set of
    classes and a class won't be assigned to more than one experience.

    This is the reference helper function for creating instances of Class- or
    Task-Incremental benchmarks.

    The ``task_labels`` parameter determines if each incremental experience
    has an increasing task label or if, on the contrary, a default task label
    0 has to be assigned to all experiences. This can be useful when
    differentiating between Single-Incremental-Task and Multi-Task scenarios.

    There are other important parameters that can be specified in order to
    tweak the behaviour of the resulting benchmark. Please take a few minutes
    to read and understand them as they may save you a lot of work.

    This generator features an integrated reproducibility mechanism that
    allows the user to store and later re-load a benchmark. For more info see
    the ``reproducibility_data`` parameter.
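
    As a quick example, here is a sketch of a typical invocation
    (``train_set`` and ``test_set`` are placeholders for any pair of
    supported classification datasets, e.g. torchvision's MNIST train/test
    splits)::

        benchmark = nc_benchmark(
            train_set, test_set,
            n_experiences=5,
            task_labels=False,
            seed=1234,
        )
        # benchmark.train_stream now contains 5 experiences, each covering
        # a disjoint subset of the classes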

    :param train_dataset: A list of training datasets, or a single dataset.
    :param test_dataset: A list of test datasets, or a single test dataset.
    :param n_experiences: The number of incremental experiences. This is not
        used when using multiple train/test datasets with the
        ``one_dataset_per_exp`` parameter set to True.
    :param task_labels: If True, each experience will have an ascending task
        label. If False, the task label will be 0 for all the experiences.
    :param shuffle: If True, the class (or experience) order will be shuffled.
        Defaults to True.
    :param seed: If ``shuffle`` is True and seed is not None, the class (or
        experience) order will be shuffled according to the seed. When None,
        the current PyTorch random number generator state will be used.
        Defaults to None.
    :param fixed_class_order: If not None, the class order to use (overrides
        the shuffle argument). Very useful for enhancing reproducibility.
        Defaults to None.
    :param per_exp_classes: If not None, a dictionary whose keys are
        (0-indexed) experience IDs and whose values are the number of classes
        to include in the respective experiences. The dictionary doesn't
        have to contain a key for each experience! All the remaining
        experiences will contain an equal amount of the remaining classes. The
        remaining number of classes must be divisible without remainder
        by the remaining number of experiences. For instance,
        if you want to include 50 classes in the first experience
        while equally distributing the remaining classes across the remaining
        experiences, just pass the "{0: 50}" dictionary as the
        ``per_exp_classes`` parameter. Defaults to None.
    :param class_ids_from_zero_from_first_exp: If True, original class IDs
        will be remapped so that they will appear as having an ascending
        order. For instance, if the resulting class order after shuffling
        (or defined by fixed_class_order) is [23, 34, 11, 7, 6, ...] and
        class_ids_from_zero_from_first_exp is True, then all the patterns
        belonging to class 23 will appear as belonging to class "0",
        class "34" will be mapped to "1", class "11" to "2" and so on.
        This is very useful when drawing confusion matrices and when dealing
        with algorithms with dynamic head expansion. Defaults to False.
        Mutually exclusive with the ``class_ids_from_zero_in_each_exp``
        parameter.
    :param class_ids_from_zero_in_each_exp: If True, original class IDs
        will be mapped to range [0, n_classes_in_exp) for each experience.
        Defaults to False. Mutually exclusive with the
        ``class_ids_from_zero_from_first_exp`` parameter.
    :param one_dataset_per_exp: Available only when multiple train-test
        datasets are provided. If True, each dataset will be treated as an
        experience. Mutually exclusive with the ``per_exp_classes`` and
        ``fixed_class_order`` parameters. Overrides the ``n_experiences``
        parameter. Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param reproducibility_data: If not None, overrides all the other
        benchmark definition options. This is usually a dictionary containing
        data used to reproduce a specific experiment. One can use the
        ``get_reproducibility_data`` method to get (and even distribute)
        the experiment setup so that it can be loaded by passing it as this
        parameter. In this way one can be sure that the same specific
        experimental setup is being used (for reproducibility purposes).
        Beware that, in order to reproduce an experiment, the same train and
        test datasets must be used. Defaults to None.

    :return: A properly initialized :class:`NCScenario` instance.
    """

    if class_ids_from_zero_from_first_exp and class_ids_from_zero_in_each_exp:
        raise ValueError(
            "Invalid mutually exclusive options "
            "class_ids_from_zero_from_first_exp and "
            "class_ids_from_zero_in_each_exp set at the "
            "same time"
        )

    if isinstance(train_dataset, (list, tuple)):
        # Multi-dataset setting

        if not isinstance(test_dataset, (list, tuple)):
            raise ValueError(
                "If a list is passed for train_dataset, "
                "then test_dataset must be a list, too."
            )

        if len(train_dataset) != len(test_dataset):
            raise ValueError(
                "Train/test dataset lists must contain the "
                "exact same number of datasets"
            )

        if per_exp_classes and one_dataset_per_exp:
            raise ValueError(
                "Both per_exp_classes and one_dataset_per_exp are "
                "used, but those options are mutually exclusive"
            )

        if fixed_class_order and one_dataset_per_exp:
            raise ValueError(
                "Both fixed_class_order and one_dataset_per_exp are "
                "used, but those options are mutually exclusive"
            )

        train_dataset_sup = list(
            map(make_classification_dataset, train_dataset)
        )
        test_dataset_sup = list(
            map(make_classification_dataset, test_dataset)
        )

        seq_train_dataset, seq_test_dataset, mapping = \
            concat_classification_datasets_sequentially(
                train_dataset_sup, test_dataset_sup
            )

        if one_dataset_per_exp:
            # If one_dataset_per_exp is True, each dataset will be treated as
            # an experience. In this benchmark, shuffle refers to the
            # experience order, not to the class one.
            (
                fixed_class_order,
                per_exp_classes,
            ) = _one_dataset_per_exp_class_order(mapping, shuffle, seed)

            # We pass a fixed_class_order to the NCGenericScenario
            # constructor, so we don't need shuffling.
            shuffle = False
            seed = None

            # Overrides n_experiences (and per_experience_classes, already
            # done)
            n_experiences = len(train_dataset)
    else:
        seq_train_dataset = make_classification_dataset(train_dataset)
        seq_test_dataset = make_classification_dataset(test_dataset)

    transform_groups = dict(
        train=(train_transform, None), eval=(eval_transform, None)
    )

    # Set transformation groups
    final_train_dataset = make_classification_dataset(
        seq_train_dataset,
        transform_groups=transform_groups,
        initial_transform_group="train",
    )

    final_test_dataset = make_classification_dataset(
        seq_test_dataset,
        transform_groups=transform_groups,
        initial_transform_group="eval",
    )

    return NCScenario(
        train_dataset=final_train_dataset,
        test_dataset=final_test_dataset,
        n_experiences=n_experiences,
        task_labels=task_labels,
        shuffle=shuffle,
        seed=seed,
        fixed_class_order=fixed_class_order,
        per_experience_classes=per_exp_classes,
        class_ids_from_zero_from_first_exp=class_ids_from_zero_from_first_exp,
        class_ids_from_zero_in_each_exp=class_ids_from_zero_in_each_exp,
        reproducibility_data=reproducibility_data
    )


def ni_benchmark(
    train_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
    test_dataset: Union[Sequence[SupportedDataset], SupportedDataset],
    n_experiences: int,
    *,
    task_labels: bool = False,
    shuffle: bool = True,
    seed: Optional[int] = None,
    balance_experiences: bool = False,
    min_class_patterns_in_exp: int = 0,
    fixed_exp_assignment: Optional[Sequence[Sequence[int]]] = None,
    train_transform=None,
    eval_transform=None,
    reproducibility_data: Optional[Dict[str, Any]] = None,
) -> NIScenario:
    """
    This is the high-level benchmark instances generator for the
    "New Instances" (NI) case. Given a sequence of train and test datasets,
    it creates the continual stream of data as a series of experiences.

    This is the reference helper function for creating instances of
    Domain-Incremental benchmarks.

    The ``task_labels`` parameter determines if each incremental experience
    has an increasing task label or if, on the contrary, a default task label
    0 has to be assigned to all experiences. This can be useful when
    differentiating between Single-Incremental-Task and Multi-Task scenarios.

    There are other important parameters that can be specified in order to
    tweak the behaviour of the resulting benchmark. Please take a few minutes
    to read and understand them as they may save you a lot of work.

    This generator features an integrated reproducibility mechanism that
    allows the user to store and later re-load a benchmark. For more info see
    the ``reproducibility_data`` parameter.
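
    As a quick example, here is a sketch of a typical invocation
    (``train_set`` and ``test_set`` are placeholders for any pair of
    supported classification datasets)::

        benchmark = ni_benchmark(
            train_set, test_set,
            n_experiences=4,
            shuffle=True,
            seed=1234,
            balance_experiences=True,
        )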

    :param train_dataset: A list of training datasets, or a single dataset.
    :param test_dataset: A list of test datasets, or a single test dataset.
    :param n_experiences: The number of experiences.
    :param task_labels: If True, each experience will have an ascending task
        label. If False, the task label will be 0 for all the experiences.
    :param shuffle: If True, the patterns order will be shuffled.
    :param seed: A valid int used to initialize the random number generator.
        Can be None.
    :param balance_experiences: If True, patterns of each class will be
        equally spread across all experiences. If False, patterns will be
        assigned to experiences in a completely random way. Defaults to False.
    :param min_class_patterns_in_exp: The minimum amount of patterns of
        every class that must be assigned to every experience. Compatible with
        the ``balance_experiences`` parameter. An exception will be raised if
        this constraint can't be satisfied. Defaults to 0.
    :param fixed_exp_assignment: If not None, the pattern assignment
        to use. It must be a list with an entry for each experience. Each
        entry is a list that contains the indexes of patterns belonging to
        that experience. Overrides the ``shuffle``, ``balance_experiences``
        and ``min_class_patterns_in_exp`` parameters.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param reproducibility_data: If not None, overrides all the other
        benchmark definition options, including ``fixed_exp_assignment``.
        This is usually a dictionary containing data used to
        reproduce a specific experiment. One can use the
        ``get_reproducibility_data`` method to get (and even distribute)
        the experiment setup so that it can be loaded by passing it as this
        parameter. In this way one can be sure that the same specific
        experimental setup is being used (for reproducibility purposes).
        Beware that, in order to reproduce an experiment, the same train and
        test datasets must be used. Defaults to None.

    :return: A properly initialized :class:`NIScenario` instance.
    """

    seq_train_dataset, seq_test_dataset = train_dataset, test_dataset
    if isinstance(train_dataset, (list, tuple)):
        if not isinstance(test_dataset, (list, tuple)):
            raise ValueError(
                "If a list is passed for train_dataset, "
                "then test_dataset must be a list, too."
            )

        if len(train_dataset) != len(test_dataset):
            raise ValueError(
                "Train/test dataset lists must contain the "
                "exact same number of datasets"
            )

        train_dataset_sup = list(
            map(make_classification_dataset, train_dataset)
        )
        test_dataset_sup = list(
            map(make_classification_dataset, test_dataset)
        )

        seq_train_dataset, seq_test_dataset, _ = \
            concat_classification_datasets_sequentially(
                train_dataset_sup, test_dataset_sup
            )
    else:
        seq_train_dataset = make_classification_dataset(train_dataset)
        seq_test_dataset = make_classification_dataset(test_dataset)

    transform_groups = dict(
        train=(train_transform, None), eval=(eval_transform, None)
    )

    # Set transformation groups
    final_train_dataset = make_classification_dataset(
        seq_train_dataset,
        transform_groups=transform_groups,
        initial_transform_group="train",
    )

    final_test_dataset = make_classification_dataset(
        seq_test_dataset,
        transform_groups=transform_groups,
        initial_transform_group="eval",
    )

    return NIScenario(
        train_dataset=final_train_dataset,
        test_dataset=final_test_dataset,
        n_experiences=n_experiences,
        task_labels=task_labels,
        shuffle=shuffle,
        seed=seed,
        balance_experiences=balance_experiences,
        min_class_patterns_in_exp=min_class_patterns_in_exp,
        fixed_exp_assignment=fixed_exp_assignment,
        reproducibility_data=reproducibility_data
    )


# Here we define some high-level APIs as aliases of their mid-level
# counterparts. This was done mainly because the implementation for the
# mid-level API is now quite stable and not particularly complex.
dataset_benchmark = create_multi_dataset_generic_benchmark
filelist_benchmark = create_generic_benchmark_from_filelists
paths_benchmark = create_generic_benchmark_from_paths
tensors_benchmark = create_generic_benchmark_from_tensor_lists
lazy_benchmark = create_lazy_generic_benchmark
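
# As a quick, hedged sketch (the dataset variables below are placeholders
# for any supported datasets), the generic multi-dataset helper can be
# used as:
#
#     benchmark = dataset_benchmark(
#         [train_exp_0_dataset, train_exp_1_dataset],
#         [test_exp_0_dataset, test_exp_1_dataset],
#     )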


# Classification-specific
dataset_classification_benchmark = \
    create_multi_dataset_classification_benchmark
filelist_classification_benchmark = \
    create_classification_benchmark_from_filelists
paths_classification_benchmark = create_classification_benchmark_from_paths
tensors_classification_benchmark = \
    create_classification_benchmark_from_tensor_lists
lazy_classification_benchmark = create_lazy_classification_benchmark

# Detection-specific
dataset_detection_benchmark = \
    create_multi_dataset_detection_benchmark


def _one_dataset_per_exp_class_order(
    class_list_per_exp: Sequence[Sequence[int]],
    shuffle: bool,
    seed: Optional[int],
) -> Tuple[List[int], Dict[int, int]]:
    """
    Utility function that shuffles the class order by keeping classes from the
    same experience together. Each experience is defined by a different entry
    in the class_list_per_exp parameter.
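
    For example, a sketch of the no-shuffle case::

        order, n_per_exp = _one_dataset_per_exp_class_order(
            [[0, 1], [2, 3, 4]], shuffle=False, seed=None
        )
        # order == [0, 1, 2, 3, 4]
        # n_per_exp == {0: 2, 1: 3}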

    :param class_list_per_exp: A list of class lists, one for each experience.
    :param shuffle: If True, the experience order will be shuffled. If False,
        this function will return the concatenation of lists from the
        class_list_per_exp parameter.
    :param seed: If not None, an integer used to initialize the random
        number generator.

    :returns: A tuple containing the class order (in which class IDs from the
        same experience are kept together, i.e. adjacent) and a dictionary
        mapping each experience ID to the number of classes it contains.
    """
    dataset_order = list(range(len(class_list_per_exp)))
    if shuffle:
        if seed is not None:
            torch.random.manual_seed(seed)
        dataset_order = torch.as_tensor(dataset_order)[
            torch.randperm(len(dataset_order))
        ].tolist()
    fixed_class_order: List[int] = []
    classes_per_exp: Dict[int, int] = {}
    for dataset_position, dataset_idx in enumerate(dataset_order):
        fixed_class_order.extend(class_list_per_exp[dataset_idx])
        classes_per_exp[dataset_position] = len(class_list_per_exp[dataset_idx])
    return fixed_class_order, classes_per_exp


def fixed_size_experience_split_strategy(
    experience_size: int,
    shuffle: bool,
    drop_last: bool,
    experience: DatasetExperience[TCLDataset]
) -> Sequence[TCLDataset]:
    """
    The default splitting strategy used by :func:`data_incremental_benchmark`.

    This splitting strategy simply splits the experience in smaller
    experiences of size `experience_size`.

    When taking inspiration for your custom splitting strategy, please
    consider that all parameters preceding `experience` are filled by
    :func:`data_incremental_benchmark` by using `partial` from the `functools`
    standard library. A custom splitting strategy must have only a single
    parameter: the experience. Consider wrapping your custom splitting
    strategy with `partial` if more parameters are needed.
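
    For example, a sketch of such a wrapper (the split parameters are
    arbitrary)::

        from functools import partial
        my_split = partial(
            fixed_size_experience_split_strategy, 128, True, False
        )
        # my_split(experience) now only needs the experience argument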

    Also consider that the stream name of the experience can be obtained by
    using `experience.origin_stream.name`.

    :param experience_size: The experience size (number of instances).
    :param shuffle: If True, instances will be shuffled before splitting.
    :param drop_last: If True, the last mini-experience will be dropped if
        not of size `experience_size`.
    :param experience: The experience to split.
    :return: The list of datasets that will be used to create the
        mini-experiences.
    """

    exp_dataset = experience.dataset
    exp_indices = list(range(len(exp_dataset)))

    result_datasets = []

    if shuffle:
        exp_indices = torch.as_tensor(exp_indices)[
            torch.randperm(len(exp_indices))
        ].tolist()

    init_idx = 0
    while init_idx < len(exp_indices):
        final_idx = init_idx + experience_size  # Exclusive
        if final_idx > len(exp_indices):
            if drop_last:
                break

            final_idx = len(exp_indices)

        result_datasets.append(
            exp_dataset.subset(exp_indices[init_idx:final_idx])
        )
        init_idx = final_idx

    return result_datasets


TDatasetStream = TypeVar(
    'TDatasetStream',
    bound='DatasetStream'
)


def _make_plain_experience(
    stream: DatasetStream[DatasetExperience[TCLDataset]],
    experience_idx: int
) -> DatasetExperience[TCLDataset]:
    """
    Internal utility that creates a plain :class:`DatasetExperience` for the
    given position of the given stream.
    """
    dataset = stream.benchmark.stream_definitions[
        stream.name
    ].exps_data[experience_idx]

    return DatasetExperience(
        current_experience=experience_idx,
        origin_stream=stream,
        benchmark=stream.benchmark,
        dataset=dataset
    )


def _smart_benchmark_factory(
    original_benchmark: DatasetScenario,
    new_streams_definitions: TStreamsUserDict,
    complete_test_set_only: bool
) -> DatasetScenario:
    """
    Internal utility that creates a benchmark of the same class of the
    original one (classification, detection, or generic) from the given
    stream definitions.
    """
    if isinstance(original_benchmark, ClassificationScenario):
        return ClassificationScenario(
            stream_definitions=new_streams_definitions,
            complete_test_set_only=complete_test_set_only
        )
    elif isinstance(original_benchmark, DetectionScenario):
        return DetectionScenario(
            stream_definitions=new_streams_definitions,
            complete_test_set_only=complete_test_set_only
        )
    else:
        # Generic scenario
        return DatasetScenario(
            stream_definitions=new_streams_definitions,
            complete_test_set_only=complete_test_set_only,
            stream_factory=FactoryBasedStream,
            experience_factory=_make_plain_experience,
        )


def data_incremental_benchmark(
    benchmark_instance: DatasetScenario[TDatasetStream,
                                        TDatasetExperience,
                                        TCLDataset],
    experience_size: int,
    shuffle: bool = False,
    drop_last: bool = False,
    split_streams: Sequence[str] = ("train",),
    custom_split_strategy: Optional[Callable[
        [DatasetExperience[TCLDataset]],
        Sequence[TCLDataset]
    ]] = None,
    *,
    benchmark_factory: Optional[Callable[
        [
            DatasetScenario[TDatasetStream,
                            TDatasetExperience,
                            TCLDataset],
            TStreamsUserDict,
            bool
        ], DatasetScenario[
            DatasetStream[DatasetExperience[TCLDataset]],
            DatasetExperience[TCLDataset],
            TCLDataset]
        ]
    ] = _smart_benchmark_factory,
    experience_factory: Optional[Callable[
        [DatasetStream[DatasetExperience[TCLDataset]], int],
        DatasetExperience[TCLDataset]
    ]] = _make_plain_experience,
) -> DatasetScenario[
        DatasetStream[DatasetExperience[TCLDataset]],
        DatasetExperience[TCLDataset],
        TCLDataset]:
    """
    High-level benchmark generator for a Data Incremental setup.

    This generator accepts an existing benchmark instance and returns a
    version of it in which experiences have been split in order to produce a
    Data Incremental stream.

    In its base form this generator will split train experiences in
    experiences of a fixed, configurable, size. The split can also be
    performed on other streams (like the test one) if needed.

    The `custom_split_strategy` parameter can be used if a more specific
    splitting is required.

    Beware that experience splitting is NOT executed in a lazy way. This
    means that the splitting process takes place immediately. Consider
    optimizing the split process for speed when using a custom splitting
    strategy.

    Please note that each mini-experience will have a task labels field
    equal to the one of the originating experience.

    The `complete_test_set_only` field of the resulting benchmark instance
    will be `True` only if the same field of the original benchmark instance
    is `True` and if the resulting test stream contains exactly one
    experience.
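
    For example, a minimal sketch (assuming `benchmark` is an existing
    benchmark instance, e.g. one created with :func:`nc_benchmark`)::

        data_incremental = data_incremental_benchmark(
            benchmark, experience_size=100, shuffle=True
        )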

    :param benchmark_instance: The benchmark to split.
    :param experience_size: The size of the experience, as an int. Ignored
        if `custom_split_strategy` is used.
    :param shuffle: If True, experiences will be split by first shuffling
        instances in each experience. This will use the default PyTorch
        random number generator at its current state. Defaults to False.
        Ignored if `custom_split_strategy` is used.
    :param drop_last: If True, if the last experience doesn't contain
        `experience_size` instances, then the last experience will be dropped.
        Defaults to False. Ignored if `custom_split_strategy` is used.
    :param split_streams: The list of streams to split. By default only the
        "train" stream will be split.
    :param custom_split_strategy: A function that implements a custom
        splitting strategy. The function must accept an experience and return
        a list of datasets, each describing an experience. Defaults to None,
        which means that the standard splitting strategy will be used (which
        creates experiences of size `experience_size`).
        A good starting point to understand the mechanism is to look at the
        implementation of the standard splitting function
        :func:`fixed_size_experience_split_strategy`.
    :param benchmark_factory: The scenario factory. Defaults to
        `_smart_benchmark_factory`, which will try to create a benchmark of
        the same class of the originating one. Can be None, in which case a
        generic :class:`DatasetScenario` will be used coupled with the factory
        defined by the `experience_factory` parameter.
    :param experience_factory: The experience factory. Ignored if
        `benchmark_factory` is not None. Otherwise, defaults to
        `_make_plain_experience`, which creates plain
        :class:`DatasetExperience` instances.
    :return: The Data Incremental benchmark instance.
    """

    split_strategy: Callable[
        [DatasetExperience[TCLDataset]],
        Sequence[TCLDataset]
    ]
    if custom_split_strategy is None:
        # functools.partial is a more compact option
        # However, MyPy does not understand what a partial is -_-
        def fixed_size_experience_split_strategy_wrapper(exp):
            return fixed_size_experience_split_strategy(
                experience_size,
                shuffle,
                drop_last,
                exp
            )

        split_strategy = fixed_size_experience_split_strategy_wrapper
    else:
        split_strategy = custom_split_strategy

    stream_definitions: Dict[str, StreamDef[TCLDataset]] = dict(
        benchmark_instance.stream_definitions
    )

    for stream_name in split_streams:
        if stream_name not in stream_definitions:
            raise ValueError(
                f"Stream {stream_name} could not be found in the "
                f"benchmark instance"
            )

        stream: TDatasetStream = getattr(
            benchmark_instance,
            f"{stream_name}_stream")

        split_datasets: List[TCLDataset] = []
        split_task_labels: List[Set[int]] = []

        exp: DatasetExperience[TCLDataset]
        for exp in stream:
            experiences = split_strategy(exp)
            split_datasets += experiences
            for _ in range(len(experiences)):
                split_task_labels.append(set(exp.task_labels))

        stream_def = StreamDef(
            LazyDatasetSequence(split_datasets, len(split_datasets)),
            split_task_labels,
            stream_definitions[stream_name].origin_dataset,
            False
        )
        stream_def.exps_data.load_all_experiences()

        stream_definitions[stream_name] = stream_def

    complete_test_set_only = (
        benchmark_instance.complete_test_set_only
        and len(stream_definitions["test"].exps_data) == 1
    )

    if benchmark_factory is not None:
        # Try to create a benchmark of the same class of the
        # initial benchmark.
        return benchmark_factory(
            benchmark_instance,
            stream_definitions,
            complete_test_set_only
        )

    # Generic benchmark class
    if experience_factory is None:
        experience_factory = _make_plain_experience

    return DatasetScenario(
        stream_definitions=stream_definitions,
        complete_test_set_only=complete_test_set_only,
        stream_factory=FactoryBasedStream,
        experience_factory=experience_factory,
    )


def random_validation_split_strategy(
    validation_size: Union[int, float],
    shuffle: bool,
    experience: DatasetExperience[TCLDataset],
) -> Tuple[TCLDataset, TCLDataset]:
    """
    The default splitting strategy used by
    :func:`benchmark_with_validation_stream`.

    This splitting strategy simply splits the experience in two experiences
    (train and validation), with the validation part of size
    `validation_size`.

    When taking inspiration for your custom splitting strategy, please
    consider that all parameters preceding `experience` are filled by
    :func:`benchmark_with_validation_stream` by using `partial` from the
    `functools` standard library. A custom splitting strategy must have only
    a single parameter: the experience. Consider wrapping your custom
    splitting strategy with `partial` if more parameters are needed.
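
    For example, a sketch of a direct call performing an 80/20
    train/validation split of a single experience::

        train_d, valid_d = random_validation_split_strategy(
            0.2, True, experience
        )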

    Also consider that the stream name of the experience can be obtained by
    using `experience.origin_stream.name`.

    :param validation_size: The number of instances to allocate to the
        validation experience. Can be an int value or a float between 0 and 1.
    :param shuffle: If True, instances will be shuffled before splitting.
        Otherwise, the first instances will be allocated to the training
        dataset by leaving the last ones to the validation dataset.
    :param experience: The experience to split.
    :return: A tuple containing 2 elements: the new training and validation
        datasets.
    """

    exp_dataset = experience.dataset
    exp_indices = list(range(len(exp_dataset)))

    if shuffle:
        exp_indices = torch.as_tensor(exp_indices)[
            torch.randperm(len(exp_indices))
        ].tolist()

    if 0.0 <= validation_size <= 1.0:
        valid_n_instances = int(validation_size * len(exp_dataset))
    else:
        valid_n_instances = int(validation_size)
        if valid_n_instances > len(exp_dataset):
            raise ValueError(
                f"Can't create the validation experience: not enough "
                f"instances. Required {valid_n_instances}, got only "
                f"{len(exp_dataset)}"
            )

    train_n_instances = len(exp_dataset) - valid_n_instances
    result_train_dataset = exp_dataset.subset(exp_indices[:train_n_instances])
    result_valid_dataset = exp_dataset.subset(exp_indices[train_n_instances:])
    return result_train_dataset, result_valid_dataset


def class_balanced_split_strategy(
    validation_size: float,
    experience: DatasetExperience[ClassificationDataset],
) -> Tuple[ClassificationDataset, ClassificationDataset]:
    """Class-balanced train/validation splits.

    This splitting strategy splits `experience` into two experiences
    (train and validation) of size `validation_size` using a class-balanced
    split. Samples of each class are chosen randomly.

    You can use this split strategy to split a benchmark with::

        validation_size = 0.2
        foo = lambda exp: class_balanced_split_strategy(validation_size, exp)
        bm = benchmark_with_validation_stream(bm, custom_split_strategy=foo)

    :param validation_size: The percentage of samples to allocate to the
        validation experience as a float between 0 and 1.
    :param experience: The experience to split.
    :return: A tuple containing 2 elements: the new training and validation
        datasets.
    """
    if not isinstance(validation_size, float):
        raise ValueError("validation_size must be a float")
    if not 0.0 <= validation_size <= 1.0:
        raise ValueError("validation_size must be a float in [0, 1].")

    exp_dataset = experience.dataset
    exp_indices = list(range(len(exp_dataset)))
    targets_as_tensor = torch.as_tensor(experience.dataset.targets)
    exp_classes: List[int] = targets_as_tensor.unique().tolist()

    # shuffle exp_indices
    exp_indices = torch.as_tensor(exp_indices)[torch.randperm(len(exp_indices))]
    # shuffle the targets as well
    exp_targets = targets_as_tensor[exp_indices]

    train_exp_indices = []
    valid_exp_indices = []
    for cid in exp_classes:  # split indices for each class separately.
        c_indices = exp_indices[exp_targets == cid]
        valid_n_instances = int(validation_size * len(c_indices))
        valid_exp_indices.extend(c_indices[:valid_n_instances])
        train_exp_indices.extend(c_indices[valid_n_instances:])

    result_train_dataset = exp_dataset.subset(train_exp_indices)
    result_valid_dataset = exp_dataset.subset(valid_exp_indices)
    return result_train_dataset, result_valid_dataset


def _gen_split(
    split_generator: Iterable[
        Tuple[TCLDataset, TCLDataset]
    ]
) -> Tuple[
    Generator[TCLDataset, None, None],
    Generator[TCLDataset, None, None],
]:
    """
    Internal utility function to split the train-validation generator
    into two distinct generators (one for the train stream and another one
    for the valid stream).
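
    For example, a sketch of the expected behavior (``t0``, ``v0``, ... are
    placeholder datasets)::

        pairs = iter([(t0, v0), (t1, v1)])
        train_gen, valid_gen = _gen_split(pairs)
        # next(train_gen) is t0, next(valid_gen) is v0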

    :param split_generator: The lazy stream generator returning tuples of
        train and valid datasets.
    :return: Two generators (one for the train, one for the valid).
    """

    # For more info: https://stackoverflow.com/a/28030261
    gen_a, gen_b = tee(split_generator, 2)
    return (a for a, b in gen_a), (b for a, b in gen_b)


def _lazy_train_val_split(
    split_strategy: Callable[
        [DatasetExperience[TCLDataset]],
        Tuple[TCLDataset, TCLDataset],
    ],
    experiences: Iterable[DatasetExperience[TCLDataset]],
) -> Generator[
    Tuple[TCLDataset, TCLDataset], None, None
]:
    """
    Creates a generator operating around the split strategy and the
    experiences stream.

    :param split_strategy: The strategy used to split each experience in train
        and validation datasets.
    :param experiences: The stream of experiences to split.
    :return: A generator returning a 2 elements tuple (the train and
        validation datasets).
    """

    for new_experience in experiences:
        yield split_strategy(new_experience)


def benchmark_with_validation_stream(
    benchmark_instance: DatasetScenario[TDatasetStream,
                                        TDatasetExperience,
                                        TCLDataset],
    validation_size: Union[int, float] = 0.5,
    shuffle: bool = False,
    input_stream: str = "train",
    output_stream: str = "valid",
    custom_split_strategy: Optional[Callable[
        [DatasetExperience[TCLDataset]],
        Tuple[TCLDataset, TCLDataset],
    ]] = None,
    *,
    benchmark_factory: Optional[Callable[
        [
            DatasetScenario[TDatasetStream,
                            TDatasetExperience,
                            TCLDataset],
            TStreamsUserDict,
            bool
        ], DatasetScenario[
                DatasetStream[DatasetExperience[TCLDataset]],
                DatasetExperience[TCLDataset],
                TCLDataset]]
    ] = _smart_benchmark_factory,
    experience_factory: Optional[Callable[
        [DatasetStream[DatasetExperience[TCLDataset]], int],
        DatasetExperience[TCLDataset]
    ]] = _make_plain_experience,
    lazy_splitting: Optional[bool] = None
) -> DatasetScenario[
        DatasetStream[DatasetExperience[TCLDataset]],
        DatasetExperience[TCLDataset],
        TCLDataset]:
    """
    Helper that can be used to obtain a benchmark with a validation stream.

    This generator accepts an existing benchmark instance and returns a
    version of it in which a validation stream has been added.

    In its base form this generator will split train experiences to extract
    validation experiences of a fixed (by number of instances or relative
    size), configurable, size. The split can also be performed on other
    streams if needed and the name of the resulting validation stream can
    be configured too.

    Each validation experience will be extracted directly from a single
    training experience. Patterns selected for the validation experience will
    be removed from the training one.

    If shuffle is True, the validation stream will be created randomly.
    Beware that no kind of class balancing is done.

    The `custom_split_strategy` parameter can be used if a more specific
    splitting is required.

    Please note that the resulting experiences will have a task labels field
    equal to the one of the originating experience.

    Experience splitting can be executed in a lazy way. This behavior can be
    controlled using the `lazy_splitting` parameter. By default, experiences
    are split in a lazy way only when the input stream is lazily generated.

    The default splitting strategy is a random split. A class-balanced split
    is also available using `class_balanced_split_strategy`::

        validation_size = 0.2
        foo = lambda exp: class_balanced_split_strategy(validation_size, exp)
        bm = benchmark_with_validation_stream(bm, custom_split_strategy=foo)
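
    For the default random split, a minimal sketch (assuming `benchmark` is
    an existing benchmark instance) is::

        valid_benchmark = benchmark_with_validation_stream(
            benchmark, validation_size=0.2, shuffle=True
        )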

    :param benchmark_instance: The benchmark to split.
    :param validation_size: The size of the validation experience, as an int
        or a float between 0 and 1. Ignored if `custom_split_strategy` is
        used.
    :param shuffle: If True, patterns will be allocated to the validation
        stream randomly. This will use the default PyTorch random number
        generator at its current state. Defaults to False. Ignored if
        `custom_split_strategy` is used. If False, the first instances will be
        allocated to the training dataset by leaving the last ones to the
        validation dataset.
    :param input_stream: The name of the input stream. Defaults to 'train'.
    :param output_stream: The name of the output stream. Defaults to 'valid'.
    :param custom_split_strategy: A function that implements a custom
        splitting strategy. The function must accept an experience and return
        a tuple containing the new train and validation dataset. Defaults to
        None, which means that the standard splitting strategy will be used
        (which creates experiences according to `validation_size` and
        `shuffle`).
        A good starting point to understand the mechanism is to look at the
        implementation of the standard splitting function
        :func:`random_validation_split_strategy`.
    :param benchmark_factory: The scenario factory. Defaults to
        `_smart_benchmark_factory`, which will try to create a benchmark of
        the same class of the originating one. Can be None, in which case a
        generic :class:`DatasetScenario` will be used coupled with the factory
        defined by the `experience_factory` parameter.
    :param experience_factory: The experience factory. Ignored if
        `benchmark_factory` is not None. Otherwise, defaults to
        `_make_plain_experience`, which creates plain
        :class:`DatasetExperience` instances.
    :param lazy_splitting: If True, the stream will be split in a lazy way.
        If False, the stream will be split immediately. Defaults to None,
        which means that the stream will be split in a lazy or non-lazy way
        depending on the laziness of the `input_stream`.
    :return: A benchmark instance in which the validation stream has been
        added.
    """

    split_strategy: Callable[
        [DatasetExperience[TCLDataset]],
        Tuple[TCLDataset, TCLDataset],
    ]
    if custom_split_strategy is None:
        # functools.partial is a more compact option
        # However, MyPy does not understand what a partial is -_-
        def random_validation_split_strategy_wrapper(exp):
            return random_validation_split_strategy(
                validation_size,
                shuffle,
                exp
            )

        split_strategy = random_validation_split_strategy_wrapper
    else:
        split_strategy = custom_split_strategy

    original_stream_definitions: Dict[str, StreamDef[TCLDataset]] = \
        benchmark_instance.stream_definitions
    streams = benchmark_instance.streams

    if input_stream not in streams:
        raise ValueError(
            f"Stream {input_stream} could not be found in the "
            f"benchmark instance"
        )

    if output_stream in streams:
        raise ValueError(
            f"Stream {output_stream} already exists in the "
            f"benchmark instance"
        )

    stream: TDatasetStream = streams[input_stream]

    if lazy_splitting is None:
        split_lazily = original_stream_definitions[input_stream].is_lazy
    else:
        split_lazily = lazy_splitting

    exps_tasks_labels = list(
        original_stream_definitions[input_stream].exps_task_labels
    )

    train_exps_source: Union[Iterable[TCLDataset],
                             Tuple[Iterable[TCLDataset], int]]
    valid_exps_source: Union[Iterable[TCLDataset],
                             Tuple[Iterable[TCLDataset], int]]
    if not split_lazily:
        # Classic static splitting
        train_exps_source = []
        valid_exps_source = []

        exp: DatasetExperience[TCLDataset]
        for exp in stream:
            train_exp, valid_exp = split_strategy(exp)
            train_exps_source.append(train_exp)
            valid_exps_source.append(valid_exp)
    else:
        # Lazy splitting (based on a generator)
        split_generator = _lazy_train_val_split(split_strategy, stream)
        train_exps_gen, valid_exps_gen = _gen_split(split_generator)
        train_exps_source = (train_exps_gen, len(stream))
        valid_exps_source = (valid_exps_gen, len(stream))

    stream_definitions: Dict[str, Union[StreamUserDef[TCLDataset],
                                        StreamDef[TCLDataset]]] = \
        dict(original_stream_definitions)

    train_stream_def: StreamUserDef[TCLDataset] = StreamUserDef(
        train_exps_source,
        exps_tasks_labels,
        stream_definitions[input_stream].origin_dataset,
        split_lazily,
    )

    valid_stream_def: StreamUserDef[TCLDataset] = StreamUserDef(
        valid_exps_source,
        exps_tasks_labels,
        stream_definitions[input_stream].origin_dataset,
        split_lazily,
    )

    stream_definitions[input_stream] = train_stream_def
    stream_definitions[output_stream] = valid_stream_def

    complete_test_set_only = benchmark_instance.complete_test_set_only

    if benchmark_factory is not None:
        # Try to create a benchmark of the same class of the
        # initial benchmark.
        return benchmark_factory(
            benchmark_instance,
            stream_definitions,
            complete_test_set_only
        )

    # Generic benchmark class
    if experience_factory is None:
        experience_factory = _make_plain_experience

    return DatasetScenario(
        stream_definitions=stream_definitions,
        complete_test_set_only=complete_test_set_only,
        stream_factory=FactoryBasedStream,
        experience_factory=experience_factory,
    )


__all__ = [
    "nc_benchmark",
    "ni_benchmark",
    "dataset_benchmark",
    "filelist_benchmark",
    "paths_benchmark",
    "tensors_benchmark",
    "lazy_benchmark",
    "dataset_classification_benchmark",
    "dataset_detection_benchmark",
    "filelist_classification_benchmark",
    "paths_classification_benchmark",
    "tensors_classification_benchmark",
    "lazy_classification_benchmark",
    "data_incremental_benchmark",
    "benchmark_with_validation_stream",
    "random_validation_split_strategy",
    "class_balanced_split_strategy",
]