################################################################################
# Copyright (c) 2020 ContinualAI                                               #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 19-02-2021                                                             #
# Author(s): Tyler L. Hayes                                                    #
# E-mail: contact@continualai.org                                              #
# Website: www.continualai.org                                                 #
################################################################################
import math
import os
from pathlib import Path
from typing import List, Optional, Union

from torchvision import transforms
from typing_extensions import Literal

from avalanche.benchmarks.datasets import Stream51
from avalanche.benchmarks.scenarios.classification_benchmark_creation import (
    create_classification_benchmark_from_paths,
)
from avalanche.benchmarks.scenarios.classification_scenario import (
    CommonClassificationScenarioType,
)
from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    FileAndLabel,
)


# Standard ImageNet normalization statistics.
_mu = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]
_default_stream51_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_mu, std=_std),
    ]
)
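
# A quick sketch of what the default transform produces (illustrative only;
# `pil_img` stands for any RGB PIL image):
#
#     tensor = _default_stream51_transform(pil_img)  # float tensor, [3, 224, 224]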


def _adjust_bbox(img_shapes, bbox, ratio=1.1) -> List[int]:
    """
    Adapts bounding box coordinates so that they can be used by the
    torchvision.transforms.functional.crop function.

    This also pads each bounding box according to the `ratio` parameter.

    :param img_shapes: a list of shapes, with each element in the format
        "[img.shape[0], img.shape[1]]".
    :param bbox: A list of elements in the format "[right, left, top, bottom]".
    :param ratio: The amount of padding. Defaults to 1.1.

    :returns: The adapted bounding box as a "[top, left, height, width]" list,
        which is the argument order expected by crop.
    """
    cw = bbox[0] - bbox[1]
    ch = bbox[2] - bbox[3]
    center = [int(bbox[1] + cw / 2), int(bbox[3] + ch / 2)]
    bbox = [
        min([int(center[0] + (cw * ratio / 2)), img_shapes[0]]),
        max([int(center[0] - (cw * ratio / 2)), 0]),
        min([int(center[1] + (ch * ratio / 2)), img_shapes[1]]),
        max([int(center[1] - (ch * ratio / 2)), 0]),
    ]
    return [bbox[3], bbox[1], bbox[2] - bbox[3], bbox[0] - bbox[1]]
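
# A minimal sketch of `_adjust_bbox` in action (illustrative values; `img`
# is assumed to be a 480x640 RGB image):
#
#     from torchvision.transforms import functional as F
#     top, left, height, width = _adjust_bbox([480, 640], [400, 200, 300, 100])
#     patch = F.crop(img, top, left, height, width)  # 220x220 padded crop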


def CLStream51(
    *,
    scenario: Literal[
        "iid", "class_iid", "instance", "class_instance"
    ] = "class_instance",
    seed=10,
    eval_num=None,
    bbox_crop=True,
    ratio: float = 1.10,
    download=True,
    train_transform=_default_stream51_transform,
    eval_transform=_default_stream51_transform,
    dataset_root: Optional[Union[str, Path]] = None
) -> CommonClassificationScenarioType:
    """
    Creates a CL benchmark for Stream-51.

    If the dataset is not present on the machine, this method will
    automatically download and store it.

    This generator can be used to obtain the 'iid', 'class_iid', 'instance',
    and 'class_instance' scenarios.

    The benchmark instance returned by this method will have two fields,
    `train_stream` and `test_stream`, which can be iterated to obtain
    training and test :class:`Experience` instances. Avalanche will support
    the "out of distribution" stream in the near future!

    Each Experience contains the `dataset` and the associated task label,
    which is always 0 for Stream51.

    The benchmark API is quite simple and is uniform across all benchmark
    generators. It is recommended to check the tutorial of the "benchmark"
    API, which contains usage examples ranging from "basic" to "advanced".

    :param scenario: A string defining which Stream-51 scenario to return.
        Can be chosen between 'iid', 'class_iid', 'instance', and
        'class_instance'. Defaults to 'class_instance'.
    :param bbox_crop: If True, crops the images by using the bounding boxes
        defined by Stream51. This is needed to ensure that images depict only
        the required object (for classification purposes). Defaults to True.
    :param ratio: A floating point value (>= 1.0) that controls the amount of
        padding for the bounding box crop (default: 1.10).
    :param seed: Random seed for shuffling classes or instances. Defaults
        to 10.
    :param eval_num: How many samples to see before evaluating the network
        for the 'instance' ordering, or how many classes to see before
        evaluating the network for the 'class_instance' ordering. Defaults to
        None, which means that 30000 will be used for the 'instance' scenario
        and 10 for the 'class_instance' scenario.
    :param download: If True, the dataset will be automatically downloaded.
        Defaults to True.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). If no transformation
        is passed, the default train transformation will be used.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). If no transformation
        is passed, the default eval transformation will be used.
    :param dataset_root: The root path of the dataset. Defaults to None,
        which means that the default location for 'stream51' will be used.

    :returns: A properly initialized :class:`ClassificationScenario` instance.
    """

    # get the train and test sets; training samples are ordered according to
    # the chosen scenario
    train_set = Stream51(root=dataset_root, train=True, download=download)
    test_set = Stream51(root=dataset_root, train=False, download=download)
    samples = Stream51.make_dataset(
        train_set.samples, ordering=scenario, seed=seed
    )
    dataset_root = train_set.root

    # set appropriate train parameters
    train_set.samples = samples
    train_set.targets = [s[0] for s in samples]

    # compute the number of tasks
    if eval_num is None and scenario == "instance":
        eval_num = 30000
        num_tasks = math.ceil(
            len(train_set) / eval_num
        )  # evaluate every 30000 samples
    elif eval_num is None and scenario == "class_instance":
        eval_num = 10
        num_tasks = math.ceil(51 / eval_num)  # evaluate every 10 classes
    elif scenario == "instance":
        num_tasks = math.ceil(
            len(train_set) / eval_num
        )  # evaluate every eval_num samples
    else:
        num_tasks = math.ceil(51 / eval_num)  # evaluate every eval_num classes
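
    # Illustrative arithmetic: the Stream-51 training set holds 150736
    # samples, so the default 'instance' setting yields
    # math.ceil(150736 / 30000) = 6 tasks, while 'class_instance' with its
    # default eval_num=10 over 51 classes also yields math.ceil(51 / 10) = 6.
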
    test_filelists_paths: List[List[FileAndLabel]] = []
    train_filelists_paths: List[List[FileAndLabel]] = []
    test_ood_filelists_paths: Optional[List[List[FileAndLabel]]] = []
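    # Each entry built below is a `FileAndLabel` tuple in the form
    # (file_path, class_label, crop_box); the crop box is stripped at the end
    # when `bbox_crop` is False.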
    if scenario == "instance":
        # break files into task lists based on eval_num samples
        start = 0
        for i in range(num_tasks):
            end = min(start + eval_num, len(train_set))

            train_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, train_set.samples[j][-1]),
                        train_set.samples[j][0],
                        _adjust_bbox(
                            train_set.samples[j][-3],
                            train_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in range(start, end)
                ]
            )
            start = end

        # use all test data for instance ordering
        test_filelists_paths = [[
            (
                os.path.join(dataset_root, test_set.samples[j][-1]),
                test_set.samples[j][0],
                _adjust_bbox(
                    test_set.samples[j][-3], test_set.samples[j][-2], ratio
                ),
            )
            for j in range(len(test_set))
        ]]
        test_ood_filelists_paths = None  # no ood testing for instance ordering
    elif scenario == "class_instance":
        # break files into task lists based on classes
        test_ood_filelists_paths = []
        # indices at which the class label changes in the ordered train stream
        class_change = [
            i
            for i in range(1, len(train_set.targets))
            if train_set.targets[i] != train_set.targets[i - 1]
        ]
        unique_so_far = []
        start = 0
        for i in range(num_tasks):
            if i == num_tasks - 1:
                end = len(train_set)
            else:
                end = class_change[
                    min(eval_num + eval_num * i - 1, len(class_change) - 1)
                ]
            unique_labels = [train_set.targets[k] for k in range(start, end)]
            unique_labels = list(set(unique_labels))
            unique_so_far += unique_labels
            test_files = []
            test_ood_files = []
            # test samples from classes seen so far are in-distribution;
            # the rest form the out-of-distribution test list
            for ix, test_label in enumerate(test_set.targets):
                if test_label in unique_so_far:
                    test_files.append(ix)
                else:
                    test_ood_files.append(ix)
            test_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, test_set.samples[j][-1]),
                        test_set.samples[j][0],
                        _adjust_bbox(
                            test_set.samples[j][-3],
                            test_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in test_files
                ]
            )
            test_ood_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, test_set.samples[j][-1]),
                        test_set.samples[j][0],
                        _adjust_bbox(
                            test_set.samples[j][-3],
                            test_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in test_ood_files
                ]
            )
            train_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, train_set.samples[j][-1]),
                        train_set.samples[j][0],
                        _adjust_bbox(
                            train_set.samples[j][-3],
                            train_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in range(start, end)
                ]
            )
            start = end
    else:
        raise NotImplementedError(
            "Filelist construction is only implemented for the 'instance' "
            "and 'class_instance' scenarios"
        )

    if not bbox_crop:
        # remove bbox coordinates from the lists
        train_filelists_paths = [
            [(j[0], j[1]) for j in i] for i in train_filelists_paths
        ]
        test_filelists_paths = [
            [(j[0], j[1]) for j in i] for i in test_filelists_paths
        ]
        if scenario == "class_instance":
            assert test_ood_filelists_paths is not None
            test_ood_filelists_paths = [
                [(j[0], j[1]) for j in i] for i in test_ood_filelists_paths
            ]

    # NOTE: test_ood_filelists_paths is prepared above but not yet passed to
    # the benchmark creation helper; the "out of distribution" stream is not
    # exposed yet (see the docstring).
    benchmark_obj: CommonClassificationScenarioType = \
        create_classification_benchmark_from_paths(
            train_lists_of_files=train_filelists_paths,
            test_lists_of_files=test_filelists_paths,
            task_labels=[0 for _ in range(num_tasks)],
            # the 'instance' ordering evaluates on a single, complete test set
            complete_test_set_only=scenario == "instance",
            train_transform=train_transform,
            eval_transform=eval_transform,
        )

    return benchmark_obj


__all__ = ["CLStream51"]

if __name__ == "__main__":
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt

    benchmark = CLStream51(scenario="class_instance", seed=10, bbox_crop=True)

    train_imgs_count = 0
    for i, batch in enumerate(benchmark.train_stream):
        print(i, batch)
        dataset, _ = batch.dataset, batch.task_label
        train_imgs_count += len(dataset)
        dl = DataLoader(dataset, batch_size=1)

        for j, mb in enumerate(dl):
            if j == 2:
                break
            x, y, *_ = mb

            # show a few de-normalized images from the data stream
            # (for debugging purposes only)
            x_np = x[0, :, :, :].numpy().transpose(1, 2, 0)
            # undo the Normalize step; numpy broadcasts the per-channel lists
            x_np = x_np * _std + _mu
            plt.imshow(x_np)
            plt.show()

            print(x.shape)
            print(y.shape)

    # make sure all of the training data was loaded properly
    assert train_imgs_count == 150736