################################################################################
# Copyright (c) 2020 ContinualAI                                               #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 19-02-2021                                                             #
# Author(s): Tyler L. Hayes                                                    #
# E-mail: contact@continualai.org                                              #
# Website: www.continualai.org                                                 #
################################################################################
import math
import os
from pathlib import Path
from typing import List, Optional, Union

from torchvision import transforms
from typing_extensions import Literal

from avalanche.benchmarks.datasets import Stream51
from avalanche.benchmarks.scenarios.classification_benchmark_creation import (
    create_classification_benchmark_from_paths,
)
from avalanche.benchmarks.scenarios.classification_scenario import (
    CommonClassificationScenarioType,
)
from avalanche.benchmarks.scenarios.generic_benchmark_creation import (
    FileAndLabel,
)


# Standard ImageNet normalization statistics.
_mu = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]
_default_stream51_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_mu, std=_std),
    ]
)
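
# A quick sketch of what the default transform produces (illustrative only;
# `pil_img` stands for any RGB PIL image):
#
#     tensor = _default_stream51_transform(pil_img)  # float tensor, [3, 224, 224]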


def _adjust_bbox(img_shapes, bbox, ratio=1.1) -> List[int]:
    """
    Adapts bounding box coordinates so that they can be used by the
    torchvision.transforms.functional.crop function.

    This also pads each bounding box according to the `ratio` parameter.

    :param img_shapes: a list of shapes, with each element in the format
        "[img.shape[0], img.shape[1]]".
    :param bbox: A list of elements in the format "[right, left, top, bottom]".
    :param ratio: The amount of padding. Defaults to 1.1.

    :returns: The adapted bounding box as a "[top, left, height, width]" list,
        which is the argument order expected by crop.
    """
    cw = bbox[0] - bbox[1]
    ch = bbox[2] - bbox[3]
    center = [int(bbox[1] + cw / 2), int(bbox[3] + ch / 2)]
    bbox = [
        min([int(center[0] + (cw * ratio / 2)), img_shapes[0]]),
        max([int(center[0] - (cw * ratio / 2)), 0]),
        min([int(center[1] + (ch * ratio / 2)), img_shapes[1]]),
        max([int(center[1] - (ch * ratio / 2)), 0]),
    ]
    return [bbox[3], bbox[1], bbox[2] - bbox[3], bbox[0] - bbox[1]]
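
# A minimal sketch of `_adjust_bbox` in action (illustrative values; `img`
# is assumed to be a 480x640 RGB image):
#
#     from torchvision.transforms import functional as F
#     top, left, height, width = _adjust_bbox([480, 640], [400, 200, 300, 100])
#     patch = F.crop(img, top, left, height, width)  # 220x220 padded crop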


def CLStream51(
    *,
    scenario: Literal[
        "iid", "class_iid", "instance", "class_instance"
    ] = "class_instance",
    seed=10,
    eval_num=None,
    bbox_crop=True,
    ratio: float = 1.10,
    download=True,
    train_transform=_default_stream51_transform,
    eval_transform=_default_stream51_transform,
    dataset_root: Optional[Union[str, Path]] = None
) -> CommonClassificationScenarioType:
    """
    Creates a CL benchmark for Stream-51.

    If the dataset is not present on the machine, this method will
    automatically download and store it.

    This generator can be used to obtain the 'iid', 'class_iid', 'instance',
    and 'class_instance' scenarios.

    The benchmark instance returned by this method will have two fields,
    `train_stream` and `test_stream`, which can be iterated to obtain
    training and test :class:`Experience` instances. Avalanche will support
    the "out of distribution" stream in the near future!

    Each Experience contains the `dataset` and the associated task label,
    which is always 0 for Stream51.

    The benchmark API is quite simple and is uniform across all benchmark
    generators. It is recommended to check the tutorial of the "benchmark"
    API, which contains usage examples ranging from "basic" to "advanced".

    :param scenario: A string defining which Stream-51 scenario to return.
        Can be chosen between 'iid', 'class_iid', 'instance', and
        'class_instance'. Defaults to 'class_instance'.
    :param bbox_crop: If True, crops the images by using the bounding boxes
        defined by Stream51. This is needed to ensure that images depict only
        the required object (for classification purposes). Defaults to True.
    :param ratio: A floating point value (>= 1.0) that controls the amount of
        padding for the bounding box crop (default: 1.10).
    :param seed: Random seed for shuffling classes or instances. Defaults
        to 10.
    :param eval_num: How many samples to see before evaluating the network
        for the 'instance' ordering, or how many classes to see before
        evaluating the network for the 'class_instance' ordering. Defaults to
        None, which means that 30000 will be used for the 'instance' scenario
        and 10 for the 'class_instance' scenario.
    :param download: If True, the dataset will be automatically downloaded.
        Defaults to True.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). If no transformation
        is passed, the default train transformation will be used.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). If no transformation
        is passed, the default eval transformation will be used.
    :param dataset_root: The root path of the dataset. Defaults to None,
        which means that the default location for 'stream51' will be used.

    :returns: A properly initialized :class:`ClassificationScenario` instance.
    """

    # get the train and test sets; training samples are ordered according to
    # the chosen scenario
    train_set = Stream51(root=dataset_root, train=True, download=download)
    test_set = Stream51(root=dataset_root, train=False, download=download)
    samples = Stream51.make_dataset(
        train_set.samples, ordering=scenario, seed=seed
    )
    dataset_root = train_set.root

    # set appropriate train parameters
    train_set.samples = samples
    train_set.targets = [s[0] for s in samples]

    # compute the number of tasks
    if eval_num is None and scenario == "instance":
        eval_num = 30000
        num_tasks = math.ceil(
            len(train_set) / eval_num
        )  # evaluate every 30000 samples
    elif eval_num is None and scenario == "class_instance":
        eval_num = 10
        num_tasks = math.ceil(51 / eval_num)  # evaluate every 10 classes
    elif scenario == "instance":
        num_tasks = math.ceil(
            len(train_set) / eval_num
        )  # evaluate every eval_num samples
    else:
        num_tasks = math.ceil(51 / eval_num)  # evaluate every eval_num classes
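
    # Illustrative arithmetic: the Stream-51 training set holds 150736
    # samples, so the default 'instance' setting yields
    # math.ceil(150736 / 30000) = 6 tasks, while 'class_instance' with its
    # default eval_num=10 over 51 classes also yields math.ceil(51 / 10) = 6.
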
    test_filelists_paths: List[List[FileAndLabel]] = []
    train_filelists_paths: List[List[FileAndLabel]] = []
    test_ood_filelists_paths: Optional[List[List[FileAndLabel]]] = []
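    # Each entry built below is a `FileAndLabel` tuple in the form
    # (file_path, class_label, crop_box); the crop box is stripped at the end
    # when `bbox_crop` is False.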
    if scenario == "instance":
        # break files into task lists based on eval_num samples
        start = 0
        for i in range(num_tasks):
            end = min(start + eval_num, len(train_set))

            train_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, train_set.samples[j][-1]),
                        train_set.samples[j][0],
                        _adjust_bbox(
                            train_set.samples[j][-3],
                            train_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in range(start, end)
                ]
            )
            start = end

        # use all test data for instance ordering
        test_filelists_paths = [[
            (
                os.path.join(dataset_root, test_set.samples[j][-1]),
                test_set.samples[j][0],
                _adjust_bbox(
                    test_set.samples[j][-3], test_set.samples[j][-2], ratio
                ),
            )
            for j in range(len(test_set))
        ]]
        test_ood_filelists_paths = None  # no ood testing for instance ordering
    elif scenario == "class_instance":
        # break files into task lists based on classes
        test_ood_filelists_paths = []
        # indices at which the class label changes in the ordered train stream
        class_change = [
            i
            for i in range(1, len(train_set.targets))
            if train_set.targets[i] != train_set.targets[i - 1]
        ]
        unique_so_far = []
        start = 0
        for i in range(num_tasks):
            if i == num_tasks - 1:
                end = len(train_set)
            else:
                end = class_change[
                    min(eval_num + eval_num * i - 1, len(class_change) - 1)
                ]
            unique_labels = [train_set.targets[k] for k in range(start, end)]
            unique_labels = list(set(unique_labels))
            unique_so_far += unique_labels
            test_files = []
            test_ood_files = []
            # test samples from classes seen so far are in-distribution;
            # the rest form the out-of-distribution test list
            for ix, test_label in enumerate(test_set.targets):
                if test_label in unique_so_far:
                    test_files.append(ix)
                else:
                    test_ood_files.append(ix)
            test_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, test_set.samples[j][-1]),
                        test_set.samples[j][0],
                        _adjust_bbox(
                            test_set.samples[j][-3],
                            test_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in test_files
                ]
            )
            test_ood_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, test_set.samples[j][-1]),
                        test_set.samples[j][0],
                        _adjust_bbox(
                            test_set.samples[j][-3],
                            test_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in test_ood_files
                ]
            )
            train_filelists_paths.append(
                [
                    (
                        os.path.join(dataset_root, train_set.samples[j][-1]),
                        train_set.samples[j][0],
                        _adjust_bbox(
                            train_set.samples[j][-3],
                            train_set.samples[j][-2],
                            ratio,
                        ),
                    )
                    for j in range(start, end)
                ]
            )
            start = end
    else:
        raise NotImplementedError(
            "Filelist construction is only implemented for the 'instance' "
            "and 'class_instance' scenarios"
        )

    if not bbox_crop:
        # remove bbox coordinates from the lists
        train_filelists_paths = [
            [(j[0], j[1]) for j in i] for i in train_filelists_paths
        ]
        test_filelists_paths = [
            [(j[0], j[1]) for j in i] for i in test_filelists_paths
        ]
        if scenario == "class_instance":
            assert test_ood_filelists_paths is not None
            test_ood_filelists_paths = [
                [(j[0], j[1]) for j in i] for i in test_ood_filelists_paths
            ]

    # NOTE: test_ood_filelists_paths is prepared above but not yet passed to
    # the benchmark creation helper; the "out of distribution" stream is not
    # exposed yet (see the docstring).
    benchmark_obj: CommonClassificationScenarioType = \
        create_classification_benchmark_from_paths(
            train_lists_of_files=train_filelists_paths,
            test_lists_of_files=test_filelists_paths,
            task_labels=[0 for _ in range(num_tasks)],
            # the 'instance' ordering evaluates on a single, complete test set
            complete_test_set_only=scenario == "instance",
            train_transform=train_transform,
            eval_transform=eval_transform,
        )

    return benchmark_obj


__all__ = ["CLStream51"]

if __name__ == "__main__":
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt

    benchmark = CLStream51(scenario="class_instance", seed=10, bbox_crop=True)

    train_imgs_count = 0
    for i, batch in enumerate(benchmark.train_stream):
        print(i, batch)
        dataset, _ = batch.dataset, batch.task_label
        train_imgs_count += len(dataset)
        dl = DataLoader(dataset, batch_size=1)

        for j, mb in enumerate(dl):
            if j == 2:
                break
            x, y, *_ = mb

            # show a few de-normalized images from the data stream
            # (for debugging purposes only)
            x_np = x[0, :, :, :].numpy().transpose(1, 2, 0)
            # undo the Normalize step; numpy broadcasts the per-channel lists
            x_np = x_np * _std + _mu
            plt.imshow(x_np)
            plt.show()

            print(x.shape)
            print(y.shape)

    # make sure all of the training data was loaded properly
    assert train_imgs_count == 150736