ContinualAI / avalanche / build 5268393053 (pending completion)

Pull Request #1397: Specialize benchmark creation helpers
Merge 60d244754 into e91562200 (committed via github / web-flow)

417 of 538 new or added lines in 30 files covered (77.51%).
43 existing lines in 5 files are now uncovered.
16586 of 22630 relevant lines covered (73.29%).
2.93 hits per line.

Source file: /avalanche/benchmarks/classic/clear.py (23.75% covered)

################################################################################
# Copyright (c) 2021 ContinualAI.                                              #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 05-17-2022                                                             #
# Author: Zhiqiu Lin, Jia Shi                                                  #
# E-mail: zl279@cornell.edu, jiashi@andrew.cmu.edu                             #
# Website: https://clear-benchmark.github.io                                   #
################################################################################

"""This module contains the high-level CLEAR benchmark/factory generator.

In the original CLEAR benchmark paper (https://arxiv.org/abs/2201.06289),
a novel streaming evaluation protocol is proposed, in contrast to the
traditional IID evaluation protocol for continual learning. The major
difference is:

IID Protocol: Sample a test set from the current task, which requires
    splitting the data into a 7:3 train:test split.
Streaming Protocol: Use the data of the next task as the test set for the
    current task. This is arguably more realistic, since real-world model
    training and deployment usually take a considerable amount of time;
    by the time the model is applied, the task has already drifted.

We support both evaluation protocols for benchmark construction."""

from pathlib import Path
from typing import Sequence, Union, Any, Optional

from avalanche.benchmarks.datasets.clear import (
    _CLEARImage,
    _CLEARFeature,
    SEED_LIST,
    CLEAR_FEATURE_TYPES,
    _CLEAR_DATA_SPLITS,
)
from avalanche.benchmarks.scenarios.classification_benchmark_creation import (
    create_classification_benchmark_from_paths,
    create_classification_benchmark_from_tensor_lists,
)
from avalanche.benchmarks.scenarios.classification_scenario import (
    CommonClassificationScenarioType,
)

EVALUATION_PROTOCOLS = ["iid", "streaming"]
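

# A minimal sketch (not part of the original module) of how the two
# evaluation protocols described in the module docstring pair train and
# test data across tasks: 'iid' splits each task 7:3 into train/test with
# a seed, while 'streaming' tests task t on the data of task t + 1. The
# helper name below is hypothetical and purely illustrative.
def _protocol_task_pairs(num_tasks, protocol):
    """Return (train_task, test_task) index pairs for each evaluation step."""
    if protocol == "iid":
        # Train and test samples are both drawn from the same task
        # (via a seeded 7:3 split).
        return [(t, t) for t in range(num_tasks)]
    # 'streaming': the next task in time serves as the test set, so the
    # last task has no test counterpart.
    return [(t, t + 1) for t in range(num_tasks - 1)]
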
def CLEAR(
    *,
    data_name: str = "clear10",
    evaluation_protocol: str = "streaming",
    feature_type: Optional[str] = None,
    seed: Optional[int] = None,
    train_transform: Optional[Any] = None,
    eval_transform: Optional[Any] = None,
    dataset_root: Optional[Union[str, Path]] = None,
):
    """
    Creates a Domain-Incremental benchmark for CLEAR 10 & 100
    with 10 & 100 illustrative classes and an (n+1)-th background class.

    If the dataset is not present on the machine, **this method will
    automatically download** and store it.

    This generator supports benchmark construction under both the 'iid'
    and 'streaming' evaluation protocols. The main difference is:

    'iid': Always sample the test set from the current task, which requires
        splitting the data 7:3 into train:test with a given random seed.
    'streaming': Use all data of the next task as the test set for the
        current task, which does not split the data and does not require
        a random seed.

    The generator supports both Image and Feature (Tensor) datasets.
    If feature_type is None, then images will be used.
    If feature_type is specified, then feature tensors will be used.

    The benchmark instance returned by this method will have two fields,
    `train_stream` and `test_stream`, which can be iterated to obtain
    training and test :class:`Experience`. Each Experience contains the
    `dataset` and the associated task label.

    Note that the train/test streams will still contain data of the current
    task, regardless of whether the evaluation protocol is 'iid' or
    'streaming'. Under the 'iid' protocol, the train stream holds 70% of the
    current task's data and the test stream the remaining 30%. Under the
    'streaming' protocol, the train stream holds 100% of the current task's
    data, and the test stream is just a duplicate of the train stream.

    The task label 0 will be assigned to each experience.

    :param data_name: Which benchmark to use: 'clear10' or 'clear100'.
        Defaults to 'clear10'.
    :param evaluation_protocol: Choose from ['iid', 'streaming'].
        If 'iid' is chosen, a seed in [0, 1, 2, 3, 4] must be specified;
        if 'streaming' is chosen, the seed will be ignored.
    :param feature_type: Whether to return raw RGB images or feature tensors
        extracted by pre-trained models. Choose from
        [None, 'moco_b0', 'moco_imagenet', 'byol_imagenet', 'imagenet'].
        If feature_type is None, then images will be returned;
        otherwise feature tensors will be returned.
    :param seed: If evaluation_protocol is 'iid', a seed value in
        [0, 1, 2, 3, 4] must be specified for the train:test split.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see the torchvision.transforms documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param dataset_root: The root path of the dataset.
        Defaults to None, which means that the default location for
        str(data_name) will be used.

    :returns: a properly initialized :class:`ClassificationScenario` instance.
    """
    assert data_name in _CLEAR_DATA_SPLITS

    assert evaluation_protocol in EVALUATION_PROTOCOLS, (
        f"Must specify an evaluation protocol from {EVALUATION_PROTOCOLS}"
    )

    if evaluation_protocol == "streaming":
        assert seed is None, (
            "Seed for train/test split is not required "
            "under the streaming protocol"
        )
        train_split = "all"
        test_split = "all"
    elif evaluation_protocol == "iid":
        assert seed in SEED_LIST, (
            f"The iid protocol requires a seed from {SEED_LIST} "
            "for the train/test split"
        )
        train_split = "train"
        test_split = "test"
    else:
        raise NotImplementedError()

    benchmark_obj: CommonClassificationScenarioType
    if feature_type is None:
        clear_dataset_train = _CLEARImage(
            root=dataset_root,
            data_name=data_name,
            download=True,
            split=train_split,
            seed=seed,
            transform=train_transform,
        )
        clear_dataset_test = _CLEARImage(
            root=dataset_root,
            data_name=data_name,
            download=True,
            split=test_split,
            seed=seed,
            transform=eval_transform,
        )
        train_samples_paths = clear_dataset_train.get_paths_and_targets(
            root_appended=True
        )
        test_samples_paths = clear_dataset_test.get_paths_and_targets(
            root_appended=True
        )
        benchmark_obj = create_classification_benchmark_from_paths(
            train_samples_paths,
            test_samples_paths,
            task_labels=list(range(len(train_samples_paths))),
            complete_test_set_only=False,
            train_transform=train_transform,
            eval_transform=eval_transform,
        )
    else:
        clear_dataset_train = _CLEARFeature(
            root=dataset_root,
            data_name=data_name,
            download=True,
            feature_type=feature_type,
            split=train_split,
            seed=seed,
        )
        clear_dataset_test = _CLEARFeature(
            root=dataset_root,
            data_name=data_name,
            download=True,
            feature_type=feature_type,
            split=test_split,
            seed=seed,
        )
        train_samples = clear_dataset_train.tensors_and_targets
        test_samples = clear_dataset_test.tensors_and_targets

        benchmark_obj = create_classification_benchmark_from_tensor_lists(
            train_samples,
            test_samples,
            task_labels=list(range(len(train_samples))),
            complete_test_set_only=False,
            train_transform=train_transform,
            eval_transform=eval_transform,
        )

    return benchmark_obj
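

# Illustrative usage sketch (not part of the original module): building the
# CLEAR10 image benchmark under the streaming protocol and iterating its
# train stream, mirroring the self-test in the __main__ block below. The
# dataset_root value is a placeholder assumption; the first call downloads
# the dataset.
def _example_clear_usage():  # pragma: no cover
    benchmark = CLEAR(
        data_name="clear10",
        evaluation_protocol="streaming",
        feature_type=None,
        dataset_root="../avalanche_datasets/clear10",
    )
    for experience in benchmark.train_stream:
        # Each experience exposes its dataset and an associated task label.
        print(experience.task_label, len(experience.dataset))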


class CLEARMetric:
    """All metrics used in the CLEAR paper.

    More information can be found at:
    https://clear-benchmark.github.io/
    """

    def __init__(self):
        super(CLEARMetric, self).__init__()

    def get_metrics(self, matrix):
        """Given an accuracy matrix, returns the 5 metrics used in the
        CLEAR paper.

        These are:
            'in_domain' : In-domain accuracy (avg of diagonal)
            'next_domain' : Next-domain accuracy (avg of superdiagonal)
            'accuracy' : Accuracy (avg of diagonal + lower triangular)
            'backward_transfer' : BwT (avg of lower triangular)
            'forward_transfer' : FwT (avg of upper triangular)

        :param matrix: Accuracy matrix, e.g., matrix[5][0] is the test
            accuracy on the 0-th task at timestamp 5.
        :return: A dictionary containing these 5 metrics.
        """
        assert matrix.shape[0] == matrix.shape[1]
        metrics_dict = {
            "in_domain": self.in_domain(matrix),
            "next_domain": self.next_domain(matrix),
            "accuracy": self.accuracy(matrix),
            "forward_transfer": self.forward_transfer(matrix),
            "backward_transfer": self.backward_transfer(matrix),
        }
        return metrics_dict

    def accuracy(self, matrix):
        """
        Average of the lower triangle + diagonal.
        Evaluates accuracy on seen tasks.
        """
        r, _ = matrix.shape
        res = [matrix[i, j] for i in range(r) for j in range(i + 1)]
        return sum(res) / len(res)

    def in_domain(self, matrix):
        """
        Diagonal average.
        Evaluates accuracy on the current task only.
        """
        r, _ = matrix.shape
        res = [matrix[i, i] for i in range(r)]
        return sum(res) / r

    def next_domain(self, matrix):
        """
        Superdiagonal average.
        Evaluates accuracy on the immediate next timestamp.
        """
        r, _ = matrix.shape
        res = [matrix[i, i + 1] for i in range(r - 1)]
        return sum(res) / (r - 1)

    def forward_transfer(self, matrix):
        """
        Upper triangular average.
        Evaluates generalization to all future tasks.
        """
        r, _ = matrix.shape
        res = [matrix[i, j] for i in range(r) for j in range(i + 1, r)]
        return sum(res) / len(res)

    def backward_transfer(self, matrix):
        """
        Lower triangular average.
        Evaluates learning without forgetting.
        """
        r, _ = matrix.shape
        res = [matrix[i, j] for i in range(r) for j in range(i)]
        return sum(res) / len(res)
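

# Illustrative sketch (not part of the original module): computing the five
# CLEAR metrics from a toy 3x3 accuracy matrix, where acc[i][j] is the test
# accuracy on task j after training through task i.
def _example_clear_metrics():  # pragma: no cover
    import numpy as np

    acc = np.array(
        [
            [0.90, 0.60, 0.55],
            [0.85, 0.92, 0.65],
            [0.80, 0.88, 0.93],
        ]
    )
    metrics = CLEARMetric().get_metrics(acc)
    # e.g. in_domain = (0.90 + 0.92 + 0.93) / 3
    # and next_domain = (0.60 + 0.65) / 2
    print(metrics)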


__all__ = ["CLEAR", "CLEARMetric"]

if __name__ == "__main__":
    import sys
    from torchvision import transforms

    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    transform = transforms.Compose(
        [
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]
    )

    data_name = "clear10"
    root = f"../avalanche_datasets/{data_name}"

    for p in EVALUATION_PROTOCOLS:
        seed_list: Sequence[Optional[int]]
        if p == "streaming":
            seed_list = [None]
        else:
            seed_list = SEED_LIST

        for f in [None] + CLEAR_FEATURE_TYPES[data_name]:
            t = transform if f is None else None
            for seed in seed_list:
                benchmark_instance = CLEAR(
                    evaluation_protocol=p,
                    feature_type=f,
                    seed=seed,
                    train_transform=t,
                    eval_transform=t,
                    dataset_root=root,
                )
                benchmark_instance.train_stream[0]
                # check_vision_benchmark(benchmark_instance)
                print(
                    f"Check passed for protocol {p}, "
                    f"feature type {f} and seed {seed}"
                )
    sys.exit(0)