5399886876

################################################################################
# Copyright (c) 2021 ContinualAI.                                              #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 20-05-2021                                                             #
# Author: Matthias De Lange                                                    #
# E-mail: contact@continualai.org                                              #
# Website: www.continualai.org                                                 #
################################################################################

"""INATURALIST2018 Pytorch Dataset

Info: https://www.kaggle.com/c/inaturalist-2018/data
Download: https://github.com/visipedia/inat_comp/tree/master/2018
Based on survey in CL: https://ieeexplore.ieee.org/document/9349197

Images have a max dimension of 800px and have been converted to JPEG format.
You can select supercategories to include. By default, 10 supercategories are
selected from the 14 available: those with at least 100 categories (leaving
out Chromista, Protozoa, Bacteria), minus one randomly omitted supercategory
from the remainder (Actinopterygii).

Example filename from the JSON: "file_name":
"train_val2018/Insecta/1455/994fa5...f1e360d34aae943.jpg"
"""

from typing import Any, Dict, List, Set

import os
import logging
from torch.utils.data.dataset import Dataset
from torchvision.transforms import ToTensor
from PIL import Image
from os.path import expanduser
import pprint

from .inaturalist_data import INATURALIST_DATA


def pil_loader(path):
    """Read the image file at ``path`` with PIL and return it as RGB.

    The file is opened explicitly and handed to PIL as a file object, so the
    handle is released when the ``with`` block exits. This avoids the
    ResourceWarning described in
    https://github.com/python-pillow/Pillow/issues/835
    """
    with open(path, "rb") as handle:
        return Image.open(handle).convert("RGB")


def _isArrayLike(obj):
    """Return True when ``obj`` exposes both ``__iter__`` and ``__len__``."""
    required = ("__iter__", "__len__")
    return all(hasattr(obj, attr) for attr in required)


class INATURALIST2018(Dataset):
    """INATURALIST Pytorch Dataset

    Parses the ``{split}2018.json`` annotation file located in ``root`` with
    the pycocotools COCO parser and keeps every annotation whose category
    belongs to one of the selected supercategories. Indexing the dataset
    yields an ``(image, target)`` pair, where ``target`` is the category
    ``name`` field exactly as stored in the annotation file.

    For default selection of 10 supercategories:

    - Training Images in total: 428,830
    - Validation Images in total:  23,229
    - Shape of images: torch.Size([1, 3, 600, 800])
    - Class counts per supercategory (both train/val):

        - 'Amphibia': 144,
        - 'Animalia': 178,
        - 'Arachnida': 114,
        - 'Aves': 1258,
        - 'Fungi': 321,
        - 'Insecta': 2031,
        - 'Mammalia': 234,
        - 'Mollusca': 262,
        - 'Plantae': 2917,
        - 'Reptilia': 284

    :param root: directory holding the annotation file; image ``file_name``
        entries from the annotations are resolved relative to it. It is also
        passed as ``data_folder`` to ``INATURALIST_DATA``.
    :param split: one of the values in ``splits``.
    :param transform: optional callable applied to each loaded image.
    :param target_transform: optional callable applied to each target.
    :param loader: callable mapping an image path to the loaded image. When
        ``None``, ``pil_loader`` is used.
    :param download: when True, ``INATURALIST_DATA`` is instantiated for
        ``root`` before the annotations are read (presumably fetching the
        data if missing -- see ``INATURALIST_DATA``).
    :param supcats: supercategory names to include. ``None`` selects the
        class default ``def_supcats``.
    """

    splits = ["train", "val", "test"]

    def_supcats = [
        "Amphibia",
        "Animalia",
        "Arachnida",
        "Aves",
        "Fungi",
        "Insecta",
        "Mammalia",
        "Mollusca",
        "Plantae",
        "Reptilia",
    ]

    def __init__(
        self,
        root=expanduser("~") + "/.avalanche/data/inaturalist2018/",
        split="train",
        transform=ToTensor(),
        target_transform=None,
        loader=pil_loader,
        download=True,
        supcats=None,
    ):
        super().__init__()
        # conda install -c conda-forge pycocotools
        # Imported lazily so the module can be imported without pycocotools.
        from pycocotools.coco import COCO as jsonparser

        assert split in self.splits
        self.split = split  # training set or test set
        self.transform = transform
        self.target_transform = target_transform
        self.root = root
        self.loader = loader
        self.log = logging.getLogger("avalanche")

        # Supercategories to include (None = class default `def_supcats`)
        self.supcats = supcats if supcats is not None else self.def_supcats

        if download:
            # The train and val splits share the same `trainval` flag.
            download_trainval = self.split in ["train", "val"]
            self.inat_data = INATURALIST_DATA(
                data_folder=root, trainval=download_trainval
            )

        # load annotations
        ann_file = f"{split}2018.json"
        self.log.info(f"Loading annotations from: {ann_file}")
        self.ds = jsonparser(annotation_file=os.path.join(root, ann_file))

        self.img_ids, self.targets = [], []  # targets field is required!
        self.cats_per_supcat: Dict[str, Set[int]] = {}

        # Filter full dataset parsed
        for ann in self.ds.anns.values():
            img_id = ann["image_id"]
            cat_id = ann["category_id"]

            cat = self.ds.loadCats(cat_id)[0]  # Get category
            target = cat["name"]  # Is subdirectory
            supcat = cat["supercategory"]  # Is parent directory

            # `self.supcats` can only be None here if `def_supcats` itself
            # is None (e.g. overridden in a subclass); then nothing is
            # filtered out.
            if self.supcats is not None and supcat not in self.supcats:
                continue

            # Register the category under its supercategory (needs int)
            self.cats_per_supcat.setdefault(supcat, set()).add(int(target))

            # Keep image id and target aligned by position
            self.img_ids.append(img_id)
            self.targets.append(target)

        cnt_per_supcat = {k: len(v) for k, v in self.cats_per_supcat.items()}
        self.log.info("Classes per supercategories:")
        self.log.info(pprint.pformat(cnt_per_supcat, indent=2))
        self.log.info(f"Images in total: {len(self)}")

    def _load_image(self, img_id: int) -> Image.Image:
        """Load the image registered under ``img_id`` via ``self.loader``.

        The relative ``file_name`` from the annotations is joined to
        ``self.root``. Going through the loader (``pil_loader`` by default)
        honours a user-supplied ``loader`` and ensures the file handle is
        closed instead of being left open by a bare ``Image.open(path)``.
        """
        path = self.ds.loadImgs(img_id)[0]["file_name"]
        loader = self.loader if self.loader is not None else pil_loader
        return loader(os.path.join(self.root, path))

    def _load_target(self, img_id) -> List[Any]:
        """Return the annotation records the parser holds for ``img_id``."""
        return self.ds.loadAnns(self.ds.getAnnIds(img_id))

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair at position ``index``.

        ``transform`` and ``target_transform`` are applied when not None.
        """
        img_id = self.img_ids[index]
        img = self._load_image(img_id)
        target = self.targets[index]

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """Number of images kept after supercategory filtering."""
        return len(self.img_ids)


if __name__ == "__main__":
    # This little example script can be used to visualize the first image
    # loaded from the dataset.
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt
    from torchvision import transforms
    import torch

    train_data = INATURALIST2018()
    test_data = INATURALIST2018(split="val")
    print("train size: ", len(train_data))
    print("test size: ", len(test_data))

    dataloader = DataLoader(train_data, batch_size=1)

    # Only the first batch is displayed; the loop exits right after it.
    for x, y in dataloader:
        first_image = torch.squeeze(x)  # drop the batch dimension
        plt.imshow(transforms.ToPILImage()(first_image))
        plt.show()
        print(x.size())
        print(len(y))
        break

__all__ = ["INATURALIST2018"]

1	################################################################################
2	# Copyright (c) 2021 ContinualAI. #
3	# Copyrights licensed under the MIT License. #
4	# See the accompanying LICENSE file for terms. #
5	# #
6	# Date: 20-05-2021 #
7	# Author: Matthias De Lange #
8	# E-mail: contact@continualai.org #
9	# Website: www.continualai.org #
10	################################################################################
11
12	"""INATURALIST2018 Pytorch Dataset	4✔
13
14	Info: https://www.kaggle.com/c/inaturalist-2018/data
15	Download: https://github.com/visipedia/inat_comp/tree/master/2018
16	Based on survey in CL: https://ieeexplore.ieee.org/document/9349197
17
18	Images have a max dimension of 800px and have been converted to JPEG format
19	You can select supercategories to include. By default 10 Super categories are
20	selected from the 14 available, based on at least having 100 categories (leaving
21	out Chromista, Protozoa, Bacteria), and omitting a random super category from
22	the remainder (Actinopterygii).
23
24	Example filename from the JSON: "file_name":
25	"train_val2018/Insecta/1455/994fa5...f1e360d34aae943.jpg"
26	"""
27
28	from typing import Any, Dict, List, Set	4✔
29
30	import os	4✔
31	import logging	4✔
32	from torch.utils.data.dataset import Dataset	4✔
33	from torchvision.transforms import ToTensor	4✔
34	from PIL import Image	4✔
35	from os.path import expanduser	4✔
36	import pprint	4✔
37
38	from .inaturalist_data import INATURALIST_DATA	4✔
39
40
41	def pil_loader(path):	4✔
42	"""Load an Image with PIL"""
43	# open path as file to avoid ResourceWarning
44	# (https://github.com/python-pillow/Pillow/issues/835)
45	with open(path, "rb") as f:	×
46	img = Image.open(f)	×
47	return img.convert("RGB")	×
48
49
50	def _isArrayLike(obj):	4✔
51	return hasattr(obj, "__iter__") and hasattr(obj, "__len__")	×
52
53
54	class INATURALIST2018(Dataset):	4✔
55	"""INATURALIST Pytorch Dataset	4✔
56
57	For default selection of 10 supercategories:
58
59	- Training Images in total: 428,830
60	- Validation Images in total: 23,229
61	- Shape of images: torch.Size([1, 3, 600, 800])
62	- Class counts per supercategory (both train/val):
63
64	- 'Amphibia': 144,
65	- 'Animalia': 178,
66	- 'Arachnida': 114,
67	- 'Aves': 1258,
68	- 'Fungi': 321,
69	- 'Insecta': 2031,
70	- 'Mammalia': 234,
71	- 'Mollusca': 262,
72	- 'Plantae': 2917,
73	- 'Reptilia': 284}
74	"""
75
76	splits = ["train", "val", "test"]	4✔
77
78	def_supcats = [	4✔
79	"Amphibia",
80	"Animalia",
81	"Arachnida",
82	"Aves",
83	"Fungi",
84	"Insecta",
85	"Mammalia",
86	"Mollusca",
87	"Plantae",
88	"Reptilia",
89	]
90
91	def __init__(	4✔
92	self,
93	root=expanduser("~") + "/.avalanche/data/inaturalist2018/",
94	split="train",
95	transform=ToTensor(),
96	target_transform=None,
97	loader=pil_loader,
98	download=True,
99	supcats=None,
100	):
101	super().__init__()	×
102	# conda install -c conda-forge pycocotools
103	from pycocotools.coco import COCO as jsonparser	×
104
105	assert split in self.splits	×
106	self.split = split # training set or test set	×
107	self.transform = transform	×
108	self.target_transform = target_transform	×
109	self.root = root	×
110	self.loader = loader	×
111	self.log = logging.getLogger("avalanche")	×
112
113	# Supercategories to include (None = all)
114	self.supcats = supcats if supcats is not None else self.def_supcats	×
115
116	if download:	×
117	download_trainval = self.split in ["train", "val"]	×
118	self.inat_data = INATURALIST_DATA(	×
119	data_folder=root, trainval=download_trainval
120	)
121
122	# load annotations
123	ann_file = f"{split}2018.json"	×
124	self.log.info(f"Loading annotations from: {ann_file}")	×
125	self.ds = jsonparser(annotation_file=os.path.join(root, ann_file))	×
126
127	self.img_ids, self.targets = [], [] # targets field is required!	×
128	self.cats_per_supcat: Dict[str, Set[int]] = {}	×
129
130	# Filter full dataset parsed
131	for ann in self.ds.anns.values():	×
132	img_id = ann["image_id"]	×
133	cat_id = ann["category_id"]	×
134
135	# img = self.ds.loadImgs(img_id)[0]["file_name"] # Img Path
136	cat = self.ds.loadCats(cat_id)[0] # Get category	×
137	target = cat["name"] # Is subdirectory	×
138	supcat = cat["supercategory"] # Is parent directory	×
139
140	if self.supcats is None or supcat in self.supcats: # Made selection	×
141	# Add category to supercategory
UNCOV 142	if supcat not in self.cats_per_supcat:	×
143	self.cats_per_supcat[supcat] = set()	×
144	self.cats_per_supcat[supcat].add(int(target)) # Need int	×
145
146	# Add to list
147	self.img_ids.append(img_id)	×
148	self.targets.append(target)	×
149	# self.suptargets.append(supcat)
150
151	cnt_per_supcat = {k: len(v) for k, v in self.cats_per_supcat.items()}	×
152	self.log.info("Classes per supercategories:")	×
153	self.log.info(pprint.pformat(cnt_per_supcat, indent=2))	×
154	self.log.info(f"Images in total: {self.__len__()}")	×
155
156	def _load_image(self, img_id: int) -> Image.Image:	4✔
157	path = self.ds.loadImgs(img_id)[0]["file_name"]	×
158	return Image.open(os.path.join(self.root, path)).convert("RGB")	×
159
160	def _load_target(self, img_id) -> List[Any]:	4✔
161	return self.ds.loadAnns(self.ds.getAnnIds(img_id))	×
162
163	def __getitem__(self, index):	4✔
164	id = self.img_ids[index]	×
165	img = self._load_image(id)	×
166	# target = self._load_target(id)
167	target = self.targets[index]	×
168
169	if self.transform is not None:	×
170	img = self.transform(img)	×
171	if self.target_transform is not None:	×
172	target = self.target_transform(target)	×
173
174	return img, target	×
175
176	def __len__(self):	4✔
177	return len(self.img_ids)	×
178
179
180	if __name__ == "__main__":	4✔
181	# this litte example script can be used to visualize the first image
182	# leaded from the dataset.
183	from torch.utils.data.dataloader import DataLoader	×
184	import matplotlib.pyplot as plt	×
185	from torchvision import transforms	×
186	import torch	×
187
188	train_data = INATURALIST2018()	×
189	test_data = INATURALIST2018(split="val")	×
190	print("train size: ", len(train_data))	×
191	print("test size: ", len(test_data))	×
192
193	dataloader = DataLoader(train_data, batch_size=1)	×
194
195	for batch_data in dataloader:	×
196	x, y = batch_data	×
197	plt.imshow(transforms.ToPILImage()(torch.squeeze(x)))	×
198	plt.show()	×
199	print(x.size())	×
200	print(len(y))	×
201	break	×
202
203	__all__ = ["INATURALIST2018"]	4✔

ContinualAI / avalanche / 5399886876

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous