• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ContinualAI / avalanche / 5399886876

pending completion
5399886876

Pull #1398

github

web-flow
Merge 2c8aba8e6 into a61ae5cab
Pull Request #1398: switch to black formatting

1023 of 1372 new or added lines in 177 files covered. (74.56%)

144 existing lines in 66 files now uncovered.

16366 of 22540 relevant lines covered (72.61%)

2.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

25.84
/avalanche/benchmarks/datasets/inaturalist/inaturalist.py
1
################################################################################
2
# Copyright (c) 2021 ContinualAI.                                              #
3
# Copyrights licensed under the MIT License.                                   #
4
# See the accompanying LICENSE file for terms.                                 #
5
#                                                                              #
6
# Date: 20-05-2021                                                             #
7
# Author: Matthias De Lange                                                    #
8
# E-mail: contact@continualai.org                                              #
9
# Website: www.continualai.org                                                 #
10
################################################################################
11

12
"""INATURALIST2018 Pytorch Dataset
4✔
13

14
Info: https://www.kaggle.com/c/inaturalist-2018/data
15
Download: https://github.com/visipedia/inat_comp/tree/master/2018
16
Based on survey in CL: https://ieeexplore.ieee.org/document/9349197
17

18
Images have a max dimension of 800px and have been converted to JPEG format.
19
You can select supercategories to include. By default 10 supercategories are
20
selected from the 14 available, based on at least having 100 categories (leaving
21
out Chromista, Protozoa, Bacteria), and omitting a random super category from
22
the remainder (Actinopterygii).
23

24
Example filename from the JSON: "file_name":
25
"train_val2018/Insecta/1455/994fa5...f1e360d34aae943.jpg"
26
"""
27

28
from typing import Any, Dict, List, Set
4✔
29

30
import os
4✔
31
import logging
4✔
32
from torch.utils.data.dataset import Dataset
4✔
33
from torchvision.transforms import ToTensor
4✔
34
from PIL import Image
4✔
35
from os.path import expanduser
4✔
36
import pprint
4✔
37

38
from .inaturalist_data import INATURALIST_DATA
4✔
39

40

41
def pil_loader(path):
    """Read the image stored at ``path`` with PIL and return it as RGB."""
    # Hand PIL an explicitly opened file object instead of the path itself:
    # this avoids a ResourceWarning
    # (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, "rb") as handle:
        return Image.open(handle).convert("RGB")
48

49

50
def _isArrayLike(obj):
    """Tell whether ``obj`` exposes both ``__iter__`` and ``__len__``."""
    required = ("__iter__", "__len__")
    return all(hasattr(obj, dunder) for dunder in required)
52

53

54
class INATURALIST2018(Dataset):
    """INATURALIST 2018 Pytorch Dataset.

    Parses the COCO-style annotation file ``<split>2018.json`` located in
    ``root`` and keeps only the images whose category belongs to one of the
    selected supercategories.

    For default selection of 10 supercategories:

    - Training Images in total: 428,830
    - Validation Images in total:  23,229
    - Shape of images: torch.Size([1, 3, 600, 800])
    - Class counts per supercategory (both train/val):

        - 'Amphibia': 144,
        - 'Animalia': 178,
        - 'Arachnida': 114,
        - 'Aves': 1258,
        - 'Fungi': 321,
        - 'Insecta': 2031,
        - 'Mammalia': 234,
        - 'Mollusca': 262,
        - 'Plantae': 2917,
        - 'Reptilia': 284
    """

    # Accepted values for the ``split`` constructor argument.
    splits = ["train", "val", "test"]

    # Supercategories used when the caller does not pass ``supcats``.
    def_supcats = [
        "Amphibia",
        "Animalia",
        "Arachnida",
        "Aves",
        "Fungi",
        "Insecta",
        "Mammalia",
        "Mollusca",
        "Plantae",
        "Reptilia",
    ]

    def __init__(
        self,
        root=expanduser("~") + "/.avalanche/data/inaturalist2018/",
        split="train",
        transform=ToTensor(),
        target_transform=None,
        loader=pil_loader,
        download=True,
        supcats=None,
    ):
        """Build the dataset index from the annotation file of ``split``.

        :param root: folder holding ``<split>2018.json``; image paths from
            the annotation file are resolved relative to it.
        :param split: one of :attr:`splits`.
        :param transform: callable applied to each loaded image in
            ``__getitem__`` (skipped when ``None``).
        :param target_transform: callable applied to each target in
            ``__getitem__`` (skipped when ``None``).
        :param loader: callable receiving the full image path and returning
            the loaded image. ``None`` falls back to ``pil_loader``.
        :param download: when True, an ``INATURALIST_DATA`` object is created
            for ``root`` (presumably fetching missing data -- see that
            class) and stored in ``self.inat_data``.
        :param supcats: supercategory names to keep. ``None`` selects
            :attr:`def_supcats`.
        """
        super().__init__()
        # Imported here so that pycocotools is only needed when the dataset
        # is actually instantiated.
        # conda install -c conda-forge pycocotools
        from pycocotools.coco import COCO as jsonparser

        assert split in self.splits, (
            f"split must be one of {self.splits}, got {split!r}"
        )
        self.split = split  # training set or test set
        self.transform = transform
        self.target_transform = target_transform
        self.root = root
        self.loader = loader
        self.log = logging.getLogger("avalanche")

        # Supercategories to include (None = the class default selection)
        self.supcats = supcats if supcats is not None else self.def_supcats

        # Always define the attribute so it exists even without download.
        self.inat_data = None
        if download:
            download_trainval = self.split in ["train", "val"]
            self.inat_data = INATURALIST_DATA(
                data_folder=root, trainval=download_trainval
            )

        # load annotations
        ann_file = f"{split}2018.json"
        self.log.info(f"Loading annotations from: {ann_file}")
        self.ds = jsonparser(annotation_file=os.path.join(root, ann_file))

        self.img_ids, self.targets = [], []  # targets field is required!
        self.cats_per_supcat: Dict[str, Set[int]] = {}

        # Filter full dataset parsed
        for ann in self.ds.anns.values():
            img_id = ann["image_id"]
            cat = self.ds.loadCats(ann["category_id"])[0]  # Get category
            target = cat["name"]  # Is subdirectory
            supcat = cat["supercategory"]  # Is parent directory

            # Skip annotations outside the selection. ``self.supcats`` can
            # only be None when a subclass sets ``def_supcats = None``; in
            # that case every supercategory is kept.
            if self.supcats is not None and supcat not in self.supcats:
                continue

            # Add category to supercategory
            self.cats_per_supcat.setdefault(supcat, set()).add(
                int(target)  # Need int
            )

            # Add to list
            # NOTE(review): ``target`` is stored exactly as found in the
            # annotation file, while the per-supercategory sets store
            # ``int(target)`` -- confirm which type consumers expect.
            self.img_ids.append(img_id)
            self.targets.append(target)

        cnt_per_supcat = {k: len(v) for k, v in self.cats_per_supcat.items()}
        self.log.info("Classes per supercategories:")
        self.log.info(pprint.pformat(cnt_per_supcat, indent=2))
        self.log.info(f"Images in total: {len(self)}")

    def _load_image(self, img_id: int) -> Image.Image:
        """Load the image registered under ``img_id`` via ``self.loader``.

        The relative file name comes from the annotation file and is joined
        with ``self.root``.
        """
        path = self.ds.loadImgs(img_id)[0]["file_name"]
        # Honour the user-supplied loader; the default ``pil_loader`` reads
        # through an explicit file handle, avoiding a ResourceWarning.
        loader = self.loader if self.loader is not None else pil_loader
        return loader(os.path.join(self.root, path))

    def _load_target(self, img_id) -> List[Any]:
        """Return the annotation records attached to ``img_id``."""
        return self.ds.loadAnns(self.ds.getAnnIds(img_id))

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair at position ``index``.

        ``transform`` and ``target_transform`` are applied when they are
        not ``None``.
        """
        img_id = self.img_ids[index]
        img = self._load_image(img_id)
        target = self.targets[index]

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """Return the number of images kept after supercategory filtering."""
        return len(self.img_ids)
178

179

180
if __name__ == "__main__":
    # This little example script can be used to visualize the first image
    # loaded from the dataset.
    from torch.utils.data.dataloader import DataLoader
    import matplotlib.pyplot as plt
    from torchvision import transforms
    import torch

    train_data = INATURALIST2018()
    test_data = INATURALIST2018(split="val")
    print("train size: ", len(train_data))
    print("test size: ", len(test_data))

    dataloader = DataLoader(train_data, batch_size=1)

    # Only the first batch is shown; the loop is left right after it.
    for x, y in dataloader:
        first_image = transforms.ToPILImage()(torch.squeeze(x))
        plt.imshow(first_image)
        plt.show()
        print(x.size())
        print(len(y))
        break


__all__ = ["INATURALIST2018"]
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc