tonegas / nnodely / 18321197581

07 Oct 2025 05:39PM UTC coverage: 97.731% (+0.05%) from 97.683%
Build 18321197581 (push, github, tonegas)
Commit: Added some test for format in loadData

56 of 56 new or added lines in 2 files covered. (100.0%)
10 existing lines in 3 files now uncovered.
12794 of 13091 relevant lines covered (97.73%)
0.98 hits per line

Source File: /nnodely/operators/loader.py (file coverage: 95.03%)
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, enforce_types

from nnodely.support.logger import logging, nnLogger
log = nnLogger(__name__, logging.WARNING)

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
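
        Inspecting the returned window (an illustrative sketch, assuming the model has an input named 'x'):
            >>> len(samples['x'])  # one entry per sample in the requested window
            5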
        """
        check(self._data_loaded, ValueError, 'The dataset must first be loaded using the <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        result_dict = {key: [] for key in self._model_def['Inputs'].keys()}
        for idx in range(window):
            for key, samples in self._data[dataset].items():
                if key in self._model_def['Inputs'].keys():
                    result_dict[key].append(samples[index + idx])
        return result_dict

    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            ...     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
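
        Filtering every loaded dataset at once (a sketch; 'input1' is a hypothetical input name):
            >>> model.filterData(lambda sample: sample['input1'] > 0)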
        """
        names = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in names:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild each sample as a dictionary so the filter function sees one sample at a time
            data_for_filter = []
            for i in range(n_samples):
                new_sample = {key: val[i] for key, val in dataset.items()}
                data_for_filter.append(new_sample)

            ## Collect the indexes of the samples to remove (reset for every dataset)
            idx_to_remove = []
            for idx, sample in enumerate(data_for_filter):
                if not filter_function(sample):
                    idx_to_remove.append(idx)

            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)

    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the model sample time.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor used to convert the model sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame has neither a DatetimeIndex nor a 'time' column to resample on.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32),'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
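
        A DataFrame that already carries a DatetimeIndex is resampled directly (a minimal sketch reusing df above):
            >>> df_t = df.drop(columns='time').set_index(pd.to_datetime(df['time'], unit='s'))
            >>> resampled_t = model.resamplingData(df_t)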
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df

    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map each model input named in 'format' to the (start, end) column slice it occupies in the csv files
        check(format is not None, ValueError, "A format must be provided when loading data from files.")
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format:
            if isinstance(item, tuple):
                n_cols = None
                for key in item:
                    if key in model_inputs.keys():
                        if n_cols is None or n_cols == model_inputs[key]['dim']:
                            n_cols = model_inputs[key]['dim']
                        else:
                            raise ValueError(f'The variables {item} have different dimensionality.')
                        check(key not in format_idx, ValueError, f"The format '{format}' is not correct: some variables appear more than once.")
                        format_idx[key] = (idx, idx + n_cols)
                if n_cols is not None:
                    idx += n_cols
                else:
                    idx += 1
            else:
                if item not in model_inputs.keys():
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                check(item not in format_idx, ValueError,
                      f"The format '{format}' is not correct: some variables appear more than once.")
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx
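
    ## Illustration (comment only, not executed): with model inputs 'x' of dim 1 and 'y' of dim 2,
    ## format = ['x', '', 'y'] yields {'x': (0, 1), 'y': (2, 4)}: the empty string skips one
    ## csv column, while 'y' spans two consecutive columns.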

    @enforce_types
    def __get_files(self, folder:str) -> list:
        ## Return the sorted list of file names found in 'folder'
        try:
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert the lists of windows to numpy arrays and collect the number of samples per input
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the sample dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples
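
    ## Shape convention (illustrative): after stacking, each input ends up with shape
    ## (num_samples, window, dim); 2-D stacks of scalar inputs get a trailing dim of 1.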

    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format list defines how each column of the file is read; columns not matching a model input are skipped. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int or str or Sequence or None, optional
            The header of the data files. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the model sample time using <resamplingData>. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset',source=dataset)
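
        Example - load data from a pandas DataFrame (a sketch reusing the model above):
            >>> df = pd.DataFrame({'x': np.arange(10, dtype=np.float32), 'y': 2.0*np.arange(10, dtype=np.float32)})
            >>> test.loadData(name='example_dataset_df', source=df)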
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")

        if isinstance(source, str):  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = self.__get_format_idxs(format)
            ## add the dataset
            self._data[name] = {}
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    if not all(df.iloc[0].apply(lambda x: isinstance(x, (int, float)))):
                        log.warning(f"The file {file} contains non-numerical columns.")
                    ## Resample to the model sample time if requested (requires a time column or DatetimeIndex)
                    if resampling:
                        df = self.resamplingData(df)
                except Exception:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    ## Store the cumulative number of usable samples up to and including this file
                    self._multifile[name].append((self._multifile[name][-1] + (len(df) - self._max_n_samples + 1)) if self._multifile[name] else len(df) - self._max_n_samples + 1)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
        else:  ## we have a crafted dataset
            ## add the dataset
            self._data[name] = {}
            self._file_count = 1
            if isinstance(source, dict):
                # Merge a list of inputs into a single dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        # Check that every input provides the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples in the dataset {name} is not the same for all inputs: {num_of_samples}")
        self._num_of_samples[name] = next(iter(num_of_samples.values()))
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)