
tonegas / nnodely / build 18305449975

07 Oct 2025 07:32AM UTC coverage: 97.691% (-0.04%) from 97.727%
Build 18305449975, triggered by a push via github.
Commit by tonegas: "Modified the version"

1 of 1 new or added line in 1 file covered. (100.0%)
38 existing lines in 5 files now uncovered.
12733 of 13034 relevant lines covered (97.69%)
0.98 hits per line

Source File: /nnodely/operators/loader.py (90.75% covered)
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, enforce_types

from nnodely.support.logger import logging, nnLogger
log = nnLogger(__name__, logging.WARNING)

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset Parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        if self._data_loaded:
            result_dict = {}
            for key in self._model_def['Inputs'].keys():
                result_dict[key] = []
            for idx in range(window):
                for key, samples in self._data[dataset].items():
                    if key in self._model_def['Inputs'].keys():
                        result_dict[key].append(samples[index+idx])
            return result_dict
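
    # Illustrative sketch (hypothetical dataset 'data'): getSamples('data', index=10, window=5)
    # returns {input_name: [s10, s11, s12, s13, s14]} for every model input, where each
    # s_i is the stored sample window at index i.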

    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept, and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        names = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in names:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild each sample as a {input: window} dict and collect the indexes to drop
            idx_to_remove = []
            for i in range(n_samples):
                sample = {key: val[i] for key, val in dataset.items()}
                if not filter_function(sample):
                    idx_to_remove.append(i)

            ## Drop the filtered samples from every input and update the sample count
            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)
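
    # Illustrative sketch (hypothetical scalar input 'x'):
    #   model.filterData(lambda sample: sample['x'][0, 0] > 0)
    # drops every sample whose first 'x' value is not positive; each `sample` handed to
    # the filter is the {input: window} dict built above.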

    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the model sample time.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame does not contain a time column or if the time column is not in datetime format.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32),'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if type(df.index) is pd.DatetimeIndex:
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df
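
    # Illustrative note: with a model sample time of 0.01 s and the default scale of 1e9,
    # the resample rule becomes "10000000ns", i.e. one interpolated row every 10 ms.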

    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map each input name in the format list to its (start, end) column span.
        ## Plain strings advance the column index by the input dimension; names that are
        ## not model inputs (e.g. '') mark columns to skip. A tuple assigns the same
        ## column span to several input names at once.
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format:
            if isinstance(item, tuple):
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                idx += n_cols
            else:
                if item not in model_inputs.keys():
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx
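
    # Illustrative sketch (hypothetical inputs 'x' with dim 1 and 'y' with dim 2):
    #   ['x', '', 'y']  ->  {'x': (0, 1), 'y': (2, 4)}
    # where the empty string marks a csv column to skip.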

    @enforce_types
    def __get_files(self, folder:str) -> list:
        ## Collect the file names inside the dataset folder, sorted alphabetically
        try:
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert lists of windows to numpy arrays of shape (samples, window, dim)
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the feature (dim) dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples
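
    # Illustrative sketch: 100 windows of shape (5,) for a scalar input stack to (100, 5)
    # and are expanded to (100, 5, 1); num_of_samples for that key is then 100.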

    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column of the file; entries that are not model inputs (e.g. '') mark columns to skip. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int or str or Sequence or None, optional
            The header of the data files, passed to pandas read_csv. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the model sample time using the time column. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if type(source) is str:  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling if the time column is provided (must be a Datetime object)
                    if resampling:
                        df = self.resamplingData(df)
                except Exception:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    ## Store the cumulative number of samples at each file boundary
                    self._multifile[name].append((self._multifile[name][-1] + (len(df) - self._max_n_samples + 1)) if self._multifile[name] else len(df) - self._max_n_samples + 1)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
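                ## Illustrative note: with back=5 and forw=0, the line above collects one
                ## window data[i-5:i] per valid sample position i of the file.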
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                # Merge a list of inputs into a single dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        # Check that every input has the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples of the dataset {name} is not the same for all the inputs: {num_of_samples}")
        self._num_of_samples[name] = num_of_samples[list(num_of_samples.keys())[0]]
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)