
tonegas / nnodely / 17272281697

27 Aug 2025 04:09PM UTC coverage: 97.727% (-0.04%) from 97.767%
Build 17272281697 (push, via github) by tonegas: "minor chages"

8 of 12 new or added lines in 2 files covered (66.67%).
14 existing lines in 4 files now uncovered.
12727 of 13023 relevant lines covered (97.73%).
0.98 hits per line.

Source file: /nnodely/operators/loader.py (90.64% covered)
Uncovered in this file: the tuple branch of __get_format_idxs, the missing-path handler in __get_files, the ndim > 3 squeeze in __stack_arrays, and the resampling, file-error, and missing-column paths in loadData.
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, log, enforce_types, NP_DTYPE

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset Parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        ## The check must run first: the sample counts are only available after loading
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        result_dict = {key: [] for key in self._model_def['Inputs'].keys()}
        for idx in range(window):
            for key, samples in self._data[dataset].items():
                if key in self._model_def['Inputs'].keys():
                    result_dict[key].append(samples[index + idx])
        return result_dict

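    # A sketch of the returned structure (hypothetical shapes, assuming a
    # single input 'x' built from a 5-step time window):
    #     >>> samples = model.getSamples('dataset_name', index=10, window=2)
    #     >>> [s.shape for s in samples['x']]   # two consecutive (5, 1) windows
    #     [(5, 1), (5, 1)]
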
    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        datasets = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in datasets:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild each sample as a {input: window} dictionary and collect
            ## the indexes to remove; the list is reset for every dataset
            idx_to_remove = []
            for i in range(n_samples):
                sample = {key: val[i] for key, val in dataset.items()}
                if not filter_function(sample):
                    idx_to_remove.append(i)

            ## Remove the filtered samples from every input of the dataset
            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)

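    # Note: each sample handed to the filter is a dictionary of per-input numpy
    # windows, not scalars. A minimal sketch, assuming an input 'x' with a
    # multi-step time window:
    #     >>> model.filterData(lambda sample: sample['x'].mean() > 0)
    # keeps only the samples whose 'x' window has a positive mean, in every
    # loaded dataset.
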
    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the sample time of the network.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame has neither a DatetimeIndex nor a 'time' column.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32), 'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            ## A non-datetime time column is interpreted as seconds
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df

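    # A sketch of the resampling (hypothetical values, assuming the network was
    # neuralized with a 0.01 s sample time, so getSampleTime() returns 0.01):
    #     >>> df = pd.DataFrame({'time': [0.0, 0.02, 0.04], 'x': [0.0, 2.0, 4.0]})
    #     >>> df = model.resamplingData(df)
    # returns a DataFrame indexed every 10 ms with 'x' linearly interpolated to
    # [0.0, 1.0, 2.0, 3.0, 4.0].
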
    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map every model input named in 'format' to its (start, end) column range
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format or []:  ## A None format yields an empty mapping
            if isinstance(item, tuple):
                ## Keys grouped in a tuple share the same column range
                n_cols = 0  ## Default keeps idx consistent if the first key is unknown
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                idx += n_cols
            else:
                if item not in model_inputs.keys():
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx

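    # A sketch of the mapping (hypothetical inputs, assuming 'x' has dim 1 and
    # 'y' has dim 2): format = ['x', '', 'y'] skips the unnamed second column,
    # giving {'x': (0, 1), 'y': (2, 4)}: column 0 feeds 'x', columns 2 and 3
    # feed 'y'.
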
    @enforce_types
    def __get_files(self, folder:str) -> list:
        ## Collect and sort the file names inside 'folder'
        try:
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert the lists of windows to numpy arrays and count the samples per input
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the sample dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples

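    # After stacking, every input is a 3-dimensional array shaped
    # (n_samples, window, dim); for example (hypothetical numbers), 100 windows
    # of 5 steps of a scalar input give data['x'].shape == (100, 5, 1).
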
    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column of the file. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int, str, Sequence or None, optional
            The header of the data files. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the sample time of the network. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if isinstance(source, str):  ## we have a directory path containing the files
            ## Collect the column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## Obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## Read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling if the time column is provided (must be a Datetime object)
                    if resampling:
                        df = self.resamplingData(df)
                except Exception:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    ## Store the cumulative number of windows up to this file
                    n_windows = len(df) - self._max_n_samples + 1
                    self._multifile[name].append(self._multifile[name][-1] + n_windows if self._multifile[name] else n_windows)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                ## Build the sliding windows for every input present in the dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        ## Check that all inputs have the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples in the dataset {name} is not the same for all the inputs: {num_of_samples}")
        self._num_of_samples[name] = next(iter(num_of_samples.values()))
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)
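
    # Sliding-window sketch (hypothetical numbers, assuming x.tw(0.05) gives
    # 'x' a 5-step backward window at a 0.01 s sample time): a csv file with
    # 100 rows yields 100 - 5 + 1 = 96 samples, each holding 5 consecutive
    # rows of the 'x' columns.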