tonegas / nnodely / build 16502811447

24 Jul 2025 04:44PM UTC coverage: 97.767% (+0.1%) from 97.651%
Build triggered by a push via github (web-flow).

New version 1.5.0

This pull request introduces version 1.5.0 of **nnodely**, featuring several updates:
1. Improved clarity of documentation and examples.
2. Support for managing multi-dataset features is now available.
3. DataFrames can now be used to create datasets.
4. Datasets can now be resampled (see the sketch after this list).
5. Training with random data has been fixed for both classic and recurrent training.
6. The `state` variable has been removed.
7. Connections and closed loops can now be added or removed.
8. Partial models can now be exported.
9. The `train` function and the result analysis have been separated.
10. A new function, `trainAndAnalyse`, is now available.
11. The report now works across all network types.
12. The training-function code has been reorganized.
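
A minimal sketch of items 3 and 4, based on the loader docstrings below; the model and dataset names are illustrative, and the top-level imports are assumed:

```python
import numpy as np
import pandas as pd
from nnodely import Modely, Input, Output, Fir  # assumed top-level exports

# Build and neuralize a small model (mirrors the loadData docstring example)
x = Input('x')
out = Output('out', Fir(x.tw(0.05)))
model = Modely(visualizer=None)
model.addModel('example_model', out)
model.neuralizeModel(0.01)

# New in 1.5.0: create a dataset directly from a DataFrame and resample it
# to the model sample time using the 'time' column (seconds)
df = pd.DataFrame({'time': np.arange(60, dtype=np.float32),
                   'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
model.loadData(name='example_dataset', source=df, resampling=True)
```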

2901 of 2967 new or added lines in 53 files covered (97.78%).

16 existing lines in 6 files are now uncovered.

12652 of 12941 relevant lines covered (97.77%).

0.98 hits per line

Source File: /nnodely/operators/loader.py (90.59% covered)

Lines marked "uncovered" below were not executed by the test suite; "(new)" marks lines added in this change. Unmarked lines are covered.
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, log, enforce_types, NP_DTYPE

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int = 1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        ## Check that the data is loaded before touching _num_of_samples
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        result_dict = {}
        for key in self._model_def['Inputs'].keys():
            result_dict[key] = []
        for idx in range(window):
            for key, samples in self._data[dataset].items():
                if key in self._model_def['Inputs'].keys():
                    result_dict[key].append(samples[index + idx])
        return result_dict

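    # Illustrative result: with index=10 and window=5, the returned dict maps
    # each model input name to its samples 10..14, e.g.
    # {'x': [x_10, x_11, x_12, x_13, x_14], ...}.
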
    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept, and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        names = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in names:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild the data as per-index sample dictionaries for the filter
            data_for_filter = []
            for i in range(n_samples):
                new_sample = {key: val[i] for key, val in dataset.items()}
                data_for_filter.append(new_sample)

            ## Collect the indexes to remove (reset for every dataset)
            idx_to_remove = []
            for idx, sample in enumerate(data_for_filter):
                if not filter_function(sample):
                    idx_to_remove.append(idx)

            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)

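    # Worked example (hypothetical data): if a dataset's 'input1' samples are
    # [-1, 2, -3, 4] and filter_function = lambda s: s['input1'] > 0, then
    # idx_to_remove = [0, 2], np.delete keeps the samples [2, 4], and
    # _num_of_samples is updated to 2.
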
    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the model sample time.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame has neither a DatetimeIndex nor a 'time' column.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32), 'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if type(df.index) is pd.DatetimeIndex:
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df

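    # Worked example (assuming neuralizeModel(0.01) set the sample time to
    # 0.01 s): sample_time_ns = int(0.01 * 1e9) = 10000000, so the frame is
    # resampled every 10 ms with df.resample("10000000ns") before interpolation.
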
    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map each input name in format to its (start, end) column span
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format:
            if isinstance(item, tuple):
                ## All names in a tuple share the same column span (branch uncovered, new)
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                else:
                    ## Advance only when every name in the tuple was a model input
                    idx += n_cols
            else:
                if item not in model_inputs.keys():
                    ## Not a model input: skip this column
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx

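    # Illustrative mapping: for format=['x', '', 'y'] (as in the loadData
    # docstring) with scalar inputs (dim=1), '' is not a model input so that
    # column is skipped, giving format_idx = {'x': (0, 1), 'y': (2, 3)}.
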
    @enforce_types
    def __get_files(self, folder:str) -> list:
        try:
            ## Take only the files in the top level of the folder, sorted by name
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:  # branch uncovered (new)
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

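    # Illustrative result (hypothetical files): for a folder holding
    # 'run_02.csv' and 'run_01.csv', this returns ['run_01.csv', 'run_02.csv'];
    # subfolders are not scanned.
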
    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert lists to numpy arrays and count the samples per input
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the feature dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)  # uncovered (new)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples

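    # Shape sketch: each stacked data[key] ends up as a 3D array of shape
    # (num_samples, window, dim); 2D stacks gain a trailing feature dim of 1,
    # and anything deeper than 3D is squeezed on axis 1.
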
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column: names matching model inputs are loaded, while other entries (e.g. '') skip a column. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int, str, Sequence or None, optional
            The header of the data files, forwarded to pandas read_csv. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the model sample time (see resamplingData). Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if type(source) is str:  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling (requires a 'time' column or a DatetimeIndex)
                    if resampling:
                        df = self.resamplingData(df)  # uncovered (new)
                except Exception:  # uncovered
                    log.warning(f'Cannot read file {os.path.join(source, file)}')  # uncovered
                    continue  # uncovered
                if self._file_count > 1:
                    ## Track the cumulative sample offset of each file
                    offset = self._multifile[name][-1] if self._multifile[name] else 0
                    self._multifile[name].append(offset + (len(df) - self._max_n_samples + 1))
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                # Merge a list of inputs into a single dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue  # uncovered (new)
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        ## Check that all inputs have the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples in dataset {name} is not the same for all inputs: {num_of_samples}")
        self._num_of_samples[name] = num_of_samples[list(num_of_samples.keys())[0]]
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)