tonegas / nnodely / build 16502811447

24 Jul 2025 04:44PM UTC coverage: 97.767% (+0.1%) from 97.651%
Build triggered by a push via github (web-flow).

New version 1.5.0

This pull request introduces version 1.5.0 of **nnodely**, featuring several updates:
1. Improved clarity of documentation and examples.
2. Support for managing multi-dataset features is now available.
3. DataFrames can now be used to create datasets.
4. Datasets can now be resampled (see the sketch after this list).
5. Training with random data has been fixed for both classic and recurrent training.
6. The `state` variable has been removed.
7. Connections and closed loops can now be added or removed.
8. Partial models can now be exported.
9. The `train` function and the result analysis have been separated.
10. A new function, `trainAndAnalyse`, is now available.
11. The report now works across all network types.
12. The training-function code has been reorganized.
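
A minimal sketch of items 3 and 4, based on the loader docstrings below; the model and dataset names are illustrative, and the top-level imports are assumed:

```python
import numpy as np
import pandas as pd
from nnodely import Modely, Input, Output, Fir  # assumed top-level exports

# Build and neuralize a small model (mirrors the loadData docstring example)
x = Input('x')
out = Output('out', Fir(x.tw(0.05)))
model = Modely(visualizer=None)
model.addModel('example_model', out)
model.neuralizeModel(0.01)

# New in 1.5.0: create a dataset directly from a DataFrame and resample it
# to the model sample time using the 'time' column (seconds)
df = pd.DataFrame({'time': np.arange(60, dtype=np.float32),
                   'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
model.loadData(name='example_dataset', source=df, resampling=True)
```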

2901 of 2967 new or added lines in 53 files covered (97.78%).

16 existing lines in 6 files are now uncovered.

12652 of 12941 relevant lines covered (97.77%).

0.98 hits per line

Source File: /nnodely/operators/loader.py (90.59% covered)

Lines marked "uncovered" below were not executed by the test suite; "(new)" marks lines added in this change. Unmarked lines are covered.
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, log, enforce_types, NP_DTYPE

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int = 1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        ## Check that the data is loaded before touching _num_of_samples
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        result_dict = {}
        for key in self._model_def['Inputs'].keys():
            result_dict[key] = []
        for idx in range(window):
            for key, samples in self._data[dataset].items():
                if key in self._model_def['Inputs'].keys():
                    result_dict[key].append(samples[index + idx])
        return result_dict

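    # Illustrative result: with index=10 and window=5, the returned dict maps
    # each model input name to its samples 10..14, e.g.
    # {'x': [x_10, x_11, x_12, x_13, x_14], ...}.
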
    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept, and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        names = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in names:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild the data as per-index sample dictionaries for the filter
            data_for_filter = []
            for i in range(n_samples):
                new_sample = {key: val[i] for key, val in dataset.items()}
                data_for_filter.append(new_sample)

            ## Collect the indexes to remove (reset for every dataset)
            idx_to_remove = []
            for idx, sample in enumerate(data_for_filter):
                if not filter_function(sample):
                    idx_to_remove.append(idx)

            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)

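    # Worked example (hypothetical data): if a dataset's 'input1' samples are
    # [-1, 2, -3, 4] and filter_function = lambda s: s['input1'] > 0, then
    # idx_to_remove = [0, 2], np.delete keeps the samples [2, 4], and
    # _num_of_samples is updated to 2.
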
    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the model sample time.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame has neither a DatetimeIndex nor a 'time' column.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32), 'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if type(df.index) is pd.DatetimeIndex:
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df

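    # Worked example (assuming neuralizeModel(0.01) set the sample time to
    # 0.01 s): sample_time_ns = int(0.01 * 1e9) = 10000000, so the frame is
    # resampled every 10 ms with df.resample("10000000ns") before interpolation.
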
    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map each input name in format to its (start, end) column span
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format:
            if isinstance(item, tuple):
                ## All names in a tuple share the same column span (branch uncovered, new)
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                else:
                    ## Advance only when every name in the tuple was a model input
                    idx += n_cols
            else:
                if item not in model_inputs.keys():
                    ## Not a model input: skip this column
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx

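    # Illustrative mapping: for format=['x', '', 'y'] (as in the loadData
    # docstring) with scalar inputs (dim=1), '' is not a model input so that
    # column is skipped, giving format_idx = {'x': (0, 1), 'y': (2, 3)}.
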
    @enforce_types
    def __get_files(self, folder:str) -> list:
        try:
            ## Take only the files in the top level of the folder, sorted by name
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:  # branch uncovered (new)
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

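    # Illustrative result (hypothetical files): for a folder holding
    # 'run_02.csv' and 'run_01.csv', this returns ['run_01.csv', 'run_02.csv'];
    # subfolders are not scanned.
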
    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert lists to numpy arrays and count the samples per input
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the feature dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)  # uncovered (new)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples

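    # Shape sketch: each stacked data[key] ends up as a 3D array of shape
    # (num_samples, window, dim); 2D stacks gain a trailing feature dim of 1,
    # and anything deeper than 3D is squeezed on axis 1.
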
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column: names matching model inputs are loaded, while other entries (e.g. '') skip a column. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int, str, Sequence or None, optional
            The header of the data files, forwarded to pandas read_csv. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the model sample time (see resamplingData). Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if type(source) is str:  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling (requires a 'time' column or a DatetimeIndex)
                    if resampling:
                        df = self.resamplingData(df)  # uncovered (new)
                except Exception:  # uncovered
                    log.warning(f'Cannot read file {os.path.join(source, file)}')  # uncovered
                    continue  # uncovered
                if self._file_count > 1:
                    ## Track the cumulative sample offset of each file
                    offset = self._multifile[name][-1] if self._multifile[name] else 0
                    self._multifile[name].append(offset + (len(df) - self._max_n_samples + 1))
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                # Merge a list of inputs into a single dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue  # uncovered (new)
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        ## Check that all inputs have the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples in dataset {name} is not the same for all inputs: {num_of_samples}")
        self._num_of_samples[name] = num_of_samples[list(num_of_samples.keys())[0]]
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)