
tonegas / nnodely / build 18305449975

07 Oct 2025 07:32AM UTC coverage: 97.691% (-0.04%) from 97.727%
Build 18305449975, triggered by a push via github.
Commit by tonegas: "Modified the version"

1 of 1 new or added line in 1 file covered. (100.0%)
38 existing lines in 5 files now uncovered.
12733 of 13034 relevant lines covered (97.69%)
0.98 hits per line

Source File: /nnodely/operators/loader.py (90.75% covered)
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, enforce_types

from nnodely.support.logger import logging, nnLogger
log = nnLogger(__name__, logging.WARNING)

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset Parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        if self._data_loaded:
            result_dict = {}
            for key in self._model_def['Inputs'].keys():
                result_dict[key] = []
            for idx in range(window):
                for key, samples in self._data[dataset].items():
                    if key in self._model_def['Inputs'].keys():
                        result_dict[key].append(samples[index+idx])
            return result_dict
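
    # Illustrative sketch (hypothetical dataset 'data'): getSamples('data', index=10, window=5)
    # returns {input_name: [s10, s11, s12, s13, s14]} for every model input, where each
    # s_i is the stored sample window at index i.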

    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept, and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        names = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in names:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild each sample as a {input: window} dict and collect the indexes to drop
            idx_to_remove = []
            for i in range(n_samples):
                sample = {key: val[i] for key, val in dataset.items()}
                if not filter_function(sample):
                    idx_to_remove.append(i)

            ## Drop the filtered samples from every input and update the sample count
            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)
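
    # Illustrative sketch (hypothetical scalar input 'x'):
    #   model.filterData(lambda sample: sample['x'][0, 0] > 0)
    # drops every sample whose first 'x' value is not positive; each `sample` handed to
    # the filter is the {input: window} dict built above.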

    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the model sample time.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame does not contain a time column or if the time column is not in datetime format.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32),'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if type(df.index) is pd.DatetimeIndex:
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df
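
    # Illustrative note: with a model sample time of 0.01 s and the default scale of 1e9,
    # the resample rule becomes "10000000ns", i.e. one interpolated row every 10 ms.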

    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map each input name in the format list to its (start, end) column span.
        ## Plain strings advance the column index by the input dimension; names that are
        ## not model inputs (e.g. '') mark columns to skip. A tuple assigns the same
        ## column span to several input names at once.
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format:
            if isinstance(item, tuple):
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                idx += n_cols
            else:
                if item not in model_inputs.keys():
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx
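
    # Illustrative sketch (hypothetical inputs 'x' with dim 1 and 'y' with dim 2):
    #   ['x', '', 'y']  ->  {'x': (0, 1), 'y': (2, 4)}
    # where the empty string marks a csv column to skip.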

    @enforce_types
    def __get_files(self, folder:str) -> list:
        ## Collect the file names inside the dataset folder, sorted alphabetically
        try:
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert lists of windows to numpy arrays of shape (samples, window, dim)
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the feature (dim) dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples
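
    # Illustrative sketch: 100 windows of shape (5,) for a scalar input stack to (100, 5)
    # and are expanded to (100, 5, 1); num_of_samples for that key is then 100.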

    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column of the file; entries that are not model inputs (e.g. '') mark columns to skip. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int or str or Sequence or None, optional
            The header of the data files, passed to pandas read_csv. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the model sample time using the time column. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if type(source) is str:  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling if the time column is provided (must be a Datetime object)
                    if resampling:
                        df = self.resamplingData(df)
                except Exception:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    ## Store the cumulative number of samples at each file boundary
                    self._multifile[name].append((self._multifile[name][-1] + (len(df) - self._max_n_samples + 1)) if self._multifile[name] else len(df) - self._max_n_samples + 1)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
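                ## Illustrative note: with back=5 and forw=0, the line above collects one
                ## window data[i-5:i] per valid sample position i of the file.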
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                # Merge a list of inputs into a single dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        # Check that every input has the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples of the dataset {name} is not the same for all the inputs: {num_of_samples}")
        self._num_of_samples[name] = num_of_samples[list(num_of_samples.keys())[0]]
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)