
tonegas / nnodely / 17272281697

27 Aug 2025 04:09PM UTC coverage: 97.727% (-0.04%) from 97.767%
Build 17272281697 (push, via github) by tonegas: "minor chages"

8 of 12 new or added lines in 2 files covered (66.67%).
14 existing lines in 4 files now uncovered.
12727 of 13023 relevant lines covered (97.73%).
0.98 hits per line.

Source file: /nnodely/operators/loader.py (90.64% covered)
Uncovered in this file: the tuple branch of __get_format_idxs, the missing-path handler in __get_files, the ndim > 3 squeeze in __stack_arrays, and the resampling, file-error, and missing-column paths in loadData.
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.basic.relation import check_names
from nnodely.operators.network import Network
from nnodely.support.utils import check, log, enforce_types, NP_DTYPE

class Loader(Network):
    @enforce_types
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")
        super().__init__()

        # Dataset Parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

    @enforce_types
    def getSamples(self, dataset:str, *, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        ## The check must run first: the sample counts are only available after loading
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        result_dict = {key: [] for key in self._model_def['Inputs'].keys()}
        for idx in range(window):
            for key, samples in self._data[dataset].items():
                if key in self._model_def['Inputs'].keys():
                    result_dict[key].append(samples[index + idx])
        return result_dict

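    # A sketch of the returned structure (hypothetical shapes, assuming a
    # single input 'x' built from a 5-step time window):
    #     >>> samples = model.getSamples('dataset_name', index=10, window=2)
    #     >>> [s.shape for s in samples['x']]   # two consecutive (5, 1) windows
    #     [(5, 1), (5, 1)]
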
    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        datasets = list(self._data.keys()) if dataset_name is None else [dataset_name]
        for name in datasets:
            dataset = self._data[name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            ## Rebuild each sample as a {input: window} dictionary and collect
            ## the indexes to remove; the list is reset for every dataset
            idx_to_remove = []
            for i in range(n_samples):
                sample = {key: val[i] for key, val in dataset.items()}
                if not filter_function(sample):
                    idx_to_remove.append(i)

            ## Remove the filtered samples from every input of the dataset
            for key in self._data[name].keys():
                self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                self._num_of_samples[name] = self._data[name][key].shape[0]
            self.visualizer.showDataset(name=name)

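    # Note: each sample handed to the filter is a dictionary of per-input numpy
    # windows, not scalars. A minimal sketch, assuming an input 'x' with a
    # multi-step time window:
    #     >>> model.filterData(lambda sample: sample['x'].mean() > 0)
    # keeps only the samples whose 'x' window has a positive mean, in every
    # loaded dataset.
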
    @enforce_types
    def resamplingData(self, df:pd.DataFrame, *, scale:float = 1e9) -> pd.DataFrame:
        """
        Resamples the DataFrame to the sample time of the network.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to resample.
        scale : float, optional
            The scale factor to convert the sample time to nanoseconds. Default is 1e9.

        Returns
        -------
        pd.DataFrame
            The resampled DataFrame.

        Raises
        ------
        TypeError
            If the DataFrame has neither a DatetimeIndex nor a 'time' column.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> df = pd.DataFrame({'time': np.array(range(60), dtype=np.float32), 'x': np.array(10*[10] + 20*[20] + 30*[30], dtype=np.float32)})
            >>> resampled_df = model.resamplingData(df, scale=1e9)
        """
        sample_time_ns = int(self._model_def.getSampleTime() * scale)
        method = 'linear'
        if isinstance(df.index, pd.DatetimeIndex):
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        elif 'time' in df.columns:
            ## A non-datetime time column is interpreted as seconds
            if not ptypes.is_datetime64_any_dtype(df['time']):
                df['time'] = pd.to_datetime(df['time'], unit='s')
            df = df.set_index('time', drop=True)
            df = df.resample(f"{sample_time_ns}ns").interpolate(method=method)
        else:
            raise TypeError("No time column found in the DataFrame. Please provide a time column for resampling.")
        return df

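    # A sketch of the resampling (hypothetical values, assuming the network was
    # neuralized with a 0.01 s sample time, so getSampleTime() returns 0.01):
    #     >>> df = pd.DataFrame({'time': [0.0, 0.02, 0.04], 'x': [0.0, 2.0, 4.0]})
    #     >>> df = model.resamplingData(df)
    # returns a DataFrame indexed every 10 ms with 'x' linearly interpolated to
    # [0.0, 1.0, 2.0, 3.0, 4.0].
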
    @enforce_types
    def __get_format_idxs(self, format: list | None = None) -> dict:
        ## Map every model input named in 'format' to its (start, end) column range
        model_inputs = self._model_def['Inputs']
        format_idx = {}
        idx = 0
        for item in format or []:  ## A None format yields an empty mapping
            if isinstance(item, tuple):
                ## Keys grouped in a tuple share the same column range
                n_cols = 0  ## Default keeps idx consistent if the first key is unknown
                for key in item:
                    if key not in model_inputs.keys():
                        idx += 1
                        break
                    n_cols = model_inputs[key]['dim']
                    format_idx[key] = (idx, idx + n_cols)
                idx += n_cols
            else:
                if item not in model_inputs.keys():
                    idx += 1
                    continue
                n_cols = model_inputs[item]['dim']
                format_idx[item] = (idx, idx + n_cols)
                idx += n_cols
        return format_idx

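    # A sketch of the mapping (hypothetical inputs, assuming 'x' has dim 1 and
    # 'y' has dim 2): format = ['x', '', 'y'] skips the unnamed second column,
    # giving {'x': (0, 1), 'y': (2, 4)}: column 0 feeds 'x', columns 2 and 3
    # feed 'y'.
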
    @enforce_types
    def __get_files(self, folder:str) -> list:
        ## Collect and sort the file names inside 'folder'
        try:
            _, _, files = next(os.walk(folder))
            files.sort()
        except StopIteration:
            check(False, StopIteration, f'ERROR: The path "{folder}" does not exist!')
            return []
        return files

    @enforce_types
    def __stack_arrays(self, data: dict) -> dict:
        ## Convert the lists of windows to numpy arrays and count the samples per input
        num_of_samples = {}
        for key in data:
            data[key] = np.stack(data[key])
            if self._model_def['Inputs'][key]['dim'] > 1:
                data[key] = np.array(data[key].tolist(), dtype=np.float64)
            if data[key].ndim == 2:  ## Add the sample dimension
                data[key] = np.expand_dims(data[key], axis=-1)
            if data[key].ndim > 3:
                data[key] = np.squeeze(data[key], axis=1)
            num_of_samples[key] = data[key].shape[0]
        return num_of_samples

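    # After stacking, every input is a 3-dimensional array shaped
    # (n_samples, window, dim); for example (hypothetical numbers), 100 windows
    # of 5 steps of a scalar input give data['x'].shape == (100, 5, 1).
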
    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame, *,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing csv files or from a crafted dataset.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files or a custom dataset provided as a dictionary or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files, the format parameter defines how to read each column of the file. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int, str, Sequence or None, optional
            The header of the data files. Default is None.
        resampling : bool, optional
            If True, the data is resampled to the sample time of the network. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset', source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs']
        ## Initialize the dictionary containing the data
        check_names(name, self._data.keys(), "Dataset")
        self._data[name] = {}

        if isinstance(source, str):  ## we have a directory path containing the files
            ## Collect the column indexes
            format_idx = self.__get_format_idxs(format)
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []
            ## Obtain the file names
            files = self.__get_files(source)
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## Read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                    ## Resampling if the time column is provided (must be a Datetime object)
                    if resampling:
                        df = self.resamplingData(df)
                except Exception:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    ## Store the cumulative number of windows up to this file
                    n_windows = len(df) - self._max_n_samples + 1
                    self._multifile[name].append(self._multifile[name][-1] + n_windows if self._multifile[name] else n_windows)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as numpy arrays
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]
        else:  ## we have a crafted dataset
            self._file_count = 1
            if isinstance(source, dict):
                ## Build the sliding windows for every input present in the dictionary
                for key in json_inputs.keys():
                    if key not in source.keys():
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source[key]) - self._max_n_samples + 1):
                        self._data[name][key].append(source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])
            else:
                if resampling:
                    source = self.resamplingData(source)
                for key in json_inputs.keys():
                    if key not in source.columns:
                        continue
                    self._data[name][key] = []  ## Initialize the dataset
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    for idx in range(len(source) - self._max_n_samples + 1):
                        window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                        self._data[name][key].append(window.to_numpy())

        ## Convert lists to numpy arrays
        num_of_samples = self.__stack_arrays(self._data[name])
        ## Check that all inputs have the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError, f"The number of samples in the dataset {name} is not the same for all the inputs: {num_of_samples}")
        self._num_of_samples[name] = next(iter(num_of_samples.values()))
        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)
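
    # Sliding-window sketch (hypothetical numbers, assuming x.tw(0.05) gives
    # 'x' a 5-step backward window at a 0.01 s sample time): a csv file with
    # 100 rows yields 100 - 5 + 1 = 96 samples, each holding 5 consecutive
    # rows of the 'x' columns.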