tonegas / nnodely / build 14319828903

07 Apr 2025 09:27PM UTC coverage: 97.259% (+0.2%) from 97.035%

Pull Request #86: Smallclasses (github / web-flow)
Merge 44b7c25ee into e9c323c4f

2275 of 2409 new or added lines in 54 files covered (94.44%).
1 existing line in 1 file is now uncovered.
11637 of 11965 relevant lines covered (97.26%).
0.97 hits per line.

Source File: /nnodely/operators/loader.py (73.37% covered)

Uncovered lines in this file: the filterData body, the tuple handling in the format parser, the directory-walk and CSV-read error handlers, the ndim > 3 squeeze branches, and the missing-column skip in the DataFrame path.
import os, random

import pandas as pd
import numpy as np
import pandas.api.types as ptypes
from collections.abc import Sequence, Callable

from nnodely.support.utils import check, log, enforce_types

class Loader:
    def __init__(self):
        check(type(self) is not Loader, TypeError, "Loader class cannot be instantiated directly")

        # Dataset parameters
        self.__n_datasets = 0
        self.__datasets_loaded = set()

        self._data_loaded = False
        self._file_count = 0
        self._num_of_samples = {}
        self._data = {}
        self._multifile = {}

    @enforce_types
    def getSamples(self, dataset:str, index:int|None = None, window:int=1) -> dict:
        """
        Retrieves a window of samples from a given dataset.

        Parameters
        ----------
        dataset : str
            The name of the dataset to retrieve samples from.
        index : int, optional
            The starting index of the samples. If None, a random index is chosen. Default is None.
        window : int, optional
            The number of consecutive samples to retrieve. Default is 1.

        Returns
        -------
        dict
            A dictionary containing the retrieved samples. The keys are input and state names, and the values are lists of samples.

        Raises
        ------
        ValueError
            If the dataset is not loaded.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> samples = model.getSamples('dataset_name', index=10, window=5)
        """
        if index is None:
            index = random.randint(0, self._num_of_samples[dataset] - window)
        check(self._data_loaded, ValueError, 'The Dataset must first be loaded using <loadData> function!')
        if self._data_loaded:
            result_dict = {}
            for key in (self._model_def['Inputs'].keys() | self._model_def['States'].keys()):
                result_dict[key] = []
            for idx in range(window):
                for key, samples in self._data[dataset].items():
                    if key in (self._model_def['Inputs'].keys() | self._model_def['States'].keys()):
                        result_dict[key].append(samples[index+idx])
            return result_dict

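    # A minimal usage sketch for getSamples, assuming a model with inputs 'x' and 'y'
    # that has already been neuralized and a dataset at 'path/to/data' (setup elided;
    # names and paths are illustrative):
    # >>> model.loadData(name='dataset_name', source='path/to/data', format=['x', 'y'])
    # >>> samples = model.getSamples('dataset_name', index=10, window=2)
    # >>> sorted(samples.keys())   # one key per model input/state, e.g. ['x', 'y']
    # >>> len(samples['x'])        # == window, here 2
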
    @enforce_types
    def filterData(self, filter_function:Callable, dataset_name:str|None = None) -> None:
        """
        Filters the data in the dataset using the provided filter function.

        Parameters
        ----------
        filter_function : Callable
            A function that takes a sample as input and returns True if the sample should be kept, and False if it should be removed.
        dataset_name : str or None, optional
            The name of the dataset to filter. If None, all datasets are filtered. Default is None.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example usage:
            >>> model = Modely()
            >>> model.loadData('dataset_name', 'path/to/data')
            >>> def filter_fn(sample):
            >>>     return sample['input1'] > 0
            >>> model.filterData(filter_fn, 'dataset_name')
        """
        idx_to_remove = []
        if dataset_name is None:
            for name in self._data.keys():
                dataset = self._data[name]
                n_samples = len(dataset[list(dataset.keys())[0]])

                data_for_filter = []
                for i in range(n_samples):
                    new_sample = {key: val[i] for key, val in dataset.items()}
                    data_for_filter.append(new_sample)

                for idx, sample in enumerate(data_for_filter):
                    if not filter_function(sample):
                        idx_to_remove.append(idx)

                for key in self._data[name].keys():
                    self._data[name][key] = np.delete(self._data[name][key], idx_to_remove, axis=0)
                    self._num_of_samples[name] = self._data[name][key].shape[0]
                self.visualizer.showDataset(name=name)

        else:
            dataset = self._data[dataset_name]
            n_samples = len(dataset[list(dataset.keys())[0]])

            data_for_filter = []
            for i in range(n_samples):
                new_sample = {key: val[i] for key, val in dataset.items()}
                data_for_filter.append(new_sample)

            for idx, sample in enumerate(data_for_filter):
                if not filter_function(sample):
                    idx_to_remove.append(idx)

            for key in self._data[dataset_name].keys():
                self._data[dataset_name][key] = np.delete(self._data[dataset_name][key], idx_to_remove, axis=0)
                self._num_of_samples[dataset_name] = self._data[dataset_name][key].shape[0]
            self.visualizer.showDataset(name=dataset_name)

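    # A minimal standalone sketch of the row-filtering mechanic used above, on a
    # plain dict of numpy arrays (names, shapes and values are illustrative):
    # >>> data = {'x': np.arange(6).reshape(3, 2, 1), 'y': np.array([1.0, -1.0, 2.0])}
    # >>> keep = lambda sample: sample['y'] > 0
    # >>> idx_to_remove = [i for i in range(len(data['y']))
    # ...                  if not keep({k: v[i] for k, v in data.items()})]
    # >>> data = {k: np.delete(v, idx_to_remove, axis=0) for k, v in data.items()}
    # >>> data['y']   # array([1., 2.]): the sample failing the predicate is removed
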
    @enforce_types
    def loadData(self, name:str,
                 source: str | dict | pd.DataFrame,
                 format: list | None = None,
                 skiplines: int = 0,
                 delimiter: str = ',',
                 header: int | str | Sequence | None = None,
                 resampling: bool = False
                 ) -> None:
        """
        Loads data into the model. The data can be loaded from a directory path containing the csv files, from a dictionary of arrays, or from a pandas DataFrame.

        Parameters
        ----------
        name : str
            The name of the dataset.
        source : str or dict or pd.DataFrame
            The source of the data. Can be a directory path containing the csv files, a dictionary of custom data, or a pandas DataFrame.
        format : list or None, optional
            The format of the data. When loading csv files the format parameter defines how to read each column of the file. Default is None.
        skiplines : int, optional
            The number of lines to skip at the beginning of the file. Default is 0.
        delimiter : str, optional
            The delimiter used in the data files. Default is ','.
        header : int, str, Sequence or None, optional
            The header of the data files, forwarded to pandas.read_csv. Default is None.
        resampling : bool, optional
            If True and the source is a pandas DataFrame, the data is resampled at the model sample time using the DatetimeIndex or a 'time' column. Default is False.

        Raises
        ------
        ValueError
            If the network is not neuralized.
            If the delimiter is not valid.
        TypeError
            If resampling is requested but the DataFrame has no DatetimeIndex or 'time' column.

        Examples
        --------
        .. image:: https://colab.research.google.com/assets/colab-badge.svg
            :target: https://colab.research.google.com/github/tonegas/nnodely/blob/main/examples/dataset.ipynb
            :alt: Open in Colab

        Example - load data from files:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_struct = ['x', '', 'y']
            >>> test.loadData(name='example_dataset', source='path/to/data', format=data_struct)

        Example - load data from a crafted dataset:
            >>> x = Input('x')
            >>> y = Input('y')
            >>> out = Output('out',Fir(x.tw(0.05)))
            >>> test = Modely(visualizer=None)
            >>> test.addModel('example_model', out)
            >>> test.neuralizeModel(0.01)
            >>> data_x = np.array(range(10))
            >>> dataset = {'x': data_x, 'y': (2*data_x)}
            >>> test.loadData(name='example_dataset',source=dataset)
        """
        check(self.neuralized, ValueError, "The network is not neuralized.")
        check(delimiter in ['\t', '\n', ';', ',', ' '], ValueError, 'delimiter not valid!')

        json_inputs = self._model_def['Inputs'] | self._model_def['States']
        model_inputs = list(json_inputs.keys())
        ## Initialize the dictionary containing the data
        if name in list(self._data.keys()):
            log.warning(f'Dataset named {name} already loaded! Overriding the existing one.')
        self._data[name] = {}

        num_of_samples = {}
        if type(source) is str:  ## we have a directory path containing the files
            ## collect column indexes
            format_idx = {}
            idx = 0
            for item in format:
                if isinstance(item, tuple):
                    for key in item:
                        if key not in model_inputs:
                            idx += 1
                            break
                        n_cols = json_inputs[key]['dim']
                        format_idx[key] = (idx, idx + n_cols)
                    idx += n_cols
                else:
                    if item not in model_inputs:
                        idx += 1
                        continue
                    n_cols = json_inputs[item]['dim']
                    format_idx[item] = (idx, idx + n_cols)
                    idx += n_cols

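            # A small worked example of the mapping built above, assuming two model
            # inputs 'x' (dim 1) and 'y' (dim 2) and a skipped column marked '':
            # format ['x', '', 'y'] maps column 0 to 'x', skips column 1, and maps
            # columns 2:4 to 'y', i.e. format_idx == {'x': (0, 1), 'y': (2, 4)}.
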
            ## Initialize each input key
            for key in format_idx.keys():
                self._data[name][key] = []

            ## Obtain the file names
            try:
                _, _, files = next(os.walk(source))
                files.sort()
            except StopIteration as e:
                check(False, StopIteration, f'ERROR: The path "{source}" does not exist!')
                return
            self._file_count = len(files)
            if self._file_count > 1:  ## Multifile
                self._multifile[name] = []

            ## Cycle through all the files
            for file in files:
                try:
                    ## read the csv
                    df = pd.read_csv(os.path.join(source, file), skiprows=skiplines, delimiter=delimiter, header=header)
                except:
                    log.warning(f'Cannot read file {os.path.join(source, file)}')
                    continue
                if self._file_count > 1:
                    self._multifile[name].append(
                        (self._multifile[name][-1] + (len(df) - self._max_n_samples + 1)) if self._multifile[name] else len(
                            df) - self._max_n_samples + 1)
                ## Cycle through all the windows
                for key, idxs in format_idx.items():
                    back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                    ## Save the data as a numpy array
                    data = df.iloc[:, idxs[0]:idxs[1]].to_numpy()
                    self._data[name][key] += [data[i - back:i + forw] for i in
                                              range(self._max_samples_backward, len(df) - self._max_samples_forward + 1)]

            ## Stack the files
            for key in format_idx.keys():
                self._data[name][key] = np.stack(self._data[name][key])
                num_of_samples[key] = self._data[name][key].shape[0]

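            # A sketch of the per-key windowing above, under illustrative values
            # (back=2, forw=1, _max_samples_backward=2, _max_samples_forward=1,
            # one column, a 5-row file):
            # >>> data = np.arange(5).reshape(5, 1)
            # >>> windows = [data[i - 2:i + 1] for i in range(2, 5 - 1 + 1)]
            # >>> len(windows), windows[0].ravel().tolist()
            # (3, [0, 1, 2])
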
        elif type(source) is dict:  ## we have a crafted dataset
            self._file_count = 1

            ## Check if the inputs are correct
            # assert set(model_inputs).issubset(source.keys()), f'The dataset is missing some inputs. Inputs needed for the model: {model_inputs}'

            # Merge a list of inputs into a single dictionary
            for key in model_inputs:
                if key not in source.keys():
                    continue

                self._data[name][key] = []  ## Initialize the dataset

                back, forw = self._input_ns_backward[key], self._input_ns_forward[key]
                for idx in range(len(source[key]) - self._max_n_samples + 1):
                    self._data[name][key].append(
                        source[key][idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)])

            ## Stack the samples
            for key in model_inputs:
                if key not in source.keys():
                    continue
                self._data[name][key] = np.stack(self._data[name][key])
                if self._data[name][key].ndim == 2:  ## Add the sample dimension
                    self._data[name][key] = np.expand_dims(self._data[name][key], axis=-1)
                if self._data[name][key].ndim > 3:
                    self._data[name][key] = np.squeeze(self._data[name][key], axis=1)
                num_of_samples[key] = self._data[name][key].shape[0]

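            # A sketch of the crafted-dataset windowing above, under hypothetical
            # values (_max_n_samples=3, _max_samples_backward=2, back=2, forw=1):
            # >>> data_x = np.array(range(10))
            # >>> windows = [data_x[idx + 0:idx + 3] for idx in range(10 - 3 + 1)]
            # >>> np.stack(windows).shape   # (8, 3); expand_dims then yields (8, 3, 1)
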
        elif isinstance(source, pd.DataFrame):  ## we have a crafted dataset
            self._file_count = 1

            ## Resampling if the time column is provided (must be a Datetime object)
            if resampling:
                if type(source.index) is pd.DatetimeIndex:
                    source = source.resample(f"{int(self._model_def.getSampleTime() * 1e9)}ns").interpolate(method="linear")
                elif 'time' in source.columns:
                    if not ptypes.is_datetime64_any_dtype(source['time']):
                        source['time'] = pd.to_datetime(source['time'], unit='s')
                    source = source.set_index('time', drop=True)
                    source = source.resample(f"{int(self._model_def.getSampleTime() * 1e9)}ns").interpolate(method="linear")
                else:
                    raise TypeError(
                        "No time column found in the DataFrame. Please provide a time column for resampling.")

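            # A sketch of the resampling above, assuming a 0.01 s sample time and a
            # numeric 'time' column expressed in seconds (values are illustrative):
            # >>> df = pd.DataFrame({'time': [0.0, 0.02, 0.04], 'x': [0.0, 2.0, 4.0]})
            # >>> df['time'] = pd.to_datetime(df['time'], unit='s')
            # >>> df = df.set_index('time', drop=True)
            # >>> df = df.resample(f"{int(0.01 * 1e9)}ns").interpolate(method="linear")
            # >>> df['x'].tolist()   # [0.0, 1.0, 2.0, 3.0, 4.0]
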
            processed_data = {}
            for key in model_inputs:
                if key not in source.columns:
                    continue

                processed_data[key] = []  ## Initialize the dataset
                back, forw = self._input_ns_backward[key], self._input_ns_forward[key]

                for idx in range(len(source) - self._max_n_samples + 1):
                    window = source[key].iloc[idx + (self._max_samples_backward - back):idx + (self._max_samples_backward + forw)]
                    processed_data[key].append(window.to_numpy())

            ## Convert lists to numpy arrays
            for key in processed_data:
                processed_data[key] = np.stack(processed_data[key])
                if json_inputs[key]['dim'] > 1:
                    processed_data[key] = np.array(processed_data[key].tolist(), dtype=np.float64)
                if processed_data[key].ndim == 2:  ## Add the sample dimension
                    processed_data[key] = np.expand_dims(processed_data[key], axis=-1)
                if processed_data[key].ndim > 3:
                    processed_data[key] = np.squeeze(processed_data[key], axis=1)
                num_of_samples[key] = processed_data[key].shape[0]

            self._data[name] = processed_data

        # Check that all inputs have the same number of samples
        check(len(set(num_of_samples.values())) == 1, ValueError,
              f"The number of samples in dataset {name} is not the same for all inputs: {num_of_samples}")
        self._num_of_samples[name] = num_of_samples[list(num_of_samples.keys())[0]]

        ## Set the Loaded flag to True
        self._data_loaded = True
        ## Update the number of datasets loaded
        self.__n_datasets = len(self._data.keys())
        self.__datasets_loaded.add(name)
        ## Show the dataset
        self.visualizer.showDataset(name=name)