WassimTenachi / PhySO / #13

Committed 10 Jun 2024 12:28AM UTC coverage: 52.052% (-30.3%) from 82.385%

Build #13

Build Type

push

coveralls-python

Committed by

WassimTenachi

Commit Message

Update requirements.txt

Run Details

2980 of 5725 relevant lines covered (52.05%)

0.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

24.39

/physo/physym/tests/dataset_UnitTest.py

import unittest
import numpy as np
import torch

from physo.physym import dataset
from physo.physym import library as Lib
from physo.physym.functions import data_conversion, data_conversion_inv


class TestDataset(unittest.TestCase):
    """
    Unit tests for dataset.Dataset.

    Covers creation from valid data, the AssertionError expected on invalid inputs (wrong dtype, inconsistent
    shapes, NaNs, mismatching number of realizations), conversion of numpy inputs to torch tensors, the different
    accepted forms of y weights and the flattened views of the data.
    """

    def test_Dataset_assertions(self):
        """
        Single realization case: Dataset is built from one (X, y) pair wrapped in one-element lists.
        """

        # Using the GPU when one is available, CPU otherwise
        DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

        # DATA
        N = int(1e6)
        x = data_conversion  (np.linspace(0.04, 4, N)  ).to(DEVICE)
        v = data_conversion  (np.linspace(0.10, 10, N) ).to(DEVICE)
        t = data_conversion  (np.linspace(0.06, 6, N)  ).to(DEVICE)
        M  = data_conversion (1e6).to(DEVICE)
        c  = data_conversion (3e8).to(DEVICE)
        pi = data_conversion (np.pi).to(DEVICE)
        const1 = data_conversion (1.).to(DEVICE)

        X = torch.stack((x, v, t), axis=0)
        y_target = data_conversion  (np.linspace(0.01, 6, N)  ).to(DEVICE)
        y_weights = data_conversion (np.random.rand(N)  ).to(DEVICE)

        # LIBRARY CONFIG
        args_make_tokens = {
                        # operations
                        "op_names"             : "all",  # or ["mul", "neg", "inv", "sin"]
                        "use_protected_ops"    : False,
                        # input variables
                        "input_var_ids"        : {"x" : 0         , "v" : 1          , "t" : 2,        },
                        "input_var_units"      : {"x" : [1, 0, 0] , "v" : [1, -1, 0] , "t" : [0, 1, 0] },
                        "input_var_complexity" : {"x" : 0.        , "v" : 1.         , "t" : 0.,       },
                        # constants
                        "constants"            : {"pi" : pi        , "c" : c         , "M" : M         , "const1" : const1    },
                        "constants_units"      : {"pi" : [0, 0, 0] , "c" : [1, -1, 0], "M" : [0, 0, 1] , "const1" : [0, 0, 0] },
                        "constants_complexity" : {"pi" : 0.        , "c" : 0.        , "M" : 1.        , "const1" : 1.        },
                            }
        my_lib = Lib.Library(args_make_tokens = args_make_tokens,
                             superparent_units = [1, -2, 1], superparent_name = "y")

        def make_dataset_for_regular_SR(library, X, y, y_weights=1.):
            """
            Builds a Dataset containing a single realization by wrapping X, y and y_weights in one-element lists.
            """
            return dataset.Dataset(multi_X=[X, ], multi_y=[y, ], multi_y_weights=[y_weights, ], library=library)

        # ------- TEST CREATION -------
        # Catching Exception only (a bare except would also swallow KeyboardInterrupt / SystemExit) and
        # reporting the cause so a failure here is diagnosable.
        try:
            my_dataset = make_dataset_for_regular_SR(library=my_lib, X=X, y=y_target)
        except Exception as err:
            self.fail("Dataset creation failed. (%r)" % (err,))

        # ------- ASSERTIONS : FLOAT TYPE -------
        with self.assertRaises(AssertionError):
            make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100), dtype=int), y = torch.ones((100,)))
        with self.assertRaises(AssertionError):
            make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100)), y = torch.ones((100,), dtype=int))

        # ------- ASSERTIONS : SHAPE -------
        with self.assertRaises(AssertionError):
            make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100),), y = torch.ones((200,)))
        with self.assertRaises(AssertionError):
            make_dataset_for_regular_SR(library = my_lib, X = torch.ones((100, 3),), y = torch.ones((100,)))

        # ------- ASSERTIONS : VARIABLE ID -------
        # Only one row in X whereas the library declares input variable ids 0, 1 and 2
        with self.assertRaises(AssertionError):
            make_dataset_for_regular_SR(library=my_lib, X=torch.ones((1, 100), ), y=torch.ones((100,)))

        # ------- ASSERTIONS : ONE REALIZATION -------
        my_dataset = make_dataset_for_regular_SR(library=my_lib, X=X, y=y_target, y_weights=y_weights)
        self.assertEqual(my_dataset.n_realizations, 1)

        # With a single realization, flattened data should be identical to the input data
        self.assertTrue((my_dataset.multi_X_flatten == X).all())
        self.assertTrue((my_dataset.multi_y_flatten == y_target).all())
        self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights).all())

    def test_Dataset_assertions_multi_real(self):
        """
        Multiple realizations case: 30 realizations having 90, 100 or 110 samples each.
        """

        # Using the GPU when one is available, CPU otherwise
        DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

        # -------------------------------------- Making fake datasets --------------------------------------

        multi_X = []
        for n_samples in [90, 100, 110]:
            x1 = np.linspace(0, 10, n_samples)
            x2 = np.linspace(0, 1 , n_samples)
            X = np.stack((x1,x2),axis=0)
            X = torch.tensor(X).to(DEVICE)
            multi_X.append(X)
        multi_X = multi_X*10                         # (n_realizations,) of (n_dim, [n_samples depends on dataset],)

        n_samples_per_dataset = np.array([X.shape[1] for X in multi_X])
        n_all_samples = n_samples_per_dataset.sum()
        n_realizations = len(multi_X)

        def flatten_multi_data (multi_data,):
            """
            Flattens multiple datasets into a single one for vectorized evaluation.
            Parameters
            ----------
            multi_data : list of length (n_realizations,) of torch.tensor of shape (..., [n_samples depends on dataset],)
                List of datasets to be flattened.
            Returns
            -------
            torch.tensor of shape (..., n_all_samples)
                Flattened data (n_all_samples = sum([n_samples depends on dataset])).
            """
            flattened_data = torch.cat(multi_data, axis=-1) # (..., n_all_samples)
            return flattened_data

        def unflatten_multi_data (flattened_data):
            """
            Unflattens a single data into multiple ones.
            Parameters
            ----------
            flattened_data : torch.tensor of shape (..., n_all_samples)
                Flattened data (n_all_samples = sum([n_samples depends on dataset])).
            Returns
            -------
            list of len (n_realizations,) of torch.tensor of shape (..., [n_samples depends on dataset],)
                Unflattened data.
            """
            return list(torch.split(flattened_data, n_samples_per_dataset.tolist(), dim=-1)) # (n_realizations,) of (..., [n_samples depends on dataset],)

        y_weights_per_dataset = np.array([0, 0.001, 1.0]*10) # Shows weights work
        #y_weights_per_dataset = np.array([1., 1., 1.]*10)
        multi_y_weights = [np.full(shape=(n_samples_per_dataset[i],), fill_value=y_weights_per_dataset[i]) for i in range (n_realizations)]
        multi_y_weights = [torch.tensor(y_weights).to(DEVICE) for y_weights in multi_y_weights]
        y_weights_flatten = flatten_multi_data(multi_y_weights)

        multi_X_flatten = flatten_multi_data(multi_X)  # (n_dim, n_all_samples)

        # Making fake ideal parameters
        # n_spe_params   = 3
        # n_class_params = 2
        # Parameters are sent to DEVICE as they are combined with multi_X_flatten (which lives on DEVICE) in
        # trial_func, mixing CPU and CUDA tensors there would raise an error on GPU machines.
        random_shift       = (np.random.rand(n_realizations,3)-0.5)*0.8
        ideal_spe_params   = torch.tensor(np.array([1.123, 0.345, 0.116]) + random_shift).to(DEVICE) # (n_realizations, n_spe_params,)
        ideal_spe_params   = ideal_spe_params.transpose(0,1)                                         # (n_spe_params, n_realizations)
        ideal_class_params = torch.tensor(np.array([1.389, 1.005])).to(DEVICE)                       # (n_class_params, )

        ideal_spe_params_flatten = torch.cat(
            [torch.tile(ideal_spe_params[:,i], (int(n_samples_per_dataset[i]),1)).transpose(0,1) for i in range (n_realizations)], # (n_realizations,) of (n_spe_params, [n_samples depends on dataset],)
            axis = 1
        ) # (n_spe_params, n_all_samples)

        ideal_class_params_flatten = torch.tile(ideal_class_params, (int(n_all_samples),1)).transpose(0,1) # (n_class_params, n_all_samples)

        def trial_func (X, params, class_params):
            """
            Fake target function: damped cosine of X[0] using params plus a term linear in X[1] using class_params.
            """
            y = params[0]*torch.exp(-params[1]*X[0])*torch.cos(class_params[0]*X[0]+params[2]) + class_params[1]*X[1]
            return y

        y_ideals_flatten = trial_func (multi_X_flatten, ideal_spe_params_flatten, ideal_class_params_flatten) # (n_all_samples,)
        multi_y_target   = unflatten_multi_data(y_ideals_flatten)                                         # (n_realizations,) of (n_samples depends on dataset,)

        k0_init = [1.,1.,1.]*10 # np.full(n_realizations, 1.)
        # consts
        pi     = data_conversion (np.pi) .to(DEVICE)
        const1 = data_conversion (1.)    .to(DEVICE)

        # LIBRARY CONFIG
        args_make_tokens = {
                        # operations
                        "op_names"             : "all",
                        "use_protected_ops"    : True,
                        # input variables
                        "input_var_ids"        : {"t" : 0         , "l" : 1          },
                        "input_var_units"      : {"t" : [1, 0, 0] , "l" : [0, 1, 0]  },
                        "input_var_complexity" : {"t" : 0.        , "l" : 1.         },
                        # constants
                        "constants"            : {"pi" : pi        , "const1" : const1    },
                        "constants_units"      : {"pi" : [0, 0, 0] , "const1" : [0, 0, 0] },
                        "constants_complexity" : {"pi" : 1.        , "const1" : 1.        },
                        # free constants
                        "class_free_constants"            : {"c0"              , "c1"               },
                        "class_free_constants_init_val"   : {"c0" : 1.         , "c1"  : 1.         },
                        "class_free_constants_units"      : {"c0" : [-1, 0, 0] , "c1"  : [0, -1, 0] },
                        "class_free_constants_complexity" : {"c0" : 1.         , "c1"  : 1.         },
                        # free constants
                        "spe_free_constants"            : {"k0"              , "k1"               , "k2"               },
                        "spe_free_constants_init_val"   : {"k0" : k0_init    , "k1"  : 1.         , "k2"  : 1.         },
                        "spe_free_constants_units"      : {"k0" : [0, 0, 0]  , "k1"  : [-1, 0, 0] , "k2"  : [0, 0, 0]  },
                        "spe_free_constants_complexity" : {"k0" : 1.         , "k1"  : 1.         , "k2"  : 1.         },
                           }
        my_lib = Lib.Library(args_make_tokens = args_make_tokens,
                             superparent_units = [0, 0, 0], superparent_name = "y")

        # ------- TEST CREATION -------
        # Catching Exception only (a bare except would also swallow KeyboardInterrupt / SystemExit) and
        # reporting the cause so a failure here is diagnosable.
        try:
            my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)
        except Exception as err:
            self.fail("Dataset creation failed. (%r)" % (err,))

        # ------- TESTS -------
        # NB: Faulty inputs are prepared outside of the assertRaises contexts so that only the Dataset
        # constructor can be responsible for the expected AssertionError.

        # Wrong number of realizations between X and y_target
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target[:-1], library=my_lib)
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X[:-1], multi_y=multi_y_target, library=my_lib)

        # Sending data for one realization only / sending tensor type
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X[0], multi_y=multi_y_target[0], library=my_lib)

        # Test number of realizations
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)
        self.assertEqual(my_dataset.n_realizations, n_realizations)

        # Test conversion to torch, when already torch tensors
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)
        for i in range (n_realizations):
            self.assertTrue(torch.is_tensor(my_dataset.multi_X[i]))
            self.assertTrue(torch.is_tensor(my_dataset.multi_y[i]))

        # Test conversion to torch, when numpy arrays
        my_dataset = dataset.Dataset(multi_X=[X.cpu().numpy() for X in multi_X],
                                     multi_y=[y.cpu().numpy() for y in multi_y_target], library=my_lib)
        for i in range (n_realizations):
            self.assertTrue(torch.is_tensor(my_dataset.multi_X[i]))
            self.assertTrue(torch.is_tensor(my_dataset.multi_y[i]))

        # Wrong type
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=[X.cpu().numpy().astype(int) for X in multi_X], multi_y=multi_y_target,
                            library=my_lib)
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=[y.cpu().numpy().astype(int) for y in multi_y_target],
                            library=my_lib)

        # Containing NaNs
        # Using np.nan as the np.NAN alias no longer exists in NumPy >= 2.0
        wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]
        wrong_multi_X [0][0, 0] = np.nan
        wrong_multi_y = [y.cpu().numpy().copy() for y in multi_y_target]
        wrong_multi_y [0][0] = np.nan
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=wrong_multi_y, library=my_lib)

        # Containing inconsistent n_dim
        wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]
        wrong_multi_X [0] = wrong_multi_X[0][:-1,:] # removing one dim in realization 0
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)

        # Containing too low dimension given library
        wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]
        wrong_multi_X = [np.stack([wrong_multi_X[i][0,:]]*1) for i in range(n_realizations)] # 1D per realization
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)

        # ------ Test weights as one single float ------
        # Creating dataset
        try:
            my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=2.0, library=my_lib)
        except Exception as err:
            self.fail("Dataset creation failed. (%r)" % (err,))
        # Tensor type and content
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            expected = torch.full_like(multi_y_target[i], fill_value=2.0)
            self.assertTrue((y_weights == expected).all())
        # NAN assertion
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=np.nan, library=my_lib)
        # Wrong type -> Converts to float in this case
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=int(2), library=my_lib)
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            expected = torch.full_like(multi_y_target[i], fill_value=2.0)
            self.assertTrue((y_weights == expected).all())

        # ------ Test weights as (n_realizations,) of floats ------
        # Creating dataset
        try:
            my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=y_weights_per_dataset,
                                         library=my_lib)
        except Exception as err:
            self.fail("Dataset creation failed. (%r)" % (err,))
        # Tensor type and content
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            expected = torch.full_like(multi_y_target[i], fill_value=y_weights_per_dataset[i])
            self.assertTrue((y_weights == expected).all())
        self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))
        self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())
        # NAN assertion
        wrong_y_weights_per_dataset = y_weights_per_dataset.copy()
        wrong_y_weights_per_dataset[0] = np.nan
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,
                            multi_y_weights=wrong_y_weights_per_dataset, library=my_lib)
        # Wrong (n_realizations,) length
        wrong_y_weights_per_dataset = y_weights_per_dataset.copy()[:-1]
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,
                            multi_y_weights=wrong_y_weights_per_dataset, library=my_lib)
        # Wrong type -> Converts to float in this case
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,
                                     multi_y_weights=y_weights_per_dataset.astype(int), library=my_lib)
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            # Expected value is the weight truncated to int (as sent to Dataset) then expressed as a float
            expected = torch.full_like(multi_y_target[i], fill_value=float(int(y_weights_per_dataset[i])))
            self.assertTrue((y_weights == expected).all())

        # ------ Test weights as (n_realizations,) of ([n_samples depends on dataset]) ------
        # Creating dataset
        try:
            my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=multi_y_weights,
                                         library=my_lib)
        except Exception as err:
            self.fail("Dataset creation failed. (%r)" % (err,))
        # Tensor type and content
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            expected = multi_y_weights[i]
            self.assertTrue((y_weights == expected).all())
        self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))
        self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())
        # NAN assertion
        wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]
        wrong_multi_y_weights[0][0] = np.nan
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,
                            library=my_lib)
        # Wrong (n_realizations,) length
        wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]
        wrong_multi_y_weights = wrong_multi_y_weights[:-1]
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,
                            library=my_lib)
        # Inconsistent n_samples
        wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]
        wrong_multi_y_weights[0] = wrong_multi_y_weights[0][:-1]
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,
                            library=my_lib)
        # Conversion to torch
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,
                                     multi_y_weights=[y.cpu().numpy() for y in multi_y_weights], library=my_lib)
        for i, y_weights in enumerate(my_dataset.multi_y_weights):
            self.assertTrue(torch.is_tensor(y_weights))
            expected = multi_y_weights[i]
            self.assertTrue((y_weights == expected).all())
        # Wrong type
        with self.assertRaises(AssertionError):
            dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,
                            multi_y_weights=[y.cpu().numpy().astype(int) for y in multi_y_weights],
                            library=my_lib)

        # ----- Flattened values -----
        my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=multi_y_weights,
                                     library=my_lib)
        self.assertTrue(torch.is_tensor(my_dataset.multi_X_flatten))
        self.assertTrue(torch.is_tensor(my_dataset.multi_y_flatten))
        self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))

        self.assertTrue((my_dataset.multi_X_flatten         == multi_X_flatten   ).all())
        self.assertTrue((my_dataset.multi_y_flatten         == y_ideals_flatten  ).all())
        self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())



if __name__ == "__main__":
    # Running this file as a script launches the tests it defines, with verbose reporting.
    unittest.main(verbosity=2)

1	import unittest	1✔
2	import numpy as np	1✔
3	import torch	1✔
4
5	from physo.physym import dataset	1✔
6	from physo.physym import library as Lib	1✔
7	from physo.physym.functions import data_conversion, data_conversion_inv	1✔
8
9
10	class TestDataset(unittest.TestCase):	1✔
11
12	def test_Dataset_assertions(self):	1✔
13
14	DEVICE = 'cpu'	1✔
15	if torch.cuda.is_available():	1✔
16	DEVICE = 'cuda'	×
17
18	# DATA
19	N = int(1e6)	1✔
20	x = data_conversion (np.linspace(0.04, 4, N) ).to(DEVICE)	1✔
21	v = data_conversion (np.linspace(0.10, 10, N) ).to(DEVICE)	1✔
22	t = data_conversion (np.linspace(0.06, 6, N) ).to(DEVICE)	1✔
23	M = data_conversion (1e6).to(DEVICE)	1✔
24	c = data_conversion (3e8).to(DEVICE)	1✔
25	pi = data_conversion (np.pi).to(DEVICE)	1✔
26	const1 = data_conversion (1.).to(DEVICE)	1✔
27
28
29	X = torch.stack((x, v, t), axis=0)	1✔
30	y_target = data_conversion (np.linspace(0.01, 6, N) ).to(DEVICE)	1✔
31	y_weights = data_conversion (np.random.rand(N) ).to(DEVICE)	×
32
33	# LIBRARY CONFIG
34	args_make_tokens = {	1✔
35	# operations
36	"op_names" : "all", # or ["mul", "neg", "inv", "sin"]
37	"use_protected_ops" : False,
38	# input variables
39	"input_var_ids" : {"x" : 0 , "v" : 1 , "t" : 2, },
40	"input_var_units" : {"x" : [1, 0, 0] , "v" : [1, -1, 0] , "t" : [0, 1, 0] },
41	"input_var_complexity" : {"x" : 0. , "v" : 1. , "t" : 0., },
42	# constants
43	"constants" : {"pi" : pi , "c" : c , "M" : M , "const1" : const1 },
44	"constants_units" : {"pi" : [0, 0, 0] , "c" : [1, -1, 0], "M" : [0, 0, 1] , "const1" : [0, 0, 0] },
45	"constants_complexity" : {"pi" : 0. , "c" : 0. , "M" : 1. , "const1" : 1. },
46	}
47	my_lib = Lib.Library(args_make_tokens = args_make_tokens,	1✔
48	superparent_units = [1, -2, 1], superparent_name = "y")
49
50	def make_dataset_for_regular_SR(library, X, y, y_weights=1.):	1✔
51	my_dataset = dataset.Dataset(multi_X=[X, ], multi_y=[y, ], multi_y_weights=[y_weights, ], library=library)	1✔
52	return my_dataset	×
53
54	# ------- TEST CREATION -------
55	try:	×
56	my_dataset = make_dataset_for_regular_SR(library=my_lib, X=X, y=y_target)	1✔
57	except:	1✔
58	self.fail("Dataset creation failed.")	1✔
59
60	# ------- ASSERTIONS : FLOAT TYPE -------
61	with self.assertRaises(AssertionError):	×
62	my_dataset = make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100), dtype=int), y = torch.ones((100,)))	1✔
63	with self.assertRaises(AssertionError):	1✔
64	my_dataset = make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100)), y = torch.ones((100,), dtype=int))	1✔
65
66	# ------- ASSERTIONS : SHAPE -------
67	with self.assertRaises(AssertionError):	×
68	my_dataset = make_dataset_for_regular_SR(library = my_lib, X = torch.ones((3, 100),), y = torch.ones((200,)))	1✔
69	with self.assertRaises(AssertionError):	1✔
70	my_dataset = make_dataset_for_regular_SR(library = my_lib, X = torch.ones((100, 3),), y = torch.ones((100,)))	1✔
71
72	# ------- ASSERTIONS : VARIABLE ID -------
73	with self.assertRaises(AssertionError):	×
74	my_dataset = make_dataset_for_regular_SR(library=my_lib, X=torch.ones((1, 100), ), y=torch.ones((100,)))	1✔
75
76	# ------- ASSERTIONS : ONE REALIZATION -------
77	my_dataset = make_dataset_for_regular_SR(library=my_lib, X=X, y=y_target, y_weights=y_weights)	1✔
78	self.assertTrue(my_dataset.n_realizations == 1)	×
79
80	self.assertTrue((my_dataset.multi_X_flatten == X).all())	1✔
81	self.assertTrue((my_dataset.multi_y_flatten == y_target).all())	×
82	self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights).all())	×
83
84	return None	1✔
85
86	def test_Dataset_assertions_multi_real(self):	1✔
87
88	DEVICE = 'cpu'	1✔
89	if torch.cuda.is_available():	1✔
90	DEVICE = 'cuda'	1✔
91
92	# -------------------------------------- Making fake datasets --------------------------------------
93
94	multi_X = []	1✔
95	for n_samples in [90, 100, 110]:	1✔
96	x1 = np.linspace(0, 10, n_samples)	×
97	x2 = np.linspace(0, 1 , n_samples)	×
98	X = np.stack((x1,x2),axis=0)	1✔
99	X = torch.tensor(X).to(DEVICE)	×
100	multi_X.append(X)	1✔
101	multi_X = multi_X*10 # (n_realizations,) of (n_dim, [n_samples depends on dataset],)	1✔
102
103	n_samples_per_dataset = np.array([X.shape[1] for X in multi_X])	1✔
104	n_all_samples = n_samples_per_dataset.sum()	1✔
105	n_realizations = len(multi_X)	1✔
106	def flatten_multi_data (multi_data,):	×
107	"""
108	Flattens multiple datasets into a single one for vectorized evaluation.
109	Parameters
110	----------
111	multi_data : list of length (n_realizations,) of torch.tensor of shape (..., [n_samples depends on dataset],)
112	List of datasets to be flattened.
113	Returns
114	-------
115	torch.tensor of shape (..., n_all_samples)
116	Flattened data (n_all_samples = sum([n_samples depends on dataset])).
117	"""
118	flattened_data = torch.cat(multi_data, axis=-1) # (..., n_all_samples)	×
119	return flattened_data	×
120
121	def unflatten_multi_data (flattened_data):	1✔
122	"""
123	Unflattens a single data into multiple ones.
124	Parameters
125	----------
126	flattened_data : torch.tensor of shape (..., n_all_samples)
127	Flattened data (n_all_samples = sum([n_samples depends on dataset])).
128	Returns
129	-------
130	list of len (n_realizations,) of torch.tensor of shape (..., [n_samples depends on dataset],)
131	Unflattened data.
132	"""
133	return list(torch.split(flattened_data, n_samples_per_dataset.tolist(), dim=-1)) # (n_realizations,) of (..., [n_samples depends on dataset],)	×
134
135	y_weights_per_dataset = np.array([0, 0.001, 1.0]*10) # Shows weights work	×
136	#y_weights_per_dataset = np.array([1., 1., 1.]*10)
137	multi_y_weights = [np.full(shape=(n_samples_per_dataset[i],), fill_value=y_weights_per_dataset[i]) for i in range (n_realizations)]	×
138	multi_y_weights = [torch.tensor(y_weights).to(DEVICE) for y_weights in multi_y_weights]	×
139	y_weights_flatten = flatten_multi_data(multi_y_weights)	×
140
141	multi_X_flatten = flatten_multi_data(multi_X) # (n_dim, n_all_samples)	×
142
143	# Making fake ideal parameters
144	# n_spe_params = 3
145	# n_class_params = 2
146	random_shift = (np.random.rand(n_realizations,3)-0.5)*0.8	×
147	ideal_spe_params = torch.tensor(np.array([1.123, 0.345, 0.116]) + random_shift) # (n_realizations, n_spe_params,)	×
148	ideal_spe_params = ideal_spe_params.transpose(0,1) # (n_spe_params, n_realizations)	×
149	ideal_class_params = torch.tensor(np.array([1.389, 1.005])) # (n_class_params, )	×
150
151	ideal_spe_params_flatten = torch.cat(	×
152	[torch.tile(ideal_spe_params[:,i], (n_samples_per_dataset[i],1)).transpose(0,1) for i in range (n_realizations)], # (n_realizations,) of (n_spe_params, [n_samples depends on dataset],)
153	axis = 1
154	) # (n_spe_params, n_all_samples)
155
156	ideal_class_params_flatten = torch.tile(ideal_class_params, (n_all_samples,1)).transpose(0,1) # (n_class_params, n_all_samples)	×
157
158	def trial_func (X, params, class_params):	×
159	y = params[0]*torch.exp(-params[1]*X[0])*torch.cos(class_params[0]*X[0]+params[2]) + class_params[1]*X[1]	×
160	return y	×
161
162	y_ideals_flatten = trial_func (multi_X_flatten, ideal_spe_params_flatten, ideal_class_params_flatten) # (n_all_samples,)	×
163	multi_y_target = unflatten_multi_data(y_ideals_flatten) # (n_realizations,) of (n_samples depends on dataset,)	×
164
165	k0_init = [1.,1.,1.]*10 # np.full(n_realizations, 1.)	×
166	# consts
167	pi = data_conversion (np.pi) .to(DEVICE)	×
168	const1 = data_conversion (1.) .to(DEVICE)	×
169
170	# LIBRARY CONFIG
171	args_make_tokens = {	×
172	# operations
173	"op_names" : "all",
174	"use_protected_ops" : True,
175	# input variables
176	"input_var_ids" : {"t" : 0 , "l" : 1 },
177	"input_var_units" : {"t" : [1, 0, 0] , "l" : [0, 1, 0] },
178	"input_var_complexity" : {"t" : 0. , "l" : 1. },
179	# constants
180	"constants" : {"pi" : pi , "const1" : const1 },
181	"constants_units" : {"pi" : [0, 0, 0] , "const1" : [0, 0, 0] },
182	"constants_complexity" : {"pi" : 1. , "const1" : 1. },
183	# free constants
184	"class_free_constants" : {"c0" , "c1" },
185	"class_free_constants_init_val" : {"c0" : 1. , "c1" : 1. },
186	"class_free_constants_units" : {"c0" : [-1, 0, 0] , "c1" : [0, -1, 0] },
187	"class_free_constants_complexity" : {"c0" : 1. , "c1" : 1. },
188	# free constants
189	"spe_free_constants" : {"k0" , "k1" , "k2" },
190	"spe_free_constants_init_val" : {"k0" : k0_init , "k1" : 1. , "k2" : 1. },
191	"spe_free_constants_units" : {"k0" : [0, 0, 0] , "k1" : [-1, 0, 0] , "k2" : [0, 0, 0] },
192	"spe_free_constants_complexity" : {"k0" : 1. , "k1" : 1. , "k2" : 1. },
193	}
194	my_lib = Lib.Library(args_make_tokens = args_make_tokens,	×
195	superparent_units = [0, 0, 0], superparent_name = "y")
196
197	n_realizations = len(multi_X)	×
198
199	# ------- TEST CREATION -------
200	try:	×
201	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)	×
202	except:	×
203	self.fail("Dataset creation failed.")	×
204
205	# ------- TESTS -------
206
207	# Wrong number of realizations between X and y_target
208	with self.assertRaises(AssertionError):	×
209	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target[:-1], library=my_lib)	×
210	with self.assertRaises(AssertionError):	×
211	my_dataset = dataset.Dataset(multi_X=multi_X[:-1], multi_y=multi_y_target, library=my_lib)	×
212
213	# Sending data for one realization only / sending tensor type
214	with self.assertRaises(AssertionError):	×
215	my_dataset = dataset.Dataset(multi_X=multi_X[0], multi_y=multi_y_target[0], library=my_lib)	×
216
217	# Test number of realizations
218	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)	×
219	self.assertEqual(my_dataset.n_realizations, n_realizations)	×
220
221	# Test conversion to torch, when already torch tensors
222	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, library=my_lib)	×
223	for i in range (n_realizations):	×
224	self.assertTrue(torch.is_tensor(my_dataset.multi_X[i]))	×
225	self.assertTrue(torch.is_tensor(my_dataset.multi_y[i]))	×
226
227	# Test conversion to torch, when numpy arrays
228	my_dataset = dataset.Dataset(multi_X=[X.cpu().numpy() for X in multi_X],	×
229	multi_y=[y.cpu().numpy() for y in multi_y_target], library=my_lib)
230	for i in range (n_realizations):	×
231	self.assertTrue(torch.is_tensor(my_dataset.multi_X[i]))	×
232	self.assertTrue(torch.is_tensor(my_dataset.multi_y[i]))	×
233
234	# Wrong type
235	with self.assertRaises(AssertionError):	×
236	my_dataset = dataset.Dataset(multi_X=[X.cpu().numpy().astype(int) for X in multi_X], multi_y=multi_y_target,	×
237	library=my_lib)
238	with self.assertRaises(AssertionError):	×
239	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=[y.cpu().numpy().astype(int) for y in multi_y_target],	×
240	library=my_lib)
241	# Containing NaNs
242	wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]	×
243	wrong_multi_X [0][0, 0] = float(np.NAN)	×
244	wrong_multi_y = [y.cpu().numpy().copy() for y in multi_y_target]	×
245	wrong_multi_y [0][0] = float(np.NAN)	×
246	with self.assertRaises(AssertionError):	×
247	my_dataset = dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)	×
248	with self.assertRaises(AssertionError):	×
249	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=wrong_multi_y, library=my_lib)	×
250
251	# Containing inconsistent n_dim
252	wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]	×
253	wrong_multi_X [0] = wrong_multi_X[0][:-1,:] # removing one dim in realization 0	×
254	with self.assertRaises(AssertionError):	×
255	my_dataset = dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)	×
256
257	# Containing too low dimension given library
258	wrong_multi_X = [X.cpu().numpy().copy() for X in multi_X]	×
259	wrong_multi_X = [np.stack([wrong_multi_X[i][0,:]]*1) for i in range(n_realizations)] # 1D per realization	×
260	with self.assertRaises(AssertionError):	×
261	my_dataset = dataset.Dataset(multi_X=wrong_multi_X, multi_y=multi_y_target, library=my_lib)	×
262
263	# ------ Test weights as one single float ------
264	# Creating dataset
265	try:	×
266	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=2.0, library=my_lib)	×
267	except:	×
268	self.fail("Dataset creation failed.")	×
269	# Tensor type and content
270	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
271	self.assertTrue(torch.is_tensor(y_weights))	×
272	expected = torch.full_like(multi_y_target[i], fill_value=2.0)	×
273	self.assertTrue((y_weights == expected).all())	×
274	# NAN assertion
275	with self.assertRaises(AssertionError):	×
276	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=np.NAN,	×
277	library=my_lib)
278	# Wrong type -> Converts to float in this case
279	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=int(2), library=my_lib)	×
280	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
281	self.assertTrue(torch.is_tensor(y_weights))	×
282	expected = torch.full_like(multi_y_target[i], fill_value=2.0)	×
283	self.assertTrue((y_weights == expected).all())	×
284
285	# ------ Test weights as (n_realizations,) of floats ------
286	# Creating dataset
287	try:	×
288	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=y_weights_per_dataset,	×
289	library=my_lib)
290	except:	×
291	self.fail("Dataset creation failed.")	×
292	# Tensor type and content
293	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
294	self.assertTrue(torch.is_tensor(y_weights))	×
295	expected = torch.full_like(multi_y_target[i], fill_value=y_weights_per_dataset[i])	×
296	self.assertTrue((y_weights == expected).all())	×
297	self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))	×
298	self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())	×
299	# NAN assertion
300	with self.assertRaises(AssertionError):	×
301	wrong_y_weights_per_dataset = y_weights_per_dataset.copy()	×
302	wrong_y_weights_per_dataset[0] = np.NAN	×
303	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,	×
304	multi_y_weights=wrong_y_weights_per_dataset, library=my_lib)
305	# Wrong (n_realizations,) length
306	with self.assertRaises(AssertionError):	×
307	wrong_y_weights_per_dataset = y_weights_per_dataset.copy()[:-1]	×
308	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,	×
309	multi_y_weights=wrong_y_weights_per_dataset, library=my_lib)
310	# Wrong type -> Converts to float in this case
311	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,	×
312	multi_y_weights=y_weights_per_dataset.astype(int), library=my_lib)
313	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
314	self.assertTrue(torch.is_tensor(y_weights))	×
315	expected = torch.full_like(multi_y_target[i], fill_value=float(int(y_weights_per_dataset[i])))	×
316	self.assertTrue((y_weights == expected).all())	×
317
318	# ------ Test weights as (n_realizations,) of ([n_samples depends on dataset]) ------
319	# Creating dataset
320	try:	×
321	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=multi_y_weights,	×
322	library=my_lib)
323	except:	×
324	self.fail("Dataset creation failed.")	×
325	# Tensor type and content
326	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
327	self.assertTrue(torch.is_tensor(y_weights))	×
328	expected = multi_y_weights[i]	×
329	self.assertTrue((y_weights == expected).all())	×
330	self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))	×
331	self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())	×
332	# NAN assertion
333	with self.assertRaises(AssertionError):	×
334	wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]	×
335	wrong_multi_y_weights[0][0] = float(np.NAN)	×
336	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,	×
337	library=my_lib)
338	# Wrong (n_realizations,) length
339	with self.assertRaises(AssertionError):	×
340	wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]	×
341	wrong_multi_y_weights = wrong_multi_y_weights[:-1]	×
342	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,	×
343	library=my_lib)
344	# Inconsistent n_samples
345	with self.assertRaises(AssertionError):	×
346	wrong_multi_y_weights = [y.cpu().numpy().copy() for y in multi_y_weights]	×
347	wrong_multi_y_weights[0] = wrong_multi_y_weights[0][:-1]	×
348	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=wrong_multi_y_weights,	×
349	library=my_lib)
350	# Conversion to torch
351	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,	×
352	multi_y_weights=[y.cpu().numpy() for y in multi_y_weights], library=my_lib)
353	for i, y_weights in enumerate(my_dataset.multi_y_weights):	×
354	self.assertTrue(torch.is_tensor(y_weights))	×
355	expected = multi_y_weights[i]	×
356	self.assertTrue((y_weights == expected).all())	×
357	# Wrong type
358	with self.assertRaises(AssertionError):	×
359	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target,	×
360	multi_y_weights=[y.cpu().numpy().astype(int) for y in multi_y_weights],
361	library=my_lib)
362
363	# ----- Flattened values -----
364	my_dataset = dataset.Dataset(multi_X=multi_X, multi_y=multi_y_target, multi_y_weights=multi_y_weights,	×
365	library=my_lib)
366	self.assertTrue(torch.is_tensor(my_dataset.multi_X_flatten))	×
367	self.assertTrue(torch.is_tensor(my_dataset.multi_y_flatten))	×
368	self.assertTrue(torch.is_tensor(my_dataset.multi_y_weights_flatten))	×
369
370	self.assertTrue((my_dataset.multi_X_flatten == multi_X_flatten ).all())	×
371	self.assertTrue((my_dataset.multi_y_flatten == y_ideals_flatten ).all())	×
372	self.assertTrue((my_dataset.multi_y_weights_flatten == y_weights_flatten ).all())	×
373
374
375	return None	×
376
377
378
# Script entry point: when this file is executed directly (rather than
# imported), discover and run the test cases defined in this module with
# verbose, per-test output.
if __name__ == "__main__":
    unittest.main(verbosity=2)

WassimTenachi / PhySO / #13

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous