• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

WassimTenachi / PhySO / #14

10 Jun 2024 12:28AM UTC coverage: 77.799% (+25.7%) from 52.052%
#14

push

coveralls-python

WassimTenachi
Update requirements.txt

6525 of 8387 relevant lines covered (77.8%)

0.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.58
/physo/benchmark/FeynmanDataset/FeynmanProblem.py
1
import pandas as pd
1✔
2
import numpy as np
1✔
3
import pathlib
1✔
4
import sympy
1✔
5
import matplotlib.pyplot as plt
1✔
6

7
# Internal imports
8
from physo.benchmark.utils import symbolic_utils as su
1✔
9

10
# Dataset paths
11
# Folder containing this module; the dataset .csv files are resolved relative to it
PARENT_FOLDER = pathlib.Path(__file__).parent
# Bulk equations file
PATH_FEYNMAN_EQS_CSV       = PARENT_FOLDER.joinpath("FeynmanEquations.csv")
# Bonus equations file
PATH_FEYNMAN_EQS_BONUS_CSV = PARENT_FOLDER.joinpath("BonusEquations.csv")
# Units of variables file
PATH_UNITS_CSV             = PARENT_FOLDER.joinpath("units.csv")
1✔
15

16

17
# ---------------------------------------------------------------------------------------------------------------------
18
# --------------------------------------------------- LOADING CSVs  ---------------------------------------------------
19
# ---------------------------------------------------------------------------------------------------------------------
20

21
def _load_feynman_equations_csv (filepath, set_name, n_vars_corrections, name_from_filename):
    """
    Shared implementation of the bulk and bonus equations loaders.
    Reads the .csv file, drops rows having a null first column, applies the corrections to the declared number of
    variables, checks that declared numbers of variables match the filled 'v%i_name' columns and returns the selected
    columns.
    Parameters
    ----------
    filepath : str
        Path to the equations .csv file.
    set_name : str
        Value written in the 'Set' column (eg. "bulk" or "bonus").
    n_vars_corrections : dict of {str : int}
        Corrected number of variables by value of the 'Filename' column.
    name_from_filename : bool
        If True, the 'Name' column is created as a copy of the 'Filename' column, otherwise the 'Name' column is
        expected to already exist in the file.
    Returns
    -------
    eqs_feynman_df : pd.DataFrame
    Raises
    ------
    AssertionError
        If the declared number of variables of a problem differs from its number of filled 'v%i_name' columns.
    """
    eqs_feynman_df = pd.read_csv(filepath, sep=",")
    # drop last row(s) of NaNs
    eqs_feynman_df = eqs_feynman_df[eqs_feynman_df[eqs_feynman_df.columns[0]].notnull()]
    # Set types for int columns
    eqs_feynman_df = eqs_feynman_df.astype({'Number': int, '# variables': int})

    # ---- Correcting typos in the file ----
    for filename, n_vars_corrected in n_vars_corrections.items():
        eqs_feynman_df.loc[eqs_feynman_df["Filename"] == filename, "# variables"] = n_vars_corrected

    # ---- Verifying number of variables for safety ----
    var_ids = range(1, 11)
    # Expected number of variables for each problem (number of filled variable name columns)
    expected_n_vars = eqs_feynman_df[["v%i_name" % (i) for i in var_ids]].notnull().to_numpy().sum(axis=1)             # (n_eqs,)
    # Declared number of variables for each problem
    n_vars = eqs_feynman_df["# variables"].to_numpy()                                                                   # (n_eqs,)
    # Is nb of declared variable consistent with variables columns ?
    is_consistent = np.equal(expected_n_vars, n_vars)                                                                   # (n_eqs,)
    # Raised explicitly (instead of using an assert statement) so the check is not stripped when running with
    # python -O, the exception type is kept as AssertionError for backward compatibility.
    if not is_consistent.all():
        raise AssertionError("Nb. of filled variables columns not consistent with declared nb. of variables for "
                             "problems:\n %s"%(str(eqs_feynman_df.loc[~is_consistent])))

    # ---- Making bulk and bonus datasets consistent ----
    # Input variable related columns names: 'v1_name', 'v1_low', 'v1_high', 'v2_name' etc.
    variables_columns_names = [template%(i) for i in var_ids for template in ('v%i_name', 'v%i_low', 'v%i_high')]
    # Essential equations related columns names: 'Output', 'Formula', '# variables', 'v1_name', 'v1_low', etc.
    essential_columns_names = ['Output', 'Formula', '# variables'] + variables_columns_names

    # Adding set column indicating from which file these equations come from (bulk file or bonus file)
    eqs_feynman_df["Set"] = set_name
    # Adding equation names as a column (I.6.2a etc.) when the file does not provide one
    if name_from_filename:
        eqs_feynman_df["Name"] = eqs_feynman_df["Filename"]

    # Columns to keep: 'Filename', 'Name', 'Set', 'Number', 'Output', 'Formula', '# variables', 'v1_name', 'v1_low',etc.
    columns_to_keep_names = ['Filename', 'Name', 'Set', 'Number'] + essential_columns_names
    return eqs_feynman_df[columns_to_keep_names]


def load_feynman_bulk_equations_csv (filepath_eqs ="FeynmanEquations.csv"):
    """
    Loads FeynmanEquations.csv into a clean pd.DataFrame (corrects typos).
    Source file can be found here: https://space.mit.edu/home/tegmark/aifeynman.html
    Parameters
    ----------
    filepath_eqs : str
        Path to FeynmanEquations.csv.
    Returns
    -------
    eqs_feynman_df : pd.DataFrame
        Columns: 'Filename', 'Name' (copy of 'Filename'), 'Set' (= "bulk"), 'Number', 'Output', 'Formula',
        '# variables', 'v1_name', 'v1_low', 'v1_high', ..., 'v10_high'.
    Raises
    ------
    AssertionError
        If the declared number of variables of a problem differs from its number of filled 'v%i_name' columns.
    """
    # Corrected number of arguments of equations having a wrong '# variables' value in the file
    n_vars_corrections = {
        "II.37.1"   : 3,  # takes 3 arguments not 6
        "I.18.12"   : 3,  # takes 3 arguments not 2
        "I.18.14"   : 4,  # takes 4 arguments not 3
        "III.10.19" : 4,  # takes 4 arguments not 3
        "I.38.12"   : 4,  # takes 4 arguments not 3
        "III.19.51" : 5,  # takes 5 arguments not 4
    }
    return _load_feynman_equations_csv (filepath           = filepath_eqs,
                                        set_name           = "bulk",
                                        n_vars_corrections = n_vars_corrections,
                                        name_from_filename = True,
                                        )


def load_feynman_bonus_equations_csv (filepath_eqs_bonus = "BonusEquations.csv"):
    """
    Loads BonusEquations.csv into a clean pd.DataFrame (corrects typos).
    Source file can be found here: https://space.mit.edu/home/tegmark/aifeynman.html
    Parameters
    ----------
    filepath_eqs_bonus : str
        Path to BonusEquations.csv.
    Returns
    -------
    eqs_feynman_df : pd.DataFrame
        Columns: 'Filename', 'Name' (as in the file), 'Set' (= "bonus"), 'Number', 'Output', 'Formula',
        '# variables', 'v1_name', 'v1_low', 'v1_high', ..., 'v10_high'.
    Raises
    ------
    AssertionError
        If the declared number of variables of a problem differs from its number of filled 'v%i_name' columns.
    """
    # Corrected number of arguments of equations having a wrong '# variables' value in the file
    n_vars_corrections = {
        "test_12" : 5,  # takes 5 arguments not 4
        "test_13" : 5,  # takes 5 arguments not 4
        "test_18" : 5,  # takes 5 arguments not 4
        "test_19" : 6,  # takes 6 arguments not 5
    }
    return _load_feynman_equations_csv (filepath           = filepath_eqs_bonus,
                                        set_name           = "bonus",
                                        n_vars_corrections = n_vars_corrections,
                                        name_from_filename = False,
                                        )
1✔
146

147

148
def load_feynman_all_equations_csv (filepath_eqs ="FeynmanEquations.csv", filepath_eqs_bonus = "BonusEquations.csv"):
    """
    Loads the bulk equations (FeynmanEquations.csv) followed by the bonus equations (BonusEquations.csv) into a
    single clean pd.DataFrame (typos corrected by the individual loaders).
    Source files can be found here: https://space.mit.edu/home/tegmark/aifeynman.html
    Parameters
    ----------
    filepath_eqs : str
        Path to FeynmanEquations.csv.
    filepath_eqs_bonus : str
        Path to BonusEquations.csv.
    Returns
    -------
    eqs_feynman_df : pd.DataFrame
        Bulk rows first, then bonus rows, with a single continuous index.
    """
    # Bulk equations first, bonus equations after
    frames = [
        load_feynman_bulk_equations_csv  (filepath_eqs       = filepath_eqs),
        load_feynman_bonus_equations_csv (filepath_eqs_bonus = filepath_eqs_bonus),
    ]
    # Re-indexing so the index runs continuously over both sets instead of restarting at 0 for the bonus set
    return pd.concat(frames, ignore_index=True)
1✔
169

170

171
def load_feynman_units_csv (filepath = "units.csv"):
    """
    Loads units.csv into a clean pd.DataFrame and corrects known errors.
    Source file can be found here: https://space.mit.edu/home/tegmark/aifeynman.html
    Parameters
    ----------
    filepath : str
        Path to units.csv.
    Returns
    -------
    units_df : pd.DataFrame
        Content of the file without rows having a null first column and without the last column.
    """
    units_df = pd.read_csv(filepath, sep=",")
    # drop last row(s) of NaNs
    units_df = units_df[units_df[units_df.columns[0]].notnull()]
    # drop last column as it contains nothing
    # Taking an explicit copy so the correction below is written to an independent dataframe rather than to a
    # selection of the dataframe that was read (avoids pandas SettingWithCopyWarning).
    units_df = units_df.iloc[:, :-1].copy()

    # ---- Correcting errors in the file ----
    # Variable mu_drift should have units s.kg-1 not s-1.kg
    # This variable only appears in eq. I.43.16 : velocity = mu_drift.force = mu_drift.(q.E/d)
    # Hence m.s-1 = (s.kg-1).(m.s-2.kg) i.e. [mu_drift] = s.kg-1
    # (see https://www.feynmanlectures.caltech.edu/I_43.html)
    # NB: charge [q] = A.s = (m2.s-3.kg.V-1).s = m2.s-2.kg.V-1 so this is fine.
    units_df.loc[units_df["Variable"] == "mu_drift", ["s", "kg"]] = [1, -1]
    return units_df
1✔
197

198
# Equations (bulk then bonus) and units tables, loaded once when this module is imported
EQS_FEYNMAN_DF = load_feynman_all_equations_csv (PATH_FEYNMAN_EQS_CSV, PATH_FEYNMAN_EQS_BONUS_CSV)
UNITS_DF       = load_feynman_units_csv (PATH_UNITS_CSV)

# Size of units vector (number of columns of the units table minus the 2 leading columns)
FEYN_UNITS_VECTOR_SIZE = len(UNITS_DF.columns) - 2

# Number of equations in dataset
N_EQS = len(EQS_FEYNMAN_DF)
1✔
208

209
# ---------------------------------------------------------------------------------------------------------------------
210
# --------------------------------------------------- UNITS UTILS  ----------------------------------------------------
211
# ---------------------------------------------------------------------------------------------------------------------
212

213
# Gets units from variable name
214
def get_units (var_name):
    """
    Gets units of variable var_name. Example: get_units("kb")
    Parameters
    ----------
    var_name : str
        original variable name.
    Returns
    -------
    units : numpy.array of shape (FEYN_UNITS_VECTOR_SIZE,) of floats
        Units of variable (first matching row of UNITS_DF without its 2 leading columns).
    Raises
    ------
    AssertionError
        If var_name is null.
    IndexError
        If the units of var_name could not be loaded from UNITS_DF.
    """
    # Raised explicitly (instead of using an assert statement) so the check is not stripped when running with
    # python -O, the exception type is kept as AssertionError for backward compatibility.
    if pd.isnull(var_name):
        raise AssertionError("Can not get the units of %s as it is a null."%(var_name))
    try:
        units = UNITS_DF[UNITS_DF["Variable"] == var_name].to_numpy()[0][2:].astype(float)
    # Not using a bare except so KeyboardInterrupt / SystemExit are not swallowed, chaining to keep the root cause
    except Exception as err:
        raise IndexError("Could not load units of %s"%(var_name)) from err
    return units
1✔
232

233

234
# ---------------------------------------------------------------------------------------------------------------------
235
# ------------------------------------------------- FEYNMAN PROBLEM  --------------------------------------------------
236
# ---------------------------------------------------------------------------------------------------------------------
237
# Fixed constants names to their numerical values
CONST_LOCAL_DICT = dict(pi = np.pi)
1✔
238

239
class FeynmanProblem:
    """
    Represents a single Feynman benchmark problem.
    (See https://arxiv.org/abs/1905.11481 and https://space.mit.edu/home/tegmark/aifeynman.html for details).
    Attributes
    ----------
    i_eq : int
        Equation number in the set of equations (e.g. 0 to 99 for bulk eqs and 100 to 119 for bonus eqs).
    i_eq_feyn : int
        Equation number in individual datasets ('Number' column, e.g. 1 to 100 for bulk eqs and 1 to 20 for bonus eqs).
    eq_name : str
        Equation name in the set of equations (e.g. I.6.2a).
    eq_filename : str
        Filename column in the Feynman dataset.
    SRBench_name : str
        SRBench style name ("feynman_" followed by the filename with '.' replaced by '_').
    n_vars : int
        Number of input variables.
    eq_df : pandas.core.series.Series
        Underlying pandas dataframe line of this equation.
    original_var_names : bool
        Using original variable names (e.g. theta, sigma etc.) and original output variable name (e.g. f, E etc.) if
        True, using x0, x1 ... as input variable names and y as output variable name otherwise.

    y_name_original : str
        Name of output variable as in the Feynman dataset.
    y_name : str
        Name of output variable.
    y_units : array_like of shape (FEYN_UNITS_VECTOR_SIZE,) of floats
        Units of output variables.

    X_names_original : array_like of shape (n_vars,) of str
        Names of input variables as in the Feynman dataset.
    X_names : array_like of shape (n_vars,) of str
        Names of input variables.
    X_lows : array_like of shape (n_vars,) of floats
        Lowest values taken by input variables.
    X_highs : array_like of shape (n_vars,) of floats
        Highest values taken by input variables.
    X_units :  array_like of shape (n_vars, FEYN_UNITS_VECTOR_SIZE,) of floats
        Units of input variables.

    formula_original : str
        Formula as in the Feynman dataset.
    X_sympy_symbols : array_like of shape (n_vars,) of sympy.Symbol
        Sympy symbols representing each input variables with assumptions (negative, positive etc.).
    sympy_X_symbols_dict : dict of {str : sympy.Symbol}
        Input variables names to sympy symbols (w assumptions), can be passed to sympy.parsing.sympy_parser.parse_expr
        as local_dict.
    sympy_original_to_X_symbols_dict : dict of {str : sympy.Symbol}
        Original input variables names to sympy symbols in usage (w assumptions), used to read the original formula.
    local_dict : dict of {str : sympy.Symbol or float}
        Input variables names to sympy symbols (w assumptions) and constants (eg. pi : np.pi etc.), can be passed to
        sympy.parsing.sympy_parser.parse_expr as local_dict.
    formula_sympy : sympy expression
        Formula in sympy.
    formula_sympy_eval : sympy expression
        Formula in sympy with evaluated fixed constants (eg. pi -> 3.14... etc).
    formula_latex : str
        Formula in latex.
    """

    def __init__(self, i_eq = None, eq_name = None, original_var_names = False):
        """
        Loads a Feynman problem based on its number in the set or its equation name
        Parameters
        ----------
        i_eq : int
            Equation number in the whole set of equations (0 to 99 for bulk eqs and 100 to 119 for bonus eqs).
            Takes precedence over eq_name if both are given.
        eq_name : str
            Equation name in the set of equations (e.g. I.6.2a).
        original_var_names : bool
            Using original variable names (e.g. theta, sigma etc.) and original output variable name (e.g. f, E etc.) if
            True, using x0, x1 ... as input variable names and y as output variable name otherwise.
        Raises
        ------
        ValueError
            If neither i_eq nor eq_name is given.
        IndexError
            If no equation is named eq_name.
        """
        # Selecting equation line in dataframe
        if i_eq is None and eq_name is None:
            raise ValueError("At least one of equation number (i_eq) or equation name (eq_name) should be specified to select a Feynman problem.")
        if i_eq is None:
            # Resolving the name to a row position so that self.i_eq is always set, also when selecting by name
            matching_positions = np.flatnonzero((EQS_FEYNMAN_DF["Name"] == eq_name).to_numpy())
            if matching_positions.size == 0:
                raise IndexError("No Feynman problem named %s in the set of equations."%(repr(eq_name)))
            i_eq = int(matching_positions[0])
        self.eq_df = EQS_FEYNMAN_DF.iloc[i_eq]                               # pandas.core.series.Series

        # Equation number (0 to 99 for bulk eqs and 100 to 119 for bonus eqs)
        self.i_eq = i_eq                                                     # int
        # Equation number in individual datasets (1 to 100 for bulk eqs and 1 to 20 for bonus eqs)
        self.i_eq_feyn = self.eq_df["Number"]                                # int
        # Code name of equation (eg. 'I.6.2a')
        self.eq_name = self.eq_df["Name"]                                    # str
        # Filename column in the Feynman dataset
        self.eq_filename = self.eq_df["Filename"]                            # str
        # SRBench style name
        self.SRBench_name = "feynman_" + self.eq_filename.replace('.', '_')  # str
        # Number of input variables
        self.n_vars = int(self.eq_df["# variables"])                         # int
        # Using x0, x1 ... and y names or original names (e.g. theta, sigma, f etc.)
        self.original_var_names = original_var_names                         # bool

        # ----------- y : output variable -----------
        # Name of output variable
        self.y_name_original = self.eq_df["Output"]                              # str
        # Name of output variable : y or original name (eg. f, E etc.)
        self.y_name = self.y_name_original if self.original_var_names else 'y'   # str
        # Units of output variables
        self.y_units = get_units(self.y_name_original)                           # (FEYN_UNITS_VECTOR_SIZE,)

        # ----------- X : input variables -----------
        # Utils id of input variables v1, v2 etc. in .csv
        var_ids_str = np.array( [ "v%i"%(i_var) for i_var in range(1, self.n_vars+1) ] ).astype(str)                          # (n_vars,)
        # Names of input variables
        self.X_names_original = np.array( [ self.eq_df[ var_id + "_name" ] for var_id in var_ids_str ] ).astype(str)          # (n_vars,)
        X_names_xi_style      = np.array( [ "x%i"%(i_var) for i_var in range(self.n_vars) ] ).astype(str)                     # (n_vars,)
        self.X_names          = self.X_names_original if self.original_var_names else X_names_xi_style                        # (n_vars,)
        # Lowest values taken by input variables
        self.X_lows           = np.array( [ self.eq_df[ var_id + "_low"  ] for var_id in var_ids_str ] ).astype(float)        # (n_vars,)
        # Highest values taken by input variables
        self.X_highs          = np.array( [ self.eq_df[ var_id + "_high" ] for var_id in var_ids_str ] ).astype(float)        # (n_vars,)
        # Units of input variables
        self.X_units          = np.array( [ get_units(self.eq_df[ var_id + "_name" ]) for var_id in var_ids_str ] ).astype(float)  # (n_vars, FEYN_UNITS_VECTOR_SIZE,)

        # ----------- Formula -----------
        self.formula_original = self.eq_df["Formula"] # (str)

        # Input variables as sympy symbols
        self.X_sympy_symbols = []
        for i in range(self.n_vars):
            self.X_sympy_symbols.append (su.sympy_symbol_with_assumptions_from_range(name = self.X_names[i],
                                                                                     low  = self.X_lows [i],
                                                                                     high = self.X_highs[i],
                                                                                    ))

        # Input variables names to sympy symbols dict
        self.sympy_X_symbols_dict = {self.X_names[i] : self.X_sympy_symbols[i] for i in range(self.n_vars)}                     #  (n_vars,)
        # Dict to use to read original feynman dataset formula
        # Original names to symbols in usage (i.e. symbols having original names or not)
        # eg. 'theta' -> theta symbol etc. (if original_var_names=True) or 'theta' -> x0 symbol etc. (else)
        self.sympy_original_to_X_symbols_dict = {self.X_names_original[i] : self.X_sympy_symbols[i] for i in range(self.n_vars)} #  (n_vars,)
        # NB: if original_var_names=True, then self.sympy_X_symbols_dict = self.sympy_original_to_X_symbols_dict

        # Declaring input variables via local_dict to avoid confusion
        # Eg. So sympy knows that we are referring to gamma as a variable and not the function etc.
        # evaluate = False avoids eg. sin(theta) = 0 when theta domain = [0,5] ie. nonzero=False, but no need for this
        # if nonzero assumption is not used
        evaluate = False
        self.formula_sympy   = sympy.parsing.sympy_parser.parse_expr(self.formula_original,
                                                                     local_dict = self.sympy_original_to_X_symbols_dict,
                                                                     evaluate   = evaluate)
        # Local dict : dict of input variables (sympy_original_to_X_symbols_dict) and fixed constants (pi -> 3.14.. etc)
        self.local_dict = {}
        self.local_dict.update(self.sympy_original_to_X_symbols_dict)
        self.local_dict.update(CONST_LOCAL_DICT)
        self.formula_sympy_eval = sympy.parsing.sympy_parser.parse_expr(self.formula_original,
                                                                     local_dict = self.local_dict,
                                                                     evaluate   = evaluate)
        # Latex formula
        self.formula_latex   = sympy.printing.latex(self.formula_sympy)

    def target_function(self, X):
        """
        Evaluates X with target function.
        Parameters
        ----------
        X : numpy.array of shape (n_vars, ?,) of floats
        Returns
        -------
        y : numpy.array of shape (?,) of floats
        """
        # Getting sympy function
        f = sympy.lambdify(self.X_sympy_symbols, self.formula_sympy, "numpy")
        # Mapping between variables names and their data value
        mapping_var_name_to_X = {self.X_names[i]: X[i] for i in range(self.n_vars)}
        # Evaluation
        # Forcing float type so if some symbols are not evaluated as floats (eg. if some variables are not declared
        # properly in source file) resulting partly symbolic expressions will not be able to be converted to floats
        # and an error can be raised).
        # This is also useful for detecting issues such as sin(theta) = 0 because theta.is_nonzero = False -> the result
        # is just an int or float. This strictness is intentional here, so scalar results are not handled.
        y = f(**mapping_var_name_to_X).astype(float)
        return y

    def generate_data_points (self, n_samples = 1_000_000):
        """
        Generates data points accordingly for this Feynman problem.
        Parameters
        ----------
        n_samples : int
            Number of samples to draw. By default, 1e6  as this is the number of data points for each problem in the
            files in https://space.mit.edu/home/tegmark/aifeynman.html
            Note that SRBench https://arxiv.org/abs/2107.14351 uses 1e5.
        Returns
        -------
        X, y : numpy.array of shape (n_vars, ?,) of floats, numpy.array of shape (?,) of floats
        """
        # Each input variable is drawn uniformly between its lowest and highest value
        X = np.stack([np.random.uniform(self.X_lows[i_var], self.X_highs[i_var], n_samples) for i_var in range(self.n_vars)])
        y = self.target_function(X)
        return X,y

    def show_sample(self, n_samples = 100, do_show = True, save_path = None):
        """
        Plots the output variable against each input variable (one panel per input variable) on freshly generated
        data points.
        Parameters
        ----------
        n_samples : int
            Number of samples to draw.
        do_show : bool
            If True, displays the figure via plt.show().
        save_path : str or None
            If not None, the figure is saved at this path.
        """
        X_array, y_array = self.generate_data_points(n_samples = n_samples)
        n_dim = X_array.shape[0]
        # squeeze = False so axes is always a 2D array, also when there is a single input variable
        fig, axes = plt.subplots(n_dim, 1, figsize=(10, n_dim * 4), squeeze=False)
        fig.suptitle(self.formula_original)
        for i in range(n_dim):
            curr_ax = axes[i, 0]
            curr_ax.plot(X_array[i], y_array, 'k.', markersize=1.)
            curr_ax.set_xlabel("%s : %s" % (self.X_names[i], self.X_units[i]))
            curr_ax.set_ylabel("%s : %s" % (self.y_name    , self.y_units))
        if save_path is not None:
            fig.savefig(save_path)
        if do_show:
            plt.show()
        else:
            # Figure is not handed over to the user: releasing it so repeated calls do not accumulate open figures
            plt.close(fig)

    def compare_expression (self, trial_expr,
                            handle_trigo            = True,
                            prevent_zero_frac       = True,
                            prevent_inf_equivalence = True,
                            verbose=False):
        """
        Checks if trial_expr is symbolically equivalent to the target expression of this Feynman problem, following a
        similar methodology as SRBench (see https://github.com/cavalab/srbench).
        I.e, it is deemed equivalent if:
            - the symbolic difference simplifies to 0
            - OR the symbolic difference is a constant
            - OR the symbolic ratio simplifies to a constant
        Parameters
        ----------
        trial_expr : Sympy Expression
            Trial sympy expression with evaluated numeric free constants and assumptions regarding variables
            (positivity etc.) encoded in expression.
        handle_trigo : bool
            Tries replacing floats by rationalized factors of pi and simplify with that.
        prevent_zero_frac : bool
            If fraction = 0 does not consider expression equivalent.
        prevent_inf_equivalence: bool
            If symbolic error or fraction is infinite does not consider expression equivalent.
        verbose : bool
            Verbose.
        Returns
        -------
        is_equivalent, report : bool, dict
            Is the expression equivalent, A dict containing details about the equivalence SRBench style.
        """

        # Cleaning target expression like SRBench
        target_expr = su.clean_sympy_expr(self.formula_sympy_eval)

        is_equivalent, report = su.compare_expression (trial_expr  = trial_expr,
                                                       target_expr = target_expr,
                                                       handle_trigo            = handle_trigo,
                                                       prevent_zero_frac       = prevent_zero_frac,
                                                       prevent_inf_equivalence = prevent_inf_equivalence,
                                                       verbose                 = verbose,)

        return is_equivalent, report

    def trial_function (self, trial_expr, X):
        """
        Evaluates X on a trial expression mapping X to input variables names in sympy.
        Parameters
        ----------
        trial_expr : Sympy Expression
            Trial sympy expression with evaluated numeric free constants and assumptions regarding variables
            (positivity etc.) encoded in expression.
        X : numpy.array of shape (n_vars, ?,) of floats
        Returns
        -------
        y : numpy.array of shape (?,) of floats
            Or a single float if the expression evaluates to a constant.
        """
        # Getting sympy function
        f = sympy.lambdify(self.X_sympy_symbols, trial_expr, "numpy")
        # Mapping between variables names and their data value
        mapping_var_name_to_X = {self.X_names[i]: X[i] for i in range(self.n_vars)}
        # Evaluation
        y = f(**mapping_var_name_to_X)
        # Result is already a single float (can happen if expression is a constant): nothing to convert
        if isinstance(y, float):
            return y
        # Result is a single integer (can happen if expression is an integer constant): it has no astype method so
        # converting it directly
        if isinstance(y, (int, np.integer)):
            return float(y)
        # Forcing float type so if some symbols are not evaluated as floats (eg. if some variables are not declared
        # properly in source file) resulting partly symbolic expressions will not be able to be converted to floats
        # and an error can be raised).
        return y.astype(float)

    def get_prefix_expression (self):
        """
        Gets the prefix expression of the formula.
        Returns
        -------
        dict :
            tokens_str : numpy.array of str
                List of tokens in the expression.
            arities : numpy.array of int
                List of arities of the tokens.
            tokens : numpy.array of sympy.core
                List of sympy tokens.
        """
        return su.sympy_to_prefix(self.formula_sympy)

    def __str__(self):
        return "FeynmanProblem : %s : %s\n%s"%(self.eq_filename, self.eq_name, str(self.formula_sympy))

    def __repr__(self):
        return str(self)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc