florisvb / PyNumDiff / 19585701810

21 Nov 2025 10:53PM UTC coverage: 90.86% (-0.01%) from 90.87%

Pull Request #177: Robustifying TVR
Merge 03b963eec into 21b0b5f92

22 of 22 new or added lines in 4 files covered. (100.0%)

5 existing lines in 1 file now uncovered (marked "# (uncovered)" in the listing below).

845 of 930 relevant lines covered (90.86%)

0.91 hits per line

Source File: /pynumdiff/optimize/_optimize.py (92.21% covered)
"""Optimization"""
import scipy.optimize
import numpy as np
from itertools import product
from functools import partial
from warnings import filterwarnings, warn
from multiprocessing import Pool, Manager
from hashlib import sha1
from tqdm import tqdm

from ..utils import evaluate, utility
from ..finite_difference import finitediff, first_order, second_order, fourth_order
from ..smooth_finite_difference import kerneldiff, mediandiff, meandiff, gaussiandiff, friedrichsdiff, butterdiff
from ..polynomial_fit import polydiff, savgoldiff, splinediff
from ..basis_fit import spectraldiff, rbfdiff
from ..total_variation_regularization import tvrdiff, velocity, acceleration, jerk, iterative_velocity, smooth_acceleration
from ..kalman_smooth import rtsdiff, constant_velocity, constant_acceleration, constant_jerk, robustdiff
from ..linear_model import lineardiff

# Map from method -> (search_space, bounds_low_hi). An illustrative override example follows the aliasing loops below.
method_params_and_bounds = {
    kerneldiff: ({'kernel': {'mean', 'median', 'gaussian', 'friedrichs'},
             'window_size': [5, 15, 30, 50],
          'num_iterations': [1, 5, 10]},
            {'window_size': (1, 1e6),
          'num_iterations': (1, 100)}),
    meandiff: ({'window_size': [5, 15, 30, 50], # Deprecated method
             'num_iterations': [1, 5, 10]},
               {'window_size': (1, 1e6),
             'num_iterations': (1, 100)}),
    butterdiff: ({'filter_order': set(i for i in range(1,11)), # categorical to save us from doing double work by guessing between orders
                   'cutoff_freq': [0.0001, 0.001, 0.005, 0.01, 0.1, 0.5],
                'num_iterations': [1, 5, 10]},
                  {'cutoff_freq': (1e-4, 1-1e-2),
                'num_iterations': (1, 1000)}),
    finitediff: ({'num_iterations': [5, 10, 30, 50],
                           'order': {2, 4}}, # order is categorical here, because it can't be 3
                 {'num_iterations': (1, 1000)}),
    first_order: ({'num_iterations': [5, 10, 30, 50]}, # Separated because optimizing over this one is rare due to shifted answer
                  {'num_iterations': (1, 1000)}),
    polydiff: ({'step_size': [1, 2, 5],
                   'kernel': {'friedrichs', 'gaussian'}, # categorical
                   'degree': [2, 3, 5, 7],
              'window_size': [11, 31, 51, 91, 131]},
               {'step_size': (1, 100),
                   'degree': (1, 8),
              'window_size': (10, 1000)}),
    savgoldiff: ({'degree': [2, 3, 5, 7, 10],
             'window_size': [3, 10, 30, 50, 90, 130, 200, 300],
           'smoothing_win': [3, 10, 30, 50, 90, 130, 200, 300]},
                 {'degree': (1, 12),
             'window_size': (3, 1000),
           'smoothing_win': (3, 1000)}),
    splinediff: ({'degree': {3, 4, 5}, # categorical, because degree is a whole number, and there aren't many choices
                       's': [0.2, 0.5, 0.75, 0.9, 1, 10],
          'num_iterations': [1, 5, 10]},
                      {'s': (1e-2, 1e4),
          'num_iterations': (1, 10)}),
    spectraldiff: ({'even_extension': {True, False}, # give categorical params in a set
                  'pad_to_zero_dxdt': {True, False},
                  'high_freq_cutoff': [1e-3, 5e-2, 1e-2, 5e-2, 1e-1]}, # give numerical params in a list to scipy.optimize over them
                 {'high_freq_cutoff': (1e-5, 1-1e-5)}),
    rbfdiff: ({'sigma': [1e-2, 1e-1, 1],
                'lmbd': [1e-3, 1e-2, 1e-1]},
              {'sigma': (1e-2, 1e3),
                'lmbd': (1e-3, 0.5)}),
    tvrdiff: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
               'order': {1, 2, 3}}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually a suboptimal choice in terms of true RMSE
              {'gamma': (1e-4, 1e7)}),
    velocity: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]}, # Deprecated method
               {'gamma': (1e-4, 1e7)}),
    iterative_velocity: ({'scale': 'small', # Rare to optimize this one, because it's longer-running than the convex version
                 'num_iterations': [1, 5, 10],
                          'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]},
                {'num_iterations': (1, 100), # gets expensive with more iterations
                          'gamma': (1e-4, 1e7)}),
    smooth_acceleration: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
                     'window_size': [3, 10, 30, 50, 90, 130]},
                          {'gamma': (1e-4, 1e7),
                     'window_size': (3, 1000)}),
    rtsdiff: ({'forwardbackward': {True, False},
                         'order': {1, 2, 3}, # with this few options, the optimization works better if this is categorical
                  'log_qr_ratio': [float(k) for k in range(-9, 10, 2)] + [12, 16]},
                 {'log_qr_ratio': (-10, 20)}), # qr_ratio is usually >>1
    constant_velocity: ({'q': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8], # Deprecated method
                         'r': [1e-8, 1e-4, 1e-1, 1e1, 1e4, 1e8],
           'forwardbackward': {True, False}},
                        {'q': (1e-10, 1e10),
                         'r': (1e-10, 1e10)}),
    robustdiff: ({'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually a suboptimal choice in terms of true RMSE
                  'log_q': [1., 4, 7, 10, 13], # decimal after the first entry ensures this is treated as float type
                  'log_r': [-1., 2, 5, 8, 11],
            'proc_huberM': [0., 2, 6], # 0 is l1 norm, 1.345 is Huber 95% "efficiency", 2 assumes about 5% outliers,
            'meas_huberM': [0., 2, 6]}, # 6 assumes basically no outliers per outlier_portion = (1 - norm.cdf(M))*2
                 {'log_q': (-5, 16),
                  'log_r': (-5, 16),
            'proc_huberM': (0, 6),
            'meas_huberM': (0, 6)}),
    lineardiff: ({'kernel': 'gaussian',
                   'order': 3,
                   'gamma': [1e-1, 1, 10, 100],
             'window_size': [10, 30, 50, 90, 130]},
                  {'order': (1, 5),
                   'gamma': (1e-3, 1000),
             'window_size': (15, 1000)})
} # Methods with nonunique parameter sets are aliased just below
for method in [second_order, fourth_order]: # Deprecated, redundant methods
    method_params_and_bounds[method] = method_params_and_bounds[first_order]
for method in [mediandiff, gaussiandiff, friedrichsdiff]: # Deprecated methods
    method_params_and_bounds[method] = method_params_and_bounds[meandiff]
for method in [acceleration, jerk]: # Deprecated, redundant methods
    method_params_and_bounds[method] = method_params_and_bounds[velocity]
for method in [constant_acceleration, constant_jerk]: # Deprecated, redundant methods
    method_params_and_bounds[method] = method_params_and_bounds[constant_velocity]
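
# Illustrative only (not part of this module): a search_space_updates override passed to optimize()
# below follows the same conventions as the table above -- values in a list are numerical seeds for
# the Nelder-Mead search, values in a set are categorical choices enumerated exhaustively, and a
# bare scalar is passed through unchanged. For example, a hypothetical override narrowing the
# rtsdiff search might look like:
#     search_space_updates = {'order': {2, 3}, 'log_qr_ratio': [0., 4., 8.]}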


# This function has to be at the top level for multiprocessing but is only used by optimize.
def _objective_function(point, func, x, dt, singleton_params, categorical_params, search_space_types,
    dxdt_truth, metric, tvgamma, padding, cache, huberM):
    """Function minimized by scipy.optimize.minimize; needs to have the form (point, *args) -> float.
    This is mildly complicated, because "point" controls the settings of a differentiation function, but
    the method may have numerical and non-numerical parameters, and all such parameters are now passed by
    keyword arguments. So the encoded `point` has to be decoded to a dict.

    :param np.array point: a numerical vector scipy chooses to try in the objective function
    :param dict singleton_params: maps parameter names to singleton values
    :param dict categorical_params: maps categorical parameter names to the values chosen for this combination
    :param dict search_space_types: maps parameter names to types, for turning the float vector point into the dict point_params
    :param multiprocessing.manager.dict cache: available across processes to save results and work
    Other params documented in `optimize`

    :return: float, cost of this objective at the point
    """
    # Short circuit if this hyperparam combo has already been queried, ~10% savings per #160
    key = sha1((''.join(f"{v:.3e}" for v in point) + # This hash is stable across processes. Takes bytes
               ''.join(str(v) for k,v in sorted(categorical_params.items()))).encode()).digest()
    if key in cache: return cache[key]

    # Query the differentiation method at this choice of hyperparameters
    point_params = {k:(v if search_space_types[k] == float else int(np.round(v)))
                        for k,v in zip(search_space_types, point)} # point -> dict
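    # Illustrative example of the decoding (hypothetical values): if search_space_types were
    # {'window_size': int, 'num_iterations': int}, then point = [30.2, 4.8] decodes to
    # point_params = {'window_size': 30, 'num_iterations': 5}.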
    # add back in singletons and categorical choices the Nelder-Mead isn't searching over
    try: x_hat, dxdt_hat = func(x, dt, **point_params, **singleton_params, **categorical_params) # estimate x and dxdt
    except np.linalg.LinAlgError: cache[key] = 1e10; return 1e10 # some methods can fail numerically  # (uncovered)

    # Evaluate estimate according to a loss function
    if dxdt_truth is not None:
        if metric == 'rmse': # minimize ||dxdt_hat - dxdt_truth||_2
            rmse_dxdt = evaluate.rmse(dxdt_truth, dxdt_hat, padding=padding)
            cache[key] = rmse_dxdt; return rmse_dxdt
        elif metric == 'error_correlation':  # (uncovered)
            ec = evaluate.error_correlation(dxdt_truth, dxdt_hat, padding=padding)  # (uncovered)
            cache[key] = ec; return ec  # (uncovered)
    else: # minimize L(Phi) = data-fit + gamma*TV(dxdt_hat), where the data-fit term is RMSE(trapz(dxdt_hat) + c - x) or, for finite M, the Huber version sqrt{2*Mean(Huber((trapz(dxdt_hat) + c - x)/sigma, M))}*sigma
        # It seems like we should be able to use x_hat rather than the trapz integral of dxdt_hat + constant, but the latter is more reliable,
        # because it accounts for the accuracy of the derivative directly, not through the generating algorithm's smooth signal estimate.
        rec_x_hat = utility.integrate_dxdt_hat(dxdt_hat, dt)
        rec_x_hat += utility.estimate_integration_constant(x, rec_x_hat, M=huberM)
        # robust_rme(..., M=inf) == rmse(...), so just use the simpler function if M=inf
        cost = evaluate.rmse(x, rec_x_hat, padding=padding) if huberM == float('inf') else evaluate.robust_rme(x, rec_x_hat, padding=padding, M=huberM)
        cost += tvgamma*evaluate.total_variation(dxdt_hat, padding=padding)
        cache[key] = cost; return cost


def optimize(func, x, dt, dxdt_truth=None, tvgamma=1e-2, search_space_updates={}, metric='rmse',
    padding=0, opt_method='Nelder-Mead', maxiter=10, parallel=True, huberM=6):
    """Find the optimal hyperparameters for a given differentiation method.

    :param function func: differentiation method to optimize parameters for, e.g. kalman_smooth.rtsdiff
    :param np.array[float] x: data to differentiate
    :param float dt: step size
    :param np.array[float] dxdt_truth: actual time series of the derivative of x, if known
    :param float tvgamma: Only used if :code:`dxdt_truth` is *not* given. Regularization value used to select for parameters
                    that yield a smooth derivative. A larger value results in a smoother derivative.
    :param dict search_space_updates: Each method has a default search space of parameter settings, structured as
                    :code:`{param1:[numerical, values], param2:{categorical, values}, param3:value, ...}` (defined in
                    :code:`_optimize.py`). The Cartesian product of the values is used as the set of initial starting points
                    in the optimization. If left empty, the default search space is used; if e.g. :code:`{param1:[different,values]}`
                    is given, those values replace the defaults for that parameter.
    :param str metric: either :code:`'rmse'` or :code:`'error_correlation'`, only applies if :code:`dxdt_truth` is given
    :param int padding: number of steps to ignore at the beginning and end of the data series, or :code:`'auto'` to ignore
                    2.5% at each end. A larger value causes the optimization to emphasize accuracy in the middle of the series.
    :param str opt_method: Optimization technique used by :code:`scipy.optimize.minimize`, the workhorse
    :param int maxiter: passed down to :code:`scipy.optimize.minimize`, maximum iterations
    :param bool parallel: whether to use multiple processes to optimize, typically faster for single optimizations.
                    For experiments, it is often a better use of resources to parallelize at that level, meaning each
                    optimization call runs in its own process, since spawned processes are not allowed to spawn further processes.
    :param float huberM: For the ground-truth-less situation: if :math:`M < \\infty`, use an outlier-robust, Huber-based accuracy
                    metric in the objective. :math:`M` is in units akin to standard deviation (see :code:`evaluate.robust_rme`),
                    so the loss transitions from the quadratic to the linear regime for errors lying :math:`>\\!M\\sigma` away from the mean error.

    :return: - **opt_params** (dict) -- best parameter settings for the differentiation method
             - **opt_value** (float) -- lowest value found for the objective function
    """
    if metric not in ['rmse','error_correlation']:
        raise ValueError('`metric` should be either `rmse` or `error_correlation`.')  # (uncovered)

    default_search_space, bounds = method_params_and_bounds[func]
    search_space = {**default_search_space, **search_space_updates} # applies updates without mutating the default

    # No need to optimize over singletons, just pass them through
    singleton_params = {k:v for k,v in search_space.items() if not isinstance(v, (list, set))}

    # To handle categoricals, find their combinations, and then pass each combination individually
    categorical_params = {k for k,v in search_space.items() if isinstance(v, set)}
    categorical_combos = [dict(zip(categorical_params, combo)) for combo in
        product(*[search_space[k] for k in categorical_params])] # ends up [{}] if there are no categorical params

    # The Nelder-Mead's search space is the set of dimensions where multiple numerical options are given in a list
    search_space_types = {k:type(v[0]) for k,v in search_space.items() if isinstance(v, list)} # map param name -> type, for converting to and from point
    if any(v not in [float, int] for v in search_space_types.values()):
        raise ValueError("To optimize over categorical strings or bools, put them in a set, not a list.")  # (uncovered)
    # Cast ints to floats, and we're good to go
    starting_points = list(product(*[np.array(search_space[k]).astype(float) for k in search_space_types]))
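    # Illustrative scale of the search (derived from the default table above): for rtsdiff,
    # categorical_combos enumerates forwardbackward x order = 2*3 = 6 combinations,
    # search_space_types is {'log_qr_ratio': float}, and starting_points holds 12 one-dimensional
    # seeds, so the loop below runs 6*12 = 72 scipy.optimize.minimize calls (repeated objective
    # evaluations are short-circuited by the shared cache).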
    # The numerical space should have bounds
    bounds = [bounds[k] if k in bounds else # pass these to minimize(). It should respect them.
            None for k,v in search_space_types.items()] # None means no bound on a dimension

    results = []
    filterwarnings("ignore", '', UserWarning) # An extra filtering call, because some worker work can actually be done in the main process
    if parallel:
        with Manager() as manager:
            cache = manager.dict() # cache answers to avoid expensive repeat queries
            with Pool(initializer=filterwarnings, initargs=["ignore", '', UserWarning]) as pool: # The heavy lifting
                for categorical_combo in categorical_combos:
                    # wrap the objective and scipy.optimize.minimize to pass kwargs and a host of other things that remain the same
                    _obj_fun = partial(_objective_function, func=func, x=x, dt=dt, singleton_params=singleton_params,
                        categorical_params=categorical_combo, search_space_types=search_space_types, dxdt_truth=dxdt_truth,
                        metric=metric, tvgamma=tvgamma, padding=padding, cache=cache, huberM=huberM)
                    _minimize = partial(scipy.optimize.minimize, _obj_fun, method=opt_method, bounds=bounds, options={'maxiter':maxiter})
                    results += pool.map(_minimize, starting_points) # returns a bunch of OptimizeResult objects
    else: # For experiments, where I want to parallelize optimization calls and am not allowed to have each spawn further processes
        cache = dict()
        for categorical_combo in categorical_combos:
            _obj_fun = partial(_objective_function, func=func, x=x, dt=dt, singleton_params=singleton_params,
                categorical_params=categorical_combo, search_space_types=search_space_types, dxdt_truth=dxdt_truth,
                metric=metric, tvgamma=tvgamma, padding=padding, cache=cache, huberM=huberM)
            _minimize = partial(scipy.optimize.minimize, _obj_fun, method=opt_method, bounds=bounds, options={'maxiter':maxiter})
            results += [_minimize(p) for p in starting_points]

    opt_idx = np.nanargmin([r.fun for r in results])
    opt_point = results[opt_idx].x
    # results are going to be floats, but that may not be allowed, so convert back to a dict
    opt_params = {k:(v if search_space_types[k] == float else
                    int(np.round(v)) if search_space_types[k] == int else
                    v > 0.5) for k,v in zip(search_space_types, opt_point)}
    opt_params.update(singleton_params)
    opt_params.update(categorical_combos[opt_idx//len(starting_points)]) # there are |starting_points| results for each combo

    return opt_params, results[opt_idx].fun
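
# Illustrative usage sketch (not part of this module; assumes a 1-D signal x and its step size dt are defined):
#     opt_params, opt_value = optimize(rtsdiff, x, dt, dxdt_truth=None, tvgamma=1e-2)
#     x_hat, dxdt_hat = rtsdiff(x, dt, **opt_params)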


def suggest_method(x, dt, dxdt_truth=None, cutoff_frequency=None):
    """This is meant as an easy-to-use, automatic way for users with some time on their hands to determine
    a good method and settings for their data. It calls the optimizer over (almost) all methods in the repo
    using the default search spaces defined at the top of the :code:`pynumdiff/optimize/_optimize.py` file.
    This routine will take a few minutes to run, especially due to `robustdiff`.

    Excluded:
        - ``first_order``, because iterating causes drift
        - ``lineardiff``, ``iterative_velocity``, and ``jerk_sliding``, because they either take too long,
          can be fragile, or tend not to do best
        - all ``cvxpy``-based methods if it is not installed
        - ``velocity``, because it tends not to be best but dominates the optimization process by directly
          optimizing the second term of the metric :math:`L = \\text{RMSE} \\Big( \\text{trapz}(\\mathbf{
          \\hat{\\dot{x}}}(\\Phi)) + \\mu, \\mathbf{y} \\Big) + \\gamma \\Big({TV}\\big(\\mathbf{\\hat{
          \\dot{x}}}(\\Phi)\\big)\\Big)`

    :param np.array[float] x: data to differentiate
    :param float dt: step size, because most methods are not designed to work with variable step sizes
    :param np.array[float] dxdt_truth: if known, you can pass true derivative values; otherwise you must use
            :code:`cutoff_frequency`
    :param float cutoff_frequency: in Hz, the highest dominant frequency of interest in the signal,
            used to find the parameter :math:`\\gamma` for regularization of the optimization process
            in the absence of ground truth. See https://ieeexplore.ieee.org/document/9241009.
            Estimate it by (a) counting the real number of peaks per second in the data, (b) looking at the
            power spectrum and choosing a cutoff, or (c) making an educated guess.

    :return: tuple[callable, dict] of\n
            - **method** -- a reference to the function handle of the differentiation method that worked best
            - **opt_params** -- optimal parameter settings for the differentiation method
    """
    tvgamma = None
    if dxdt_truth is None: # parameter checking
        if cutoff_frequency is None:
            raise ValueError('Either dxdt_truth or cutoff_frequency must be provided.')
        tvgamma = np.exp(-1.6*np.log(cutoff_frequency) - 0.71*np.log(dt) - 5.1) # See https://ieeexplore.ieee.org/document/9241009
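        # e.g. (illustrative) cutoff_frequency=1.0 Hz and dt=0.01 s give tvgamma = exp(-0.71*np.log(0.01) - 5.1) ~= 0.16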

    methods = [meandiff, mediandiff, gaussiandiff, friedrichsdiff, butterdiff,
        polydiff, savgoldiff, splinediff, spectraldiff, rbfdiff, finitediff, rtsdiff]
    try: # optionally skip some methods
        import cvxpy
        methods += [tvrdiff, smooth_acceleration, robustdiff]
    except ImportError:
        warn("CVXPY not installed, skipping tvrdiff, smooth_acceleration, and robustdiff")

    best_value = float('inf') # core loop
    for func in tqdm(methods):
        p, v = optimize(func, x, dt, dxdt_truth=dxdt_truth, tvgamma=tvgamma, search_space_updates=(
            {'order':{2,3}} if func in [tvrdiff, robustdiff] else {})) # convex-based methods with order 1 hack the cost function
        if v < best_value:
            method = func
            best_value = v
            opt_params = p

    return method, opt_params
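
# Illustrative usage sketch (not part of this module; assumes a noisy 1-D signal x sampled every 0.01 s
# whose content of interest lies below about 1 Hz):
#     method, opt_params = suggest_method(x, dt=0.01, cutoff_frequency=1.0)
#     x_hat, dxdt_hat = method(x, 0.01, **opt_params)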