
mcuntz / pyjams / 6908420384

17 Nov 2023 07:48PM UTC coverage: 26.964% (-68.5%) from 95.437%

Build 6908420384 (push, github) by mcuntz: Split ncread and ncinfo in separate files

1 of 4 new or added lines in 1 file covered (25.0%).
936 existing lines in 13 files now uncovered.
357 of 1324 relevant lines covered (26.96%).
1.02 hits per line.

Source File

/src/pyjams/kernel_regression.py (14.1% covered)

#!/usr/bin/env python
"""
Multi-dimensional non-parametric kernel regression

This module was written by Matthias Cuntz while at Department of
Computational Hydrosystems, Helmholtz Centre for Environmental
Research - UFZ, Leipzig, Germany, and continued while at Institut
National de Recherche pour l'Agriculture, l'Alimentation et
l'Environnement (INRAE), Nancy, France.

:copyright: Copyright 2012-2022 Matthias Cuntz, see AUTHORS.rst for details.
:license: MIT License, see LICENSE for details.

.. moduleauthor:: Matthias Cuntz

The following functions are provided

.. autosummary::
   kernel_regression_h
   kernel_regression

History
    * Written, Jun 2012 by Matthias Cuntz - mc (at) macu (dot) de,
      inspired by Matlab routines of Yingying Dong, Boston College and
      Yi Cao, Cranfield University
    * Assert correct input, Apr 2014, Matthias Cuntz
    * Corrected bug in _boot_h: x.size->x.shape[0], Jan 2018, Matthias Cuntz
    * Code refactoring, Sep 2021, Matthias Cuntz
    * Use format strings, Apr 2022, Matthias Cuntz
    * Use minimize with method TNC instead of fmin_tnc,
      Apr 2022, Matthias Cuntz
    * Use helper function array2input to assure correct output type,
      Apr 2022, Matthias Cuntz
    * Return scalar h if 1-dimensional, Apr 2022, Matthias Cuntz
    * Output type is same as y instead of x or xout, Apr 2022, Matthias Cuntz

"""
import numpy as np
import scipy.optimize as opt
from .division import division
from .helper import array2input
# from division import division
# from helper import array2input


__all__ = ['kernel_regression_h', 'kernel_regression']


def _nadaraya_watson(z, y):
    """
    Helper function that calculates the Nadaraya-Watson estimator
    for a given kernel.
    Currently, only the Gaussian kernel is implemented.

    """
    kerf = 1. / np.sqrt(2. * np.pi) * np.exp(-0.5 * z * z)
    w = np.prod(kerf, 1)
    out = division(np.dot(w, y), np.sum(w), np.nan)

    return out


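# For reference: _nadaraya_watson above computes, from the scaled differences
# z_ij = (x_ij - x0_j) / h_j passed in by its callers,
#     m(x0) = sum_i w_i * y_i / sum_i w_i,   w_i = prod_j K(z_ij),
# with the Gaussian kernel K(u) = exp(-u**2 / 2) / sqrt(2 * pi).
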
def _cross_valid_h(h, x, y):
    """
    Helper function that calculates the cross-validation function for the
    Nadaraya-Watson estimator, which is basically the mean square error
    where the model estimate is replaced by the jackknife (leave-one-out)
    estimate (Hardle and Muller 2000).

    """
    n = x.shape[0]
    # allocate output
    out = np.empty(n)
    # Loop through each regression point
    for i in range(n):
        # all points except the current one (leave-one-out)
        xx = np.delete(x, i, axis=0)
        yy = np.delete(y, i, axis=0)
        z = (xx - x[i, :]) / h
        out[i] = _nadaraya_watson(z, yy)
    cv = np.sum((y - out)**2) / float(n)

    return cv


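# Written out, _cross_valid_h evaluates the leave-one-out cross-validation
# score
#     CV(h) = (1/n) * sum_i (y_i - m_{-i}(x_i))**2,
# where m_{-i}(x_i) is the Nadaraya-Watson estimate at x_i with the i-th
# observation removed; kernel_regression_h minimises CV(h) over h.
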
def _boot_h(h, x, y):
    """
    Helper function that calculates the bootstrap function for the
    Nadaraya-Watson estimator, which is basically the mean square error
    where the model estimate is replaced by the jackknife estimate
    (Hardle and Muller 2000).

    This basically does _cross_valid_h for 100 random points.

    """
    n = 100
    ind = np.random.randint(x.shape[0], size=n)
    # allocate output
    out = np.empty(n)
    # Loop through each bootstrap point
    for i in range(n):
        # all points except the sampled one (leave-one-out)
        xx = np.delete(x, ind[i], axis=0)
        yy = np.delete(y, ind[i], axis=0)
        z = (xx - x[ind[i], :]) / h
        out[i] = _nadaraya_watson(z, yy)
    cv = np.sum((y[ind] - out)**2) / float(n)

    return cv


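# _boot_h approximates the same criterion for large samples: it draws 100
# indices with replacement and averages the squared leave-one-out errors at
# those sampled points only; kernel_regression_h switches to it when n > 100.
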
def kernel_regression_h(x, y, silverman=False):
    """
    Optimal bandwidth for multi-dimensional non-parametric kernel regression

    Optimal bandwidth is determined using cross-validation or
    Silverman's rule-of-thumb.

    Parameters
    ----------
    x : array_like (n, k)
        Independent values
    y : array_like (n)
        Dependent values
    silverman : bool, optional
        Use Silverman's rule-of-thumb to calculate bandwidth *h* if True,
        otherwise determine *h* via cross-validation

    Returns
    -------
    float or array
        Optimal bandwidth *h*. For multi-dimensional regression, *h* is a
        1d-array, assuming a diagonal bandwidth matrix.

    References
    ----------
    Hardle W and Muller M (2000) Multivariate and semiparametric kernel
       regression. In MG Schimek (Ed.), Smoothing and regression: Approaches,
       computation, and application (pp. 357-392). Hoboken, NJ, USA: John Wiley
       & Sons, Inc. doi: 10.1002/9781118150658.ch12

    Examples
    --------
    >>> n = 10
    >>> x = np.zeros((n, 2))
    >>> x[:, 0] = np.arange(n, dtype=float) / float(n-1)
    >>> x[:, 1] = 1. / (np.arange(n, dtype=float) / float(n-1) + 0.1)
    >>> y = 1. + x[:, 0]**2 - np.sin(x[:, 1])**2

    >>> h = kernel_regression_h(x, y)
    >>> print(np.allclose(h, [0.172680, 9.516907], atol=0.0001))
    True

    >>> h = kernel_regression_h(x, y, silverman=True)
    >>> print(np.allclose(h, [0.229190, 1.903381], atol=0.0001))
    True

    >>> n = 10
    >>> x = np.arange(n, dtype=float) / float(n-1)
    >>> y = 1. + x**2 - np.sin(x)**2

    >>> h = kernel_regression_h(x, y)
    >>> print(np.around(h, 4))
    0.045

    >>> h = kernel_regression_h(x, y, silverman=True)
    >>> print(np.around(h, 4))
    0.2248

    """
    xx = np.array(x)
    yy = np.array(y)
    # Check input
    assert xx.shape[0] == yy.size, (
        f'size(x, 0) != size(y): {xx.shape[0]} != {yy.size}')
    if xx.ndim == 1:  # deal with 1d-arrays
        xx = xx[:, np.newaxis]
    n = xx.shape[0]
    d = xx.shape[1]

    # Silverman (1986), Scott (1992), Bowman and Azzalini (1997)
    # Very similar to stats.gaussian_kde
    # h has dimension d
    h = ( (4. / float(d + 2) / float(n))**(1. / float(d + 4)) *
          np.std(xx, axis=0, ddof=1) )

    if not silverman:
        # Find the optimal h
        bounds = [(0.2*i, 5.0*i) for i in h]
        if n <= 100:
            res = opt.minimize(
                _cross_valid_h, h, args=(xx, yy), method='TNC', bounds=bounds,
                options={'ftol': 1e-10, 'xtol': 1e-10, 'maxfun': 1000})
            h = res.x
        else:
            res = opt.minimize(
                _boot_h, h, args=(xx, yy), method='TNC', bounds=bounds,
                options={'ftol': 1e-10, 'xtol': 1e-10, 'maxfun': 1000})
            h = res.x

    if len(h) == 1:
        h = h[0]

    return h


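# The rule-of-thumb start value computed in kernel_regression_h above is the
# Silverman/Scott bandwidth for a Gaussian kernel in d dimensions,
#     h_j = (4 / ((d + 2) * n))**(1 / (d + 4)) * std(x_j, ddof=1),
# one entry per dimension (diagonal bandwidth matrix); the cross-validation
# step then searches each h_j within [0.2 * h_j, 5 * h_j].
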
def kernel_regression(x, y, h=None, silverman=False, xout=None):
    """
    Multi-dimensional non-parametric kernel regression

    Optimal bandwidth can be estimated by cross-validation
    or by using Silverman's rule-of-thumb.

    Parameters
    ----------
    x : array_like (n, k)
        Independent values
    y : array_like (n)
        Dependent values
    h : float or array_like(k), optional
        Use *h* as bandwidth for calculating regression values if given,
        otherwise determine optimal *h* using cross-validation or by using
        Silverman's rule-of-thumb if *silverman==True*.
    silverman : bool, optional
        Use Silverman's rule-of-thumb to calculate bandwidth *h* if True,
        otherwise determine *h* via cross-validation.
        Only used if *h* is not given.
    xout : ndarray(n, k), optional
        Return fitted values at *xout* if given,
        otherwise return fitted values at *x*.

    Returns
    -------
    array_like with same type as *y*
        Fitted values at *x*, or at *xout* if given

    References
    ----------
    Hardle W and Muller M (2000) Multivariate and semiparametric kernel
       regression. In MG Schimek (Ed.), Smoothing and regression: Approaches,
       computation, and application (pp. 357-392). Hoboken, NJ, USA: John Wiley
       & Sons, Inc. doi: 10.1002/9781118150658.ch12

    Examples
    --------
    >>> n = 10
    >>> x = np.zeros((n, 2))
    >>> x[:, 0] = np.arange(n, dtype=float) / float(n-1)
    >>> x[:, 1] = 1. / (np.arange(n, dtype=float) / float(n-1) + 0.1)
    >>> y = 1. + x[:, 0]**2 - np.sin(x[:, 1])**2

    Separate determination of h and kernel regression

    >>> h = kernel_regression_h(x, y)
    >>> yk = kernel_regression(x, y, h)
    >>> print(np.allclose(yk[0:6],
    ...       [0.52241, 0.52570, 0.54180, 0.51781, 0.47644, 0.49230],
    ...       atol=0.0001))
    True

    Single call to kernel regression

    >>> yk = kernel_regression(x, y)
    >>> print(np.allclose(yk[0:6],
    ...       [0.52241, 0.52570, 0.54180, 0.51781, 0.47644, 0.49230],
    ...       atol=0.0001))
    True

    Single call to kernel regression using Silverman's rule-of-thumb for h

    >>> yk = kernel_regression(x, y, silverman=True)
    >>> print(np.allclose(yk[0:6],
    ...       [0.691153, 0.422809, 0.545844, 0.534315, 0.521494, 0.555426],
    ...       atol=0.0001))
    True

    >>> n = 5
    >>> xx = np.empty((n, 2))
    >>> xx[:, 0] = (np.amin(x[:, 0]) + (np.amax(x[:, 0]) - np.amin(x[:, 0])) *
    ...                                 np.arange(n, dtype=float) / float(n))
    >>> xx[:, 1] = (np.amin(x[:, 1]) + (np.amax(x[:, 1]) - np.amin(x[:, 1])) *
    ...                                 np.arange(n, dtype=float) / float(n))
    >>> yk = kernel_regression(x, y, silverman=True, xout=xx)
    >>> print(np.allclose(yk,
    ...       [0.605485, 0.555235, 0.509529, 0.491191, 0.553325],
    ...       atol=0.0001))
    True

    """
    xx = np.array(x)
    yy = np.array(y)
    # Check input
    assert xx.shape[0] == yy.size, (
        f'size(x, 0) != size(y): {xx.shape[0]} != {yy.size}')
    # deal with 1d-arrays
    if xx.ndim == 1:
        xx = xx[:, np.newaxis]
    d = xx.shape[1]

    # determine h
    if h is None:
        hh = kernel_regression_h(xx, yy, silverman=silverman)
    else:
        if np.size(np.shape(h)) == 0:
            hh = np.repeat(h, d)  # use h for all dimensions if one h given
        else:
            hh = np.array(h)
        assert np.size(hh) == d, 'size(h) must be 1 or x.shape[1]'

    # Calc regression
    if xout is None:
        xxout = xx
    else:
        xxout = np.array(xout)
    if xxout.ndim == 1:
        xxout = xxout[:, np.newaxis]
    nout  = xxout.shape[0]
    dout  = xxout.shape[1]
    assert d == dout, f'size(x, 1) != size(xout, 1): {d} != {dout}'
    # allocate output
    out = np.empty(nout)
    # Loop through each regression point
    for i in range(nout):
        # scaled difference from the regression point
        z = (xx - xxout[i, :]) / hh
        # Nadaraya-Watson estimator with the multivariate Gaussian kernel
        out[i] = _nadaraya_watson(z, y)

    out = array2input(out, y, undef=np.nan)

    return out


if __name__ == '__main__':
    import doctest
    doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)

    # nn = 1000
    # x = np.zeros((nn, 2))
    # x[:, 0] = np.arange(nn, dtype=float) / float(nn-1)
    # x[:, 1] = 1. / (x[:, 0] + 0.1)
    # y = 1. + x[:, 0]**2 - np.sin(x[:, 1])**2
    # h = kernel_regression_h(x, y)
    # print(h)
    # yy = kernel_regression(x, y, h, xout=x)
    # print(yy[0], yy[-1])
    # print(kernel_regression(x, y))
    # h = kernel_regression_h(x, y, silverman=True)
    # print(h)
    # print(kernel_regression(x, y, h))
    # ss = np.shape(x)
    # nn = 5
    # xx = np.empty((nn, ss[1]))
    # xx[:,0] = (np.amin(x[:,0]) + (np.amax(x[:,0])-np.amin(x[:,0])) *
    #                               np.arange(nn,dtype=float)/float(nn))
    # xx[:,1] = (np.amin(x[:,1]) + (np.amax(x[:,1])-np.amin(x[:,1])) *
    #                               np.arange(nn,dtype=float)/float(nn))
    # print(kernel_regression(x, y, h, xout=xx))
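    # Minimal usage sketch on synthetic 1-d data (assumed values), kept
    # commented out like the examples above:
    # x = np.linspace(0., 1., 50)
    # y = np.sin(2. * np.pi * x) + 0.1 * np.random.randn(50)
    # ysmooth = kernel_regression(x, y)  # cross-validated bandwidth
    # ycoarse = kernel_regression(x, y, h=0.1,
    #                             xout=np.linspace(0., 1., 200))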