• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

morganjwilliams / pyrolite / 5564819928

pending completion
5564819928

push

github

morganjwilliams
Merge branch 'release/0.3.3' into main

249 of 270 new or added lines in 48 files covered. (92.22%)

217 existing lines in 33 files now uncovered.

5971 of 6605 relevant lines covered (90.4%)

10.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.93
/pyrolite/util/plot/density.py
1
"""
2
Functions for dealing with kernel density estimation.
3

4
Attributes
5
----------
6
USE_PCOLOR : :class:`bool`
7
    Option to use the :func:`matplotlib.pyplot.pcolor` function in place
8
    of :func:`matplotlib.pyplot.pcolormesh`.
9
"""
10
import matplotlib.pyplot as plt
12✔
11
import numpy as np
12✔
12
import scipy.interpolate
12✔
13
from numpy.linalg import LinAlgError
12✔
14

15
from ..distributions import sample_kde
12✔
16
from ..log import Handle
12✔
17
from ..math import interpolate_line, linspc_, logspc_
12✔
18
from ..meta import subkwargs
12✔
19
from .grid import bin_centres_to_edges
12✔
20

21
logger = Handle(__name__)
12✔
22

23
try:
12✔
24
    import statsmodels.api as sm
12✔
25

26
    HAVE_SM = True
12✔
27
except ImportError:
×
UNCOV
28
    HAVE_SM = False
×
29

30
USE_PCOLOR = False
12✔
31

32

33
def get_axis_density_methods(ax):
12✔
34
    """
35
    Get the relevant density and contouring methods for a given axis.
36

37
    Parameters
38
    -----------
39
    ax : :class:`matplotlib.axes.Axes` | :class:`mpltern.ternary.TernaryAxes`
40
        Axis to check.
41

42
    Returns
43
    --------
44
    pcolor, contour, contourf
45
        Relevant functions for this axis.
46
    """
47
    if ax.name == "ternary":
12✔
48
        pcolor = ax.tripcolor
12✔
49
        contour = ax.tricontour
12✔
50
        contourf = ax.tricontourf
12✔
51
    else:
52
        if USE_PCOLOR:
12✔
UNCOV
53
            pcolor = ax.pcolor
×
54
        else:
55
            pcolor = ax.pcolormesh
12✔
56
        contour = ax.contour
12✔
57
        contourf = ax.contourf
12✔
58
    return pcolor, contour, contourf
12✔
59

60

61
def percentile_contour_values_from_meshz(
12✔
62
    z, percentiles=[0.95, 0.66, 0.33], resolution=1000
63
):
64
    """
65
    Integrate a probability density distribution Z(X,Y) to obtain contours in Z which
66
    correspond to specified percentile contours. Contour values will be returned
67
    with the same order as the inputs.
68

69
    Parameters
70
    ----------
71
    z : :class:`numpy.ndarray`
72
        Probability density function over x, y.
73
    percentiles : :class:`numpy.ndarray`
74
        Percentile values for which to create contours.
75
    resolution : :class:`int`
76
        Number of bins for thresholds between 0. and max(Z)
77

78
    Returns
79
    -------
80
    labels : :class:`list`
81
        Labels for contours (percentiles, if above minimum z value).
82
    contours : :class:`list`
83
        Contour height values.
84

85
    Todo
86
    -----
87
    This may error for a list of percentiles where one or more requested
88
    values are below the miniumum threshold. The exception handling should
89
    be updated to cater for arrays - where some of the values may be above
90
    the minimum.
91
    """
92
    # Integral approach from https://stackoverflow.com/a/37932566
93
    t = np.linspace(0.0, z.max(), resolution)
12✔
94
    integral = ((z >= t[:, None, None]) * z).sum(axis=(1, 2))
12✔
95
    f = scipy.interpolate.interp1d(integral, t)
12✔
96
    try:
12✔
97
        t_contours = f(np.array(percentiles) * z.sum())
12✔
98
        return percentiles, t_contours
12✔
99
    except ValueError:
12✔
100
        # occurrs on the low-end of percentiles (high parts of distribution)
101
        # maximum positions of distributions are limited by the resolution
102
        # at some point there's a step down to zero
103
        logger.debug(
12✔
104
            "Percentile contour below minimum for given resolution"
105
            "Returning Minimium."
106
        )
107
        non_one = integral[~np.isclose(integral, np.ones_like(integral))]
12✔
108
        return ["min"], f(np.array([np.nanmax(non_one)]))
12✔
109

110

111
def plot_Z_percentiles(
12✔
112
    *coords,
113
    zi=None,
114
    percentiles=[0.95, 0.66, 0.33],
115
    ax=None,
116
    extent=None,
117
    fontsize=8,
118
    cmap=None,
119
    colors=None,
120
    linewidths=None,
121
    linestyles=None,
122
    contour_labels=None,
123
    label_contours=True,
124
    **kwargs
125
):
126
    """
127
    Plot percentile contours onto a 2D  (scaled or unscaled) probability density
128
    distribution Z over X,Y.
129

130
    Parameters
131
    ------------
132
    coords : :class:`numpy.ndarray`
133
        Arrays of (x, y) or (a, b, c) coordinates.
134
    z : :class:`numpy.ndarray`
135
        Probability density function over x, y.
136
    percentiles : :class:`list`
137
        Percentile values for which to create contours.
138
    ax : :class:`matplotlib.axes.Axes`, :code:`None`
139
        Axes on which to plot. If none given, will create a new Axes instance.
140
    extent : :class:`list`, :code:`None`
141
        List or np.ndarray in the form [-x, +x, -y, +y] over which the image extends.
142
    fontsize : :class:`float`
143
        Fontsize for the contour labels.
144
    cmap : :class:`matplotlib.colors.ListedColormap`
145
        Color map for the contours and contour labels.
146
    colors : :class:`str` | :class:`list`
147
        Colors for the contours, can optionally be specified *in place of* `cmap.`
148
    linewidths : :class:`str` | :class:`list`
149
        Widths of contour lines.
150
    linestyles : :class:`str` | :class:`list`
151
        Styles for contour lines.
152
    contour_labels : :class:`dict` | :class:`list`
153
        Labels to assign to contours, organised by level.
154
    label_contours :class:`bool`
155
        Whether to add text labels to individual contours.
156

157
    Returns
158
    -------
159
    :class:`matplotlib.contour.QuadContourSet`
160
        Plotted and formatted contour set.
161

162
    Notes
163
    -----
164
    When the contours are percentile based, high percentile contours tend to get
165
    washed our with colormapping - consider adding different controls on coloring,
166
    especially where there are only one or two contours specified. One way to do
167
    this would be via the string based keyword argument `colors` for plt.contour, with
168
    an adaption for non-string colours which post-hoc modifies the contour lines
169
    based on the specified colours?
170
    """
171
    if ax is None:
12✔
172
        fig, ax = plt.subplots(1, figsize=(6, 6))
12✔
173

174
    if extent is None:
12✔
175
        # if len(coords) == 2:  # currently won't work for ternary
176
        extent = np.array([[np.min(c), np.max(c)] for c in coords[:2]]).flatten()
12✔
177

178
    clabels, contour_values = percentile_contour_values_from_meshz(
12✔
179
        zi, percentiles=percentiles
180
    )
181

182
    pcolor, contour, contourf = get_axis_density_methods(ax)
12✔
183
    if colors is not None:  # colors are explicitly specified
12✔
184
        cmap = None
12✔
185

186
    # contours will need to increase for matplotlib, so we check the ordering here.
187
    ordering = np.argsort(contour_values)
12✔
188
    # sort out multi-object properties - reorder to fit the increasing order requirement
189
    cntr_config = {}
12✔
190
    for p, v in [
12✔
191
        ("colors", colors),
192
        ("linestyles", linestyles),
193
        ("linewidths", linewidths),
194
    ]:
195
        if v is not None:
12✔
196
            if isinstance(v, (list, tuple)):
12✔
197
                # reorder the list
198
                cntr_config[p] = [v[ix] for ix in ordering]
12✔
199
            else:
UNCOV
200
                cntr_config[p] = v
×
201

202
    cs = contour(
12✔
203
        *coords,
204
        zi,
205
        levels=contour_values[ordering],  # must increase
206
        cmap=cmap,
207
        **{**cntr_config, **kwargs}
208
    )
209
    if label_contours:
12✔
210
        fs = kwargs.pop("fontsize", None) or 8
12✔
211
        lbls = ax.clabel(cs, fontsize=fs, inline_spacing=0)
12✔
212
        z_contours = sorted(list(set([float(l.get_text()) for l in lbls])))
12✔
213
        trans = {
12✔
214
            float(t): str(p)
215
            for t, p in zip(z_contours, sorted(percentiles, reverse=True))
216
        }
217
        if contour_labels is None:
12✔
218
            _labels = [trans[float(l.get_text())] for l in lbls]
12✔
219
        else:
220
            if isinstance(contour_labels, dict):
12✔
221
                # get the labels from the dictionary provided
222
                contour_labels = {str(k): str(v) for k, v in contour_labels.items()}
×
UNCOV
223
                _labels = [contour_labels[trans[float(l.get_text())]] for l in lbls]
×
224
            else:  # a list is specified in the same order as the contours are drawn
225
                _labels = contour_labels
12✔
226

227
        for l, t in zip(lbls, _labels):
12✔
228
            l.set_text(t)
12✔
229
    return cs
12✔
230

231

232
def conditional_prob_density(
12✔
233
    y,
234
    x=None,
235
    logy=False,
236
    resolution=5,
237
    bins=50,
238
    yextent=None,
239
    rescale=True,
240
    mode="binkde",
241
    ret_centres=False,
242
    **kwargs
243
):
244
    """
245
    Estimate the conditional probability density of one dependent variable.
246

247
    Parameters
248
    -----------
249
    y : :class:`numpy.ndarray`
250
        Dependent variable for which to calculate conditional probability P(y | X=x)
251
    x : :class:`numpy.ndarray`, :code:`None`
252
        Optionally-specified independent index.
253
    logy : :class:`bool`
254
        Whether to use a logarithmic bin spacing on the y axis.
255
    resolution : :class:`int`
256
        Points added per segment via interpolation along the x axis.
257
    bins : :class:`int`
258
        Bins for histograms and grids along the independent axis.
259
    yextent : :class:`tuple`
260
        Extent in the y direction.
261
    rescale : :class:`bool`
262
        Whether to rescale bins to give the same max Z across x.
263
    mode : :class:`str`
264
        Mode of computation.
265

266
            If mode is :code:`"ckde"`, use
267
            :func:`statsmodels.nonparametric.KDEMultivariateConditional` to compute a
268
            conditional kernel density estimate. If mode is :code:`"kde"`, use a normal
269
            gaussian kernel density estimate. If mode is :code:`"binkde"`, use a gaussian
270
            kernel density estimate over y for each bin. If mode is :code:`"hist"`,
271
            compute a histogram.
272
    ret_centres : :class:`bool`
273
        Whether to return bin centres in addtion to histogram edges,
274
        e.g. for later contouring.
275

276
    Returns
277
    -------
278
    :class:`tuple` of :class:`numpy.ndarray`
279
        :code:`x` bin edges :code:`xe`, :code:`y` bin edges :code:`ye`, histogram/density
280
        estimates :code:`Z`. If :code:`ret_centres` is :code:`True`, the last two return
281
        values will contain the bin centres :code:`xi`, :code:`yi`.
282
    """
283
    # check for shapes
284
    assert not ((x is None) and (y is None))
12✔
285
    if y is None:  # Swap the variables. Create an index for x
12✔
286
        y = x
×
UNCOV
287
        x = None
×
288

289
    nvar = y.shape[1]
12✔
290
    if x is None:  # Create a simple arange-based index
12✔
UNCOV
291
        x = np.arange(nvar)
×
292

293
    if resolution:  # this is where REE previously broke down
12✔
294
        x, y = interpolate_line(x, y, n=resolution, logy=logy)
12✔
295

296
    if not x.shape == y.shape:
12✔
297
        try:  # x is an index to be tiled
12✔
298
            assert y.shape[1] == x.shape[0]
12✔
299
            x = np.tile(x, y.shape[0]).reshape(*y.shape)
12✔
UNCOV
300
        except AssertionError:
×
301
            # shape mismatch
UNCOV
302
            msg = "Mismatched shapes: x: {}, y: {}. Needs either ".format(
×
303
                x.shape, y.shape
304
            )
UNCOV
305
            raise AssertionError(msg)
×
306

307
    xx = x[0]
12✔
308
    if yextent is None:
12✔
309
        ymin, ymax = np.nanmin(y), np.nanmax(y)
12✔
310
    else:
UNCOV
311
        ymin, ymax = np.nanmin(yextent), np.nanmax(yextent)
×
312

313
    # remove non finite values for kde functions
314
    ystep = [(ymax - ymin) / bins, (ymax / ymin) / bins][logy]
12✔
315
    yy = [linspc_, logspc_][logy](ymin, ymax, step=ystep, bins=bins)
12✔
316
    if logy:  # make grid equally spaced, evaluate in log then transform back
12✔
317
        y, yy = np.log(y), np.log(yy)
12✔
318

319
    xi, yi = np.meshgrid(xx, yy)
12✔
320
    # bin centres may be off centre, but will be in the bins.
321
    xe, ye = np.meshgrid(bin_centres_to_edges(xx, sort=False), bin_centres_to_edges(yy))
12✔
322

323
    kde_kw = subkwargs(kwargs, sample_kde)
12✔
324

325
    if mode == "ckde":
12✔
326
        fltr = np.isfinite(y.flatten()) & np.isfinite(x.flatten())
12✔
327
        x, y = x.flatten()[fltr], y.flatten()[fltr]
12✔
328
        if HAVE_SM:
12✔
329
            dens_c = sm.nonparametric.KDEMultivariateConditional(
12✔
330
                endog=[y], exog=[x], dep_type="c", indep_type="c", bw="normal_reference"
331
            )
332
        else:
UNCOV
333
            raise ImportError("Requires statsmodels.")
×
334
        # statsmodels pdf takes values in reverse order
335
        zi = dens_c.pdf(yi.flatten(), xi.flatten()).reshape(xi.shape)
12✔
336
    elif mode == "binkde":  # calclate a kde per bin
12✔
337
        zi = np.zeros(xi.shape)
12✔
338
        for bin_index in range(x.shape[1]):  # bins along the x-axis
12✔
339
            # if np.isfinite(y[:, bin_index]).any(): # bins can be empty
340
            src = y[:, bin_index]
12✔
341
            sample_at = yi[:, bin_index]
12✔
342
            zi[:, bin_index] = sample_kde(src, sample_at, **kde_kw)
12✔
343
            # else:
344
            # pass
345
    elif mode == "kde":  # eqivalent to 2D KDE for scatter x,y * resolution
12✔
346
        xkde = sample_kde(x[0], x[0])  # marginal density along x
12✔
347
        src = np.vstack([x.flatten(), y.flatten()]).T
12✔
348
        sample_at = [xi, yi]  # meshgrid logistics dealt with by sample_kde
12✔
349
        try:
12✔
350
            zi = sample_kde(src, sample_at, **kde_kw)
12✔
351
        except LinAlgError:  # singular matrix, try adding miniscule noise on x?
×
352
            logger.warn("Singular Matrix")
×
UNCOV
353
            src[:, 0] += np.random.randn(*x.shape) * np.finfo(np.float64).eps
×
354
        zi = sample_kde(src, sample_at, **kde_kw)
12✔
355
        zi.reshape(xi.shape)
12✔
356
        zi /= xkde[np.newaxis, :]
12✔
357
    elif "hist" in mode.lower():  # simply compute the histogram
12✔
358
        # histogram monotonically increasing bins, requires logbins be transformed
359
        # calculate histogram in logy if needed
360
        bins = [bin_centres_to_edges(xx), bin_centres_to_edges(yy)]
12✔
361
        H, xe, ye = np.histogram2d(x.flatten(), y.flatten(), bins=bins)
12✔
362
        zi = H.T.reshape(xi.shape)
12✔
363
    else:
UNCOV
364
        raise NotImplementedError
×
365

366
    if rescale:  # rescale bins across x
12✔
367
        xzfactors = np.nanmax(zi) / np.nanmax(zi, axis=0)
12✔
368
        zi *= xzfactors[np.newaxis, :]
12✔
369

370
    if logy:
12✔
371
        yi, ye = np.exp(yi), np.exp(ye)
12✔
372
    if ret_centres:
12✔
373
        return xe, ye, zi, xi, yi
12✔
UNCOV
374
    return xe, ye, zi
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc