• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

morganjwilliams / pyrolite / 17569160869

09 Sep 2025 01:41AM UTC coverage: 91.465% (-0.1%) from 91.614%
17569160869

push

github

morganjwilliams
Add uncertainties, add optional deps for pyproject.toml; WIP demo NB

6226 of 6807 relevant lines covered (91.46%)

10.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.93
/pyrolite/util/plot/density.py
1
"""
2
Functions for dealing with kernel density estimation.
3

4
Attributes
5
----------
6
USE_PCOLOR : :class:`bool`
7
    Option to use the :func:`matplotlib.pyplot.pcolor` function in place
8
    of :func:`matplotlib.pyplot.pcolormesh`.
9
"""
10

11
import matplotlib.pyplot as plt
12✔
12
import numpy as np
12✔
13
import scipy.interpolate
12✔
14
from numpy.linalg import LinAlgError
12✔
15

16
from ..distributions import sample_kde
12✔
17
from ..log import Handle
12✔
18
from ..math import interpolate_line, linspc_, logspc_
12✔
19
from ..meta import subkwargs
12✔
20
from .grid import bin_centres_to_edges
12✔
21

22
logger = Handle(__name__)
12✔
23

24
try:
12✔
25
    import statsmodels.api as sm
12✔
26

27
    HAVE_SM = True
12✔
28
except ImportError:
×
29
    HAVE_SM = False
×
30

31
USE_PCOLOR = False
12✔
32

33

34
def get_axis_density_methods(ax):
12✔
35
    """
36
    Get the relevant density and contouring methods for a given axis.
37

38
    Parameters
39
    -----------
40
    ax : :class:`matplotlib.axes.Axes` | :class:`mpltern.ternary.TernaryAxes`
41
        Axis to check.
42

43
    Returns
44
    --------
45
    pcolor, contour, contourf
46
        Relevant functions for this axis.
47
    """
48
    if ax.name == "ternary":
12✔
49
        pcolor = ax.tripcolor
12✔
50
        contour = ax.tricontour
12✔
51
        contourf = ax.tricontourf
12✔
52
    else:
53
        if USE_PCOLOR:
12✔
54
            pcolor = ax.pcolor
×
55
        else:
56
            pcolor = ax.pcolormesh
12✔
57
        contour = ax.contour
12✔
58
        contourf = ax.contourf
12✔
59
    return pcolor, contour, contourf
12✔
60

61

62
def percentile_contour_values_from_meshz(
12✔
63
    z, percentiles=[0.95, 0.66, 0.33], resolution=1000
64
):
65
    """
66
    Integrate a probability density distribution Z(X,Y) to obtain contours in Z which
67
    correspond to specified percentile contours. Contour values will be returned
68
    with the same order as the inputs.
69

70
    Parameters
71
    ----------
72
    z : :class:`numpy.ndarray`
73
        Probability density function over x, y.
74
    percentiles : :class:`numpy.ndarray`
75
        Percentile values for which to create contours.
76
    resolution : :class:`int`
77
        Number of bins for thresholds between 0. and max(Z)
78

79
    Returns
80
    -------
81
    labels : :class:`list`
82
        Labels for contours (percentiles, if above minimum z value).
83
    contours : :class:`list`
84
        Contour height values.
85

86
    Todo
87
    -----
88
    This may error for a list of percentiles where one or more requested
89
    values are below the miniumum threshold. The exception handling should
90
    be updated to cater for arrays - where some of the values may be above
91
    the minimum.
92
    """
93
    # Integral approach from https://stackoverflow.com/a/37932566
94
    t = np.linspace(0.0, z.max(), resolution)
12✔
95
    integral = ((z >= t[:, None, None]) * z).sum(axis=(1, 2))
12✔
96
    f = scipy.interpolate.interp1d(integral, t)
12✔
97
    try:
12✔
98
        t_contours = f(np.array(percentiles) * z.sum())
12✔
99
        return percentiles, t_contours
12✔
100
    except ValueError:
12✔
101
        # occurrs on the low-end of percentiles (high parts of distribution)
102
        # maximum positions of distributions are limited by the resolution
103
        # at some point there's a step down to zero
104
        logger.debug(
12✔
105
            "Percentile contour below minimum for given resolution"
106
            "Returning Minimium."
107
        )
108
        non_one = integral[~np.isclose(integral, np.ones_like(integral))]
12✔
109
        return ["min"], f(np.array([np.nanmax(non_one)]))
12✔
110

111

112
def plot_Z_percentiles(
12✔
113
    *coords,
114
    zi=None,
115
    percentiles=[0.95, 0.66, 0.33],
116
    ax=None,
117
    extent=None,
118
    fontsize=8,
119
    cmap=None,
120
    colors=None,
121
    linewidths=None,
122
    linestyles=None,
123
    contour_labels=None,
124
    label_contours=True,
125
    **kwargs,
126
):
127
    """
128
    Plot percentile contours onto a 2D  (scaled or unscaled) probability density
129
    distribution Z over X,Y.
130

131
    Parameters
132
    ------------
133
    coords : :class:`numpy.ndarray`
134
        Arrays of (x, y) or (a, b, c) coordinates.
135
    z : :class:`numpy.ndarray`
136
        Probability density function over x, y.
137
    percentiles : :class:`list`
138
        Percentile values for which to create contours.
139
    ax : :class:`matplotlib.axes.Axes`, :code:`None`
140
        Axes on which to plot. If none given, will create a new Axes instance.
141
    extent : :class:`list`, :code:`None`
142
        List or np.ndarray in the form [-x, +x, -y, +y] over which the image extends.
143
    fontsize : :class:`float`
144
        Fontsize for the contour labels.
145
    cmap : :class:`matplotlib.colors.ListedColormap`
146
        Color map for the contours and contour labels.
147
    colors : :class:`str` | :class:`list`
148
        Colors for the contours, can optionally be specified *in place of* `cmap.`
149
    linewidths : :class:`str` | :class:`list`
150
        Widths of contour lines.
151
    linestyles : :class:`str` | :class:`list`
152
        Styles for contour lines.
153
    contour_labels : :class:`dict` | :class:`list`
154
        Labels to assign to contours, organised by level.
155
    label_contours :class:`bool`
156
        Whether to add text labels to individual contours.
157

158
    Returns
159
    -------
160
    :class:`matplotlib.contour.QuadContourSet`
161
        Plotted and formatted contour set.
162

163
    Notes
164
    -----
165
    When the contours are percentile based, high percentile contours tend to get
166
    washed our with colormapping - consider adding different controls on coloring,
167
    especially where there are only one or two contours specified. One way to do
168
    this would be via the string based keyword argument `colors` for plt.contour, with
169
    an adaption for non-string colours which post-hoc modifies the contour lines
170
    based on the specified colours?
171
    """
172
    if ax is None:
12✔
173
        fig, ax = plt.subplots(1, figsize=(6, 6))
12✔
174

175
    if extent is None:
12✔
176
        # if len(coords) == 2:  # currently won't work for ternary
177
        extent = np.array([[np.min(c), np.max(c)] for c in coords[:2]]).flatten()
12✔
178

179
    clabels, contour_values = percentile_contour_values_from_meshz(
12✔
180
        zi, percentiles=percentiles
181
    )
182

183
    pcolor, contour, contourf = get_axis_density_methods(ax)
12✔
184
    if colors is not None:  # colors are explicitly specified
12✔
185
        cmap = None
12✔
186

187
    # contours will need to increase for matplotlib, so we check the ordering here.
188
    ordering = np.argsort(contour_values)
12✔
189
    # sort out multi-object properties - reorder to fit the increasing order requirement
190
    cntr_config = {}
12✔
191
    for p, v in [
12✔
192
        ("colors", colors),
193
        ("linestyles", linestyles),
194
        ("linewidths", linewidths),
195
    ]:
196
        if v is not None:
12✔
197
            if isinstance(v, (list, tuple)):
12✔
198
                # reorder the list
199
                cntr_config[p] = [v[ix] for ix in ordering]
12✔
200
            else:
201
                cntr_config[p] = v
×
202

203
    cs = contour(
12✔
204
        *coords,
205
        zi,
206
        levels=contour_values[ordering],  # must increase
207
        cmap=cmap,
208
        **{**cntr_config, **kwargs},
209
    )
210
    if label_contours:
12✔
211
        fs = kwargs.pop("fontsize", None) or 8
12✔
212
        lbls = ax.clabel(cs, fontsize=fs, inline_spacing=0)
12✔
213
        z_contours = sorted(list(set([float(l.get_text()) for l in lbls])))
12✔
214
        trans = {
12✔
215
            float(t): str(p)
216
            for t, p in zip(z_contours, sorted(percentiles, reverse=True))
217
        }
218
        if contour_labels is None:
12✔
219
            _labels = [trans[float(l.get_text())] for l in lbls]
12✔
220
        else:
221
            if isinstance(contour_labels, dict):
12✔
222
                # get the labels from the dictionary provided
223
                contour_labels = {str(k): str(v) for k, v in contour_labels.items()}
×
224
                _labels = [contour_labels[trans[float(l.get_text())]] for l in lbls]
×
225
            else:  # a list is specified in the same order as the contours are drawn
226
                _labels = contour_labels
12✔
227

228
        for l, t in zip(lbls, _labels):
12✔
229
            l.set_text(t)
12✔
230
    return cs
12✔
231

232

233
def conditional_prob_density(
12✔
234
    y,
235
    x=None,
236
    logy=False,
237
    resolution=5,
238
    bins=50,
239
    yextent=None,
240
    rescale=True,
241
    mode="binkde",
242
    ret_centres=False,
243
    **kwargs,
244
):
245
    """
246
    Estimate the conditional probability density of one dependent variable.
247

248
    Parameters
249
    -----------
250
    y : :class:`numpy.ndarray`
251
        Dependent variable for which to calculate conditional probability P(y | X=x)
252
    x : :class:`numpy.ndarray`, :code:`None`
253
        Optionally-specified independent index.
254
    logy : :class:`bool`
255
        Whether to use a logarithmic bin spacing on the y axis.
256
    resolution : :class:`int`
257
        Points added per segment via interpolation along the x axis.
258
    bins : :class:`int`
259
        Bins for histograms and grids along the independent axis.
260
    yextent : :class:`tuple`
261
        Extent in the y direction.
262
    rescale : :class:`bool`
263
        Whether to rescale bins to give the same max Z across x.
264
    mode : :class:`str`
265
        Mode of computation.
266

267
            If mode is :code:`"ckde"`, use
268
            :func:`statsmodels.nonparametric.KDEMultivariateConditional` to compute a
269
            conditional kernel density estimate. If mode is :code:`"kde"`, use a normal
270
            gaussian kernel density estimate. If mode is :code:`"binkde"`, use a gaussian
271
            kernel density estimate over y for each bin. If mode is :code:`"hist"`,
272
            compute a histogram.
273
    ret_centres : :class:`bool`
274
        Whether to return bin centres in addtion to histogram edges,
275
        e.g. for later contouring.
276

277
    Returns
278
    -------
279
    :class:`tuple` of :class:`numpy.ndarray`
280
        :code:`x` bin edges :code:`xe`, :code:`y` bin edges :code:`ye`, histogram/density
281
        estimates :code:`Z`. If :code:`ret_centres` is :code:`True`, the last two return
282
        values will contain the bin centres :code:`xi`, :code:`yi`.
283
    """
284
    # check for shapes
285
    assert not ((x is None) and (y is None))
12✔
286
    if y is None:  # Swap the variables. Create an index for x
12✔
287
        y = x
×
288
        x = None
×
289

290
    nvar = y.shape[1]
12✔
291
    if x is None:  # Create a simple arange-based index
12✔
292
        x = np.arange(nvar)
×
293

294
    if resolution:  # this is where REE previously broke down
12✔
295
        x, y = interpolate_line(x, y, n=resolution, logy=logy)
12✔
296

297
    if not x.shape == y.shape:
12✔
298
        try:  # x is an index to be tiled
12✔
299
            assert y.shape[1] == x.shape[0]
12✔
300
            x = np.tile(x, y.shape[0]).reshape(*y.shape)
12✔
301
        except AssertionError:
×
302
            # shape mismatch
303
            msg = "Mismatched shapes: x: {}, y: {}. Needs either ".format(
×
304
                x.shape, y.shape
305
            )
306
            raise AssertionError(msg)
×
307

308
    xx = x[0]
12✔
309
    if yextent is None:
12✔
310
        ymin, ymax = np.nanmin(y), np.nanmax(y)
12✔
311
    else:
312
        ymin, ymax = np.nanmin(yextent), np.nanmax(yextent)
×
313

314
    # remove non finite values for kde functions
315
    ystep = [(ymax - ymin) / bins, (ymax / ymin) / bins][logy]
12✔
316
    yy = [linspc_, logspc_][logy](ymin, ymax, step=ystep, bins=bins)
12✔
317
    if logy:  # make grid equally spaced, evaluate in log then transform back
12✔
318
        y, yy = np.log(y), np.log(yy)
12✔
319

320
    xi, yi = np.meshgrid(xx, yy)
12✔
321
    # bin centres may be off centre, but will be in the bins.
322
    xe, ye = np.meshgrid(bin_centres_to_edges(xx, sort=False), bin_centres_to_edges(yy))
12✔
323

324
    kde_kw = subkwargs(kwargs, sample_kde)
12✔
325

326
    if mode == "ckde":
12✔
327
        fltr = np.isfinite(y.flatten()) & np.isfinite(x.flatten())
12✔
328
        x, y = x.flatten()[fltr], y.flatten()[fltr]
12✔
329
        if HAVE_SM:
12✔
330
            dens_c = sm.nonparametric.KDEMultivariateConditional(
12✔
331
                endog=[y], exog=[x], dep_type="c", indep_type="c", bw="normal_reference"
332
            )
333
        else:
334
            raise ImportError("Requires statsmodels.")
×
335
        # statsmodels pdf takes values in reverse order
336
        zi = dens_c.pdf(yi.flatten(), xi.flatten()).reshape(xi.shape)
12✔
337
    elif mode == "binkde":  # calclate a kde per bin
12✔
338
        zi = np.zeros(xi.shape)
12✔
339
        for bin_index in range(x.shape[1]):  # bins along the x-axis
12✔
340
            # if np.isfinite(y[:, bin_index]).any(): # bins can be empty
341
            src = y[:, bin_index]
12✔
342
            sample_at = yi[:, bin_index]
12✔
343
            zi[:, bin_index] = sample_kde(src, sample_at, **kde_kw)
12✔
344
            # else:
345
            # pass
346
    elif mode == "kde":  # eqivalent to 2D KDE for scatter x,y * resolution
12✔
347
        xkde = sample_kde(x[0], x[0])  # marginal density along x
12✔
348
        src = np.vstack([x.flatten(), y.flatten()]).T
12✔
349
        sample_at = [xi, yi]  # meshgrid logistics dealt with by sample_kde
12✔
350
        try:
12✔
351
            zi = sample_kde(src, sample_at, **kde_kw)
12✔
352
        except LinAlgError:  # singular matrix, try adding miniscule noise on x?
×
353
            logger.warn("Singular Matrix")
×
354
            src[:, 0] += np.random.randn(*x.shape) * np.finfo(np.float64).eps
×
355
        zi = sample_kde(src, sample_at, **kde_kw)
12✔
356
        zi.reshape(xi.shape)
12✔
357
        zi /= xkde[np.newaxis, :]
12✔
358
    elif "hist" in mode.lower():  # simply compute the histogram
12✔
359
        # histogram monotonically increasing bins, requires logbins be transformed
360
        # calculate histogram in logy if needed
361
        bins = [bin_centres_to_edges(xx), bin_centres_to_edges(yy)]
12✔
362
        H, xe, ye = np.histogram2d(x.flatten(), y.flatten(), bins=bins)
12✔
363
        zi = H.T.reshape(xi.shape)
12✔
364
    else:
365
        raise NotImplementedError
×
366

367
    if rescale:  # rescale bins across x
12✔
368
        xzfactors = np.nanmax(zi) / np.nanmax(zi, axis=0)
12✔
369
        zi *= xzfactors[np.newaxis, :]
12✔
370

371
    if logy:
12✔
372
        yi, ye = np.exp(yi), np.exp(ye)
12✔
373
    if ret_centres:
12✔
374
        return xe, ye, zi, xi, yi
12✔
375
    return xe, ye, zi
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc