• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

morganjwilliams / pyrolite / 18251526919

29 Oct 2024 02:20AM UTC coverage: 91.571% (-0.07%) from 91.64%
18251526919

push

github

morganjwilliams
Merge branch 'release/0.3.6' into main

53 of 62 new or added lines in 12 files covered. (85.48%)

3 existing lines in 2 files now uncovered.

6225 of 6798 relevant lines covered (91.57%)

10.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.71
/pyrolite/comp/codata.py
1
import warnings
12✔
2

3
import numpy as np
12✔
4
import pandas as pd
12✔
5
import scipy.special
12✔
6
import scipy.stats
12✔
7
import sympy
12✔
8

9
from ..util.log import Handle
12✔
10

11
# from .renorm import renormalise, close
12
from ..util.math import helmert_basis, symbolic_helmert_basis
12✔
13

14
logger = Handle(__name__)
12✔
15

16
__TRANSFORMS__ = {}
12✔
17

18
__sympy_protected_variables__ = {"S": "Ss"}
12✔
19

20

21
def close(X: np.ndarray, sumf=np.sum):
12✔
22
    """
23
    Closure operator for compositional data.
24

25
    Parameters
26
    -----------
27
    X : :class:`numpy.ndarray`
28
        Array to close.
29
    sumf : :class:`callable`, :func:`numpy.sum`
30
        Sum function to use for closure.
31

32
    Returns
33
    --------
34
    :class:`numpy.ndarray`
35
        Closed array.
36

37
    Notes
38
    ------
39
    Checks for non-positive entries and replaces zeros with NaN values.
40
    """
41

42
    if np.any(X <= 0):
12✔
43
        warnings.warn(
12✔
44
            "Non-positive entries found. Closure operation assumes all positive entries.",
45
            UserWarning,
46
        )
47

48
    if X.ndim == 2:
12✔
49
        C = np.array(sumf(X, axis=1), dtype=float)[:, np.newaxis]
12✔
50
    else:
51
        C = np.array(sumf(X), dtype=float)
12✔
52

53
    # Replace zero sums with NaN to prevent division by zero
54
    C[np.isclose(C, 0)] = np.nan
12✔
55

56
    # Return the array closed to sum to 1
57
    return np.divide(X, C)
12✔
58

59

60
def renormalise(df: pd.DataFrame, components: list = [], scale=100.0):
12✔
61
    """
62
    Renormalises compositional data to ensure closure.
63

64
    Parameters
65
    ------------
66
    df : :class:`pandas.DataFrame`
67
        Dataframe to renomalise.
68
    components : :class:`list`
69
        Option subcompositon to renormalise to 100. Useful for the use case
70
        where compostional data and non-compositional data are stored in the
71
        same dataframe.
72
    scale : :class:`float`, :code:`100.`
73
        Closure parameter. Typically either 100 or 1.
74

75
    Returns
76
    --------
77
    :class:`pandas.DataFrame`
78
        Renormalized dataframe.
79
    """
80

81
    dfc = df.copy(deep=True)
12✔
82
    if components:
12✔
83
        if not all(col in dfc.columns for col in components):
12✔
NEW
84
            raise ValueError("Not all specified components exist in the DataFrame.")
×
85
        dfc = dfc[components]
12✔
86

87
    if (dfc <= 0).any().any():
12✔
88
        warnings.warn(
12✔
89
            "Non-positive entries found in specified components. "
90
            "Negative values have been replaced with NaN. "
91
            "Renormalisation assumes all positive entries.",
92
            UserWarning,
93
        )
94

95
    # Replace negative values with NaN
96
    dfc[dfc < 0] = np.nan
12✔
97

98
    # Renormalise all columns if no components are specified
99
    sum_rows = dfc.sum(axis=1)
12✔
100
    # Handle division by zero by replacing zeros with NaN
101
    sum_rows.replace(0, np.nan, inplace=True)
12✔
102
    dfc = dfc.divide(sum_rows, axis=0) * scale
12✔
103

104
    return dfc
12✔
105

106

107
def ALR(X: np.ndarray, ind: int = -1, null_col=False):
12✔
108
    """
109
    Additive Log Ratio transformation.
110

111
    Parameters
112
    ---------------
113
    X: :class:`numpy.ndarray`
114
        Array on which to perform the transformation, of shape :code:`(N, D)`.
115
    ind: :class:`int`
116
        Index of column used as denominator.
117
    null_col : :class:`bool`
118
        Whether to keep the redundant column.
119

120
    Returns
121
    ---------
122
    :class:`numpy.ndarray`
123
        ALR-transformed array, of shape :code:`(N, D-1)`.
124
    """
125

126
    Y = X.copy()
12✔
127
    assert Y.ndim in [1, 2]
12✔
128
    dimensions = Y.shape[Y.ndim - 1]
12✔
129
    if ind < 0:
12✔
130
        ind += dimensions
12✔
131

132
    if Y.ndim == 2:
12✔
133
        Y = np.divide(Y, Y[:, ind][:, np.newaxis])
12✔
134
        if not null_col:
12✔
135
            Y = Y[:, [i for i in range(dimensions) if not i == ind]]
12✔
136
    else:
137
        Y = np.divide(X, X[ind])
×
138
        if not null_col:
×
139
            Y = Y[[i for i in range(dimensions) if not i == ind]]
×
140

141
    return np.log(Y)
12✔
142

143

144
def inverse_ALR(Y: np.ndarray, ind=-1, null_col=False):
12✔
145
    """
146
    Inverse Centred Log Ratio transformation.
147

148
    Parameters
149
    ---------------
150
    Y : :class:`numpy.ndarray`
151
        Array on which to perform the inverse transformation, of shape :code:`(N, D-1)`.
152
    ind : :class:`int`
153
        Index of column used as denominator.
154
    null_col : :class:`bool`, :code:`False`
155
        Whether the array contains an extra redundant column
156
        (i.e. shape is :code:`(N, D)`).
157

158
    Returns
159
    --------
160
    :class:`numpy.ndarray`
161
        Inverse-ALR transformed array, of shape :code:`(N, D)`.
162
    """
163
    assert Y.ndim in [1, 2]
12✔
164

165
    X = Y.copy()
12✔
166
    dimensions = X.shape[X.ndim - 1]
12✔
167
    if not null_col:
12✔
168
        idx = np.arange(0, dimensions + 1)
12✔
169

170
        if ind != -1:
12✔
171
            idx = np.array(list(idx[idx < ind]) + [-1] + list(idx[idx >= ind + 1] - 1))
12✔
172

173
        # Add a zero-column and reorder columns
174
        if Y.ndim == 2:
12✔
175
            X = np.concatenate((X, np.zeros((X.shape[0], 1))), axis=1)
12✔
176
            X = X[:, idx]
12✔
177
        else:
178
            X = np.append(X, np.array([0]))
12✔
179
            X = X[idx]
12✔
180

181
    # Inverse log and closure operations
182
    X = np.exp(X)
12✔
183
    X = close(X)
12✔
184
    return X
12✔
185

186

187
def CLR(X: np.ndarray):
12✔
188
    """
189
    Centred Log Ratio transformation.
190

191
    Parameters
192
    ---------------
193
    X : :class:`numpy.ndarray`
194
        2D array on which to perform the transformation, of shape :code:`(N, D)`.
195

196
    Returns
197
    ---------
198
    :class:`numpy.ndarray`
199
        CLR-transformed array, of shape :code:`(N, D)`.
200
    """
201
    X = np.array(X)
12✔
202
    X = np.divide(X, np.sum(X, axis=1).reshape(-1, 1))  # Closure operation
12✔
203
    Y = np.log(X)  # Log operation
12✔
204
    nvars = max(X.shape[1], 1)  # if the array is empty we'd get a div-by-0 error
12✔
205
    G = (1 / nvars) * np.nansum(Y, axis=1)[:, np.newaxis]
12✔
206
    Y -= G
12✔
207
    return Y
12✔
208

209

210
def inverse_CLR(Y: np.ndarray):
12✔
211
    """
212
    Inverse Centred Log Ratio transformation.
213

214
    Parameters
215
    ---------------
216
    Y : :class:`numpy.ndarray`
217
        Array on which to perform the inverse transformation, of shape :code:`(N, D)`.
218

219
    Returns
220
    ---------
221
    :class:`numpy.ndarray`
222
        Inverse-CLR transformed array, of shape :code:`(N, D)`.
223
    """
224
    # Inverse of log operation
225
    X = np.exp(Y)
12✔
226
    # Closure operation
227
    X = np.divide(X, np.nansum(X, axis=1)[:, np.newaxis])
12✔
228
    return X
12✔
229

230

231
def ILR(X: np.ndarray, psi=None, **kwargs):
12✔
232
    """
233
    Isometric Log Ratio transformation.
234

235
    Parameters
236
    ---------------
237
    X : :class:`numpy.ndarray`
238
        Array on which to perform the transformation, of shape :code:`(N, D)`.
239
    psi : :class:`numpy.ndarray`
240
        Array or matrix representing the ILR basis; optionally specified.
241

242
    Returns
243
    --------
244
    :class:`numpy.ndarray`
245
        ILR-transformed array, of shape :code:`(N, D-1)`.
246
    """
247
    d = X.shape[1]
12✔
248
    Y = CLR(X)
12✔
249
    if psi is None:
12✔
250
        psi = helmert_basis(D=d, **kwargs)  # Get a basis
12✔
251
    assert np.allclose(psi @ psi.T, np.eye(d - 1))
12✔
252
    return Y @ psi.T
12✔
253

254

255
def inverse_ILR(Y: np.ndarray, X: np.ndarray = None, psi=None, **kwargs):
12✔
256
    """
257
    Inverse Isometric Log Ratio transformation.
258

259
    Parameters
260
    ---------------
261
    Y : :class:`numpy.ndarray`
262
        Array on which to perform the inverse transformation, of shape :code:`(N, D-1)`.
263
    X : :class:`numpy.ndarray`, :code:`None`
264
        Optional specification for an array from which to derive the orthonormal basis,
265
        with shape :code:`(N, D)`.
266
    psi : :class:`numpy.ndarray`
267
        Array or matrix representing the ILR basis; optionally specified.
268

269
    Returns
270
    --------
271
    :class:`numpy.ndarray`
272
        Inverse-ILR transformed array, of shape :code:`(N, D)`.
273
    """
274
    if psi is None:
12✔
275
        psi = helmert_basis(D=Y.shape[1] + 1, **kwargs)
12✔
276
    C = Y @ psi
12✔
277
    X = inverse_CLR(C)  # Inverse log operation
12✔
278
    return X
12✔
279

280

281
def logratiomean(df, transform=CLR):
12✔
282
    """
283
    Take a mean of log-ratios along the index of a dataframe.
284

285
    Parameters
286
    -----------
287
    df : :class:`pandas.DataFrame`
288
        Dataframe from which to compute a mean along the index.
289
    transform : :class:`callable`
290
        Log transform to use.
291
    inverse_transform : :class:`callable`
292
        Inverse of log transform.
293

294
    Returns
295
    ---------
296
    :class:`pandas.Series`
297
        Mean values as a pandas series.
298
    """
299
    tfm, inv_tfm = get_transforms(transform)
12✔
300
    return pd.Series(
12✔
301
        inv_tfm(np.mean(tfm(df.values), axis=0)[np.newaxis, :])[0],
302
        index=df.columns,
303
    )
304

305

306
########################################################################################
307
# Logratio variable naming
308
########################################################################################
309

310

311
def _aggregate_sympy_constants(expr):
12✔
312
    """
313
    Aggregate constants and symbolic components within a sympy expression to separate
314
    sub-expressions.
315

316
    Parameters
317
    -----------
318
    expr : :class:`sympy.core.expr.Expr`
319
        Expression to aggregate. For matricies, use :func:`~sympy.Matrix.applyfunc`.
320

321
    Returns
322
    -------
323
    :class:`sympy.core.expr.Expr`
324
    """
325
    const = expr.func(*[term for term in expr.args if not term.free_symbols])
12✔
326
    vars = expr.func(*[term for term in expr.args if term.free_symbols])
12✔
327
    if const:
12✔
328
        return sympy.UnevaluatedExpr(const) * sympy.UnevaluatedExpr(vars)
12✔
329
    else:
330
        return sympy.UnevaluatedExpr(vars)
×
331

332

333
def get_ALR_labels(df, mode="simple", ind=-1, **kwargs):
12✔
334
    """
335
    Get symbolic labels for ALR coordinates based on dataframe columns.
336

337
    Parameters
338
    ----------
339
    df : :class:`pandas.DataFrame`
340
        Dataframe to generate ALR labels for.
341
    mode : :class:`str`
342
        Mode of label to return (:code:`LaTeX`, :code:`simple`).
343

344
    Returns
345
    -------
346
    :class:`list`
347
        List of ALR coordinates corresponding to dataframe columns.
348

349
    Notes
350
    ------
351
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
352
    If one of these column names is found, it will be replaced with a title-cased
353
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').
354
    """
355

356
    names = [
12✔
357
        r"{} / {}".format(
358
            (
359
                c
360
                if c not in __sympy_protected_variables__
361
                else __sympy_protected_variables__[c]
362
            ),
363
            df.columns[ind],
364
        )
365
        for c in df.columns
366
    ]
367

368
    if mode.lower() == "latex":
12✔
369
        # edited to avoid issues with clashes between element names and latex (e.g. Ge)
370
        D = df.columns.size
12✔
371
        # encode symbolic variables
372
        vars = [sympy.var("c_{}".format(ix)) for ix in range(D)]
12✔
373
        expr = sympy.Matrix([[sympy.ln(v) for v in vars]])
12✔
374
        named_expr = expr.subs({k: v for (k, v) in zip(vars, names)})
12✔
375
        labels = [
12✔
376
            r"${}$".format(sympy.latex(l, mul_symbol="dot", ln_notation=True))
377
            for l in named_expr
378
        ]
379
    elif mode.lower() == "simple":
12✔
380
        labels = ["ALR({})".format(n) for n in names]
12✔
381
    else:
382
        msg = "Label mode {} not recognised.".format(mode)
12✔
383
        raise NotImplementedError(msg)
12✔
384
    return labels
12✔
385

386

387
def get_CLR_labels(df, mode="simple", **kwargs):
12✔
388
    """
389
    Get symbolic labels for CLR coordinates based on dataframe columns.
390

391
    Parameters
392
    ----------
393
    df : :class:`pandas.DataFrame`
394
        Dataframe to generate CLR labels for.
395
    mode : :class:`str`
396
        Mode of label to return (:code:`LaTeX`, :code:`simple`).
397

398
    Returns
399
    -------
400
    :class:`list`
401
        List of CLR coordinates corresponding to dataframe columns.
402

403
    Notes
404
    ------
405
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
406
    If one of these column names is found, it will be replaced with a title-cased
407
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').
408
    """
409

410
    names = [
12✔
411
        r"{} / γ".format(
412
            (
413
                c
414
                if c not in __sympy_protected_variables__
415
                else __sympy_protected_variables__[c]
416
            ),
417
        )
418
        for c in df.columns
419
    ]
420
    D = df.columns.size
12✔
421

422
    if mode.lower() == "latex":
12✔
423
        # edited to avoid issues with clashes between element names and latex (e.g. Ge)
424
        D = df.columns.size
12✔
425
        # encode symbolic variables
426
        vars = [sympy.var("c_{}".format(ix)) for ix in range(D)]
12✔
427
        expr = sympy.Matrix([[sympy.ln(v) for v in vars]])
12✔
428
        named_expr = expr.subs({k: v for (k, v) in zip(vars, names)})
12✔
429
        labels = [
12✔
430
            r"${}$".format(sympy.latex(l, mul_symbol="dot", ln_notation=True))
431
            for l in named_expr
432
        ]
433
    elif mode.lower() == "simple":
12✔
434
        labels = ["CLR({}/G)".format(c) for c in df.columns]
12✔
435
    else:
436
        msg = "Label mode {} not recognised.".format(mode)
12✔
437
        raise NotImplementedError(msg)
12✔
438
    return labels
12✔
439

440

441
def get_ILR_labels(df, mode="latex", **kwargs):
12✔
442
    """
443
    Get symbolic labels for ILR coordinates based on dataframe columns.
444

445
    Parameters
446
    ----------
447
    df : :class:`pandas.DataFrame`
448
        Dataframe to generate ILR labels for.
449
    mode : :class:`str`
450
        Mode of label to return (:code:`LaTeX`, :code:`simple`).
451

452
    Returns
453
    -------
454
    :class:`list`
455
        List of ILR coordinates corresponding to dataframe columns.
456

457
    Notes
458
    ------
459
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
460
    If one of these column names is found, it will be replaced with a title-cased
461
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').
462
    """
463
    D = df.columns.size
12✔
464
    # encode symbolic variables
465
    sym_vars = [sympy.var("c_{}".format(ix)) for ix in range(D)]
12✔
466
    arr = sympy.Matrix([[sympy.ln(v) for v in sym_vars]])
12✔
467

468
    # this is the CLR --> ILR transform
469
    helmert = symbolic_helmert_basis(D, **kwargs)
12✔
470
    expr = sympy.simplify(
12✔
471
        sympy.logcombine(sympy.simplify(arr @ helmert.transpose()), force=True)
472
    )
473
    expr = expr.applyfunc(_aggregate_sympy_constants)
12✔
474
    # sub in Phi (the CLR normalisation variable)
475
    names = [
12✔
476
        r"{} / γ".format(
477
            (
478
                c
479
                if c not in __sympy_protected_variables__
480
                else __sympy_protected_variables__[c]
481
            ),
482
        )
483
        for c in df.columns
484
    ]
485
    named_expr = expr.subs({k: v for (k, v) in zip(sym_vars, names)})
12✔
486
    # format latex labels
487
    if mode.lower() == "latex":
12✔
488
        labels = [
12✔
489
            r"${}$".format(sympy.latex(l, mul_symbol="dot", ln_notation=True))
490
            for l in named_expr
491
        ]
492
    elif mode.lower() == "simple":
12✔
493
        # here we could exclude scaling terms and just use ILR(A/B)
494
        unscaled_components = named_expr.applyfunc(
12✔
495
            lambda x: x.func(*[term for term in x.args if term.free_symbols])
496
        )
497
        labels = [str(l).replace("log", "ILR") for l in unscaled_components]
12✔
498
    else:
499
        msg = "Label mode {} not recognised.".format(mode)
12✔
500
        raise NotImplementedError(msg)
12✔
501
    return labels
12✔
502

503

504
########################################################################################
505
# Box-cox transforms
506
########################################################################################
507

508

509
def boxcox(
12✔
510
    X: np.ndarray,
511
    lmbda=None,
512
    lmbda_search_space=(-1, 5),
513
    search_steps=100,
514
    return_lmbda=False,
515
):
516
    """
517
    Box-Cox transformation.
518

519
    Parameters
520
    ---------------
521
    X : :class:`numpy.ndarray`
522
        Array on which to perform the transformation.
523
    lmbda : :class:`numpy.number`, :code:`None`
524
        Lambda value used to forward-transform values. If none, it will be calculated
525
        using the mean
526
    lmbda_search_space : :class:`tuple`
527
        Range tuple (min, max).
528
    search_steps : :class:`int`
529
        Steps for lambda search range.
530
    return_lmbda : :class:`bool`
531
        Whether to also return the lambda value.
532

533
    Returns
534
    -------
535
    :class:`numpy.ndarray` | :class:`numpy.ndarray`(:class:`float`)
536
        Box-Cox transformed array. If `return_lmbda` is true, tuple contains data and
537
        lambda value.
538
    """
539
    if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
12✔
540
        _X = X.values
12✔
541
    else:
542
        _X = X.copy()
12✔
543

544
    if lmbda is None:
12✔
545
        l_search = np.linspace(*lmbda_search_space, search_steps)
12✔
546
        llf = np.apply_along_axis(scipy.stats.boxcox_llf, 0, np.array([l_search]), _X.T)
12✔
547
        if llf.shape[0] == 1:
12✔
548
            mean_llf = llf[0]
12✔
549
        else:
550
            mean_llf = np.nansum(llf, axis=0)
12✔
551

552
        lmbda = l_search[mean_llf == np.nanmax(mean_llf)]
12✔
553
    if _X.ndim < 2:
12✔
554
        out = scipy.stats.boxcox(_X, lmbda)
12✔
555
    elif _X.shape[0] == 1:
12✔
556
        out = scipy.stats.boxcox(np.squeeze(_X), lmbda)
12✔
557
    else:
558
        out = np.apply_along_axis(scipy.stats.boxcox, 0, _X, lmbda)
12✔
559

560
    if isinstance(_X, pd.DataFrame) or isinstance(_X, pd.Series):
12✔
561
        _out = X.copy()
×
562
        _out.loc[:, :] = out
×
563
        out = _out
×
564

565
    if return_lmbda:
12✔
566
        return out, lmbda
12✔
567
    else:
568
        return out
12✔
569

570

571
def inverse_boxcox(Y: np.ndarray, lmbda):
12✔
572
    """
573
    Inverse Box-Cox transformation.
574

575
    Parameters
576
    ---------------
577
    Y : :class:`numpy.ndarray`
578
        Array on which to perform the transformation.
579
    lmbda : :class:`float`
580
        Lambda value used to forward-transform values.
581

582
    Returns
583
    -------
584
    :class:`numpy.ndarray`
585
        Inverse Box-Cox transformed array.
586
    """
587
    return scipy.special.inv_boxcox(Y, lmbda)
12✔
588

589

590
########################################################################################
591
# Functions for spherical coordinate transformation of compositional data.
592
########################################################################################
593
"""
9✔
594
The functions below were derived from the references below, but should be in line
595
with the work which preceeded them.
596

597
Neocleous, T., Aitken, C., Zadora, G., 2011. Transformations for compositional data
598
with zeros with an application to forensic evidence evaluation. Chemometrics and
599
Intelligent Laboratory Systems 109, 77–85. https://doi.org/10.1016/j.chemolab.2011.08.003
600

601
Wang, H., Liu, Q., Mok, H.M.K., Fu, L., Tse, W.M., 2007. A hyperspherical transformation
602
forecasting model for compositional data. European Journal of Operational Research 179,
603
459–468. https://doi.org/10.1016/j.ejor.2006.03.039
604
"""
605

606

607
def sphere(ys):
12✔
608
    r"""
609
    Spherical coordinate transformation for compositional data.
610

611
    Parameters
612
    ----------
613
    ys : :class:`numpy.ndarray`
614
        Compositional data to transform (shape (n, D)).
615

616
    Returns
617
    -------
618
    θ : :class:`numpy.ndarray`
619
        Array of angles in radians (:math:`(0, \pi / 2]`)
620

621
    Notes
622
    -----
623
    :func:`numpy.arccos` will return angles in the range :math:`(0, \pi)`. This shouldn't be
624
    an issue for this function given that the input values are all positive.
625
    """
626
    p = ys.shape[1] - 1
12✔
627
    _ys = np.sqrt(close(ys))  # closure operation
12✔
628
    θ = np.ones((ys.shape[0], p))
12✔
629

630
    indicies = np.arange(1, p + 1)[::-1]
12✔
631
    for ix in indicies:  # we have to recurse from p back down to #2
12✔
632
        if ix == p:
12✔
633
            S = 1
12✔
634
        else:
635
            # vector - the product of sin components
636
            S = np.prod(np.sin(θ[:, ix:]), axis=1)
12✔
637
            # where this evaluates to zero, the composition is all in the first component
638
            S[np.isclose(S, 0.0)] = 1.0
12✔
639

640
        ratios = _ys[:, ix] / S
12✔
641
        # where this looks like it could be slightly higher than 1
642
        # np.arcos will return np.nan, so we can filter these.
643
        ratios[np.isclose(ratios, 1.0)] = 1.0
12✔
644
        θ[:, ix - 1] = np.arccos(ratios)
12✔
645
    return θ
12✔
646

647

648
def inverse_sphere(θ):
12✔
649
    """
650
    Inverse spherical coordinate transformation to revert back to compositional data
651
    in the simplex.
652

653
    Parameters
654
    ----------
655
    θ : :class:`numpy.ndarray`
656
        Angular coordinates to revert.
657

658
    Returns
659
    -------
660
    ys : :class:`numpy.ndarray`
661
        Compositional (simplex) coordinates, normalised to 1.
662
    """
663
    p = θ.shape[1]
12✔
664
    n = θ.shape[0]
12✔
665
    y = np.ones((θ.shape[0], p + 1)) * np.pi / 2
12✔
666

667
    sinθ, cosθ = np.sin(θ), np.cos(θ)
12✔
668

669
    indicies = np.arange(0, p + 1)
12✔
670
    for ix in indicies:
12✔
671
        if ix == 0:
12✔
672
            C = 1.0
12✔
673
        else:
674
            C = cosθ[:, ix - 1]
12✔
675

676
        if ix == p:
12✔
677
            S = 1.0
12✔
678
        else:
679
            S = np.prod(sinθ[:, ix:], axis=1)
12✔
680
        y[:, ix] = C * S
12✔
681

682
    ys = y**2
12✔
683
    return ys
12✔
684

685

686
################################################################################
687

688

689
def compositional_cosine_distances(arr):
12✔
690
    """
691
    Calculate a distance matrix corresponding to the angles between a number
692
    of compositional vectors.
693

694
    Parameters
695
    ----------
696
    arr: :class:`numpy.ndarray`
697
        Array of n-dimensional compositions of shape (n_samples, n).
698

699
    Returns
700
    -------
701
    :class:`numpy.ndarray`
702
        Array of angular distances of shape (n_samples, n_samples).
703
    """
704
    # all vectors are unit vectors where we start with closed compositions
705
    _closed = close(arr)
×
706
    # and we can then calculate the cosine similarity
707
    cosine_sim = np.dot(
×
708
        np.sqrt(np.expand_dims(_closed, axis=1)),
709
        np.sqrt(np.expand_dims(_closed, axis=2)),
710
    ).squeeze()
711
    # finally, we convert the cosines back to angules
712
    return np.arccos(np.clip(cosine_sim, -1.0, 1.0))
×
713

714

715
########################################################################################
716
# Meta-functions for accessing transformations.
717
########################################################################################
718

719

720
def get_transforms(name):
12✔
721
    """
722
    Lookup a transform-inverse transform pair by name.
723

724
    Parameters
725
    ----------
726
    name : :class:`str`
727
        Name of of the transform pairs (e.g. :code:``'CLR'``).
728

729
    Returns
730
    -------
731
    tfm, inv_tfm : :class:`callable`
732
        Transform and inverse transform functions.
733
    """
734
    if callable(name):  #  callable
12✔
735
        name = name.__name__
12✔
736

737
    tfm, inv_tfm = __TRANSFORMS__.get(name)
12✔
738
    return tfm, inv_tfm
12✔
739

740

741
def _load_transforms():
12✔
742
    """
743
    Load the transform pairs into the module level variable for later lookup.
744

745
    Returns
746
    -------
747
    :class:`dict`
748
    """
749
    return {
12✔
750
        f: (globals().get(f), globals().get("inverse_{}".format(f)))
751
        for f in globals().keys()
752
        if "inverse_{}".format(f) in globals().keys()
753
    }
754

755

756
__TRANSFORMS__.update(_load_transforms())
12✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc