• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

morganjwilliams / pyrolite / 9531758865

26 Feb 2024 05:52AM UTC coverage: 91.64%. Remained the same
9531758865

push

github

morganjwilliams
Merge branch 'release/0.3.5.post0' into main

6204 of 6770 relevant lines covered (91.64%)

2.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.97
/pyrolite/comp/codata.py
1
import numpy as np
3✔
2
import pandas as pd
3✔
3
import scipy.special
3✔
4
import scipy.stats
3✔
5
import sympy
3✔
6

7
from ..util.log import Handle
3✔
8

9
# from .renorm import renormalise, close
10
from ..util.math import helmert_basis, symbolic_helmert_basis
3✔
11

12
# Module-level logging handle, created with the package's own Handle helper.
logger = Handle(__name__)
3✔
13

14
# Registry mapping a transform name to its (transform, inverse transform) pair;
# filled at the bottom of the module from the result of _load_transforms().
__TRANSFORMS__ = {}
3✔
15

16
# Column names which are protected in sympy, mapped to the substitute name used
# when building symbolic labels (e.g. 'S' is replaced by 'Ss').
__sympy_protected_variables__ = {"S": "Ss"}
3✔
17

18

19
def close(X: np.ndarray, sumf=np.sum):
    """
    Closure operator for compositional data.

    Parameters
    -----------
    X : :class:`numpy.ndarray`
        Array to close. Two-dimensional arrays are closed row-wise; any other
        array is closed along its first axis.
    sumf : :class:`callable`, :func:`numpy.sum`
        Sum function to use for closure, called as :code:`sumf(X, axis=...)`.

    Returns
    --------
    :class:`numpy.ndarray`
        Closed array.

    Notes
    ------
        * Does not check for non-positive entries.
        * Where the sum is (close to) zero, the result is :code:`numpy.nan`.
    """
    if X.ndim == 2:
        # keep the totals as a column vector so they broadcast across each row
        C = np.array(sumf(X, axis=1), dtype=float)[:, np.newaxis]
    else:
        C = np.array(sumf(X, axis=0), dtype=float)

    # the totals are held as floats so that zero sums can be replaced by NaN
    # even for integer input (NaN cannot be stored in an integer array)
    C[np.isclose(C, 0)] = np.nan
    return np.divide(X, C)
3✔
47

48

49
def renormalise(df: pd.DataFrame, components: list = None, scale=100.0):
    """
    Renormalises compositional data to ensure closure.

    Parameters
    ------------
    df : :class:`pandas.DataFrame`
        Dataframe to renormalise. The input is not modified; a deep copy is
        renormalised and returned.
    components : :class:`list`
        Optional subcomposition to renormalise to :code:`scale`. Useful for the
        use case where compositional data and non-compositional data are stored
        in the same dataframe. Names which are not columns of the dataframe are
        ignored. If empty or :code:`None`, all columns are renormalised.
    scale : :class:`float`, :code:`100.`
        Closure parameter. Typically either 100 or 1.

    Returns
    --------
    :class:`pandas.DataFrame`
        Renormalized dataframe.

    Notes
    ------
    Where a subcomposition is specified, rows which sum to zero become
    :code:`numpy.nan`. Where the whole dataframe is renormalised, the sum of
    such rows is replaced by 100 before dividing.
    """
    dfc = df.copy(deep=True)
    if components:
        cmpnts = [c for c in components if c in dfc.columns]
        if not cmpnts:
            # none of the requested components are present; nothing to rescale
            return dfc
        subcomposition = dfc.loc[:, cmpnts]
        totals = subcomposition.sum(axis=1).replace(0, np.nan)
        # replace whole columns rather than assigning in place through .loc, so
        # float results can be stored regardless of the original column dtype
        dfc[cmpnts] = scale * subcomposition.divide(totals, axis=0)
        return dfc

    return dfc.divide(dfc.sum(axis=1).replace(0, 100.0), axis=0) * scale
3✔
79

80

81
def ALR(X: np.ndarray, ind: int = -1, null_col=False):
    """
    Additive Log Ratio transformation.

    Parameters
    ---------------
    X: :class:`numpy.ndarray`
        Array on which to perform the transformation, of shape :code:`(N, D)`
        (a single composition of shape :code:`(D,)` is also accepted).
    ind: :class:`int`
        Index of column used as denominator. Negative values are offset by the
        number of components.
    null_col : :class:`bool`
        Whether to keep the redundant column.

    Returns
    ---------
    :class:`numpy.ndarray`
        ALR-transformed array, of shape :code:`(N, D-1)`; the denominator
        column is retained where :code:`null_col` is set.
    """
    assert X.ndim in [1, 2]
    n_parts = X.shape[-1]
    denominator = ind + n_parts if ind < 0 else ind

    # indexing with a list keeps the trailing axis, so the divisor broadcasts
    # for both one- and two-dimensional input
    ratios = np.divide(X, X[..., [denominator]])
    if not null_col:
        keep = np.arange(n_parts) != denominator
        ratios = ratios[..., keep]

    return np.log(ratios)
3✔
116

117

118
def inverse_ALR(Y: np.ndarray, ind=-1, null_col=False):
    """
    Inverse Additive Log Ratio transformation.

    Parameters
    ---------------
    Y : :class:`numpy.ndarray`
        Array on which to perform the inverse transformation, of shape :code:`(N, D-1)`.
    ind : :class:`int`
        Index of column used as denominator. Negative values count back from
        the last of the :code:`D` output columns.
    null_col : :class:`bool`, :code:`False`
        Whether the array contains an extra redundant column
        (i.e. shape is :code:`(N, D)`).

    Returns
    --------
    :class:`numpy.ndarray`
        Inverse-ALR transformed array, of shape :code:`(N, D)`.
    """
    assert Y.ndim in [1, 2]

    X = Y
    if not null_col:
        n_parts = Y.shape[-1] + 1
        if ind < 0:
            # resolve negative indexes against the width of the output array
            ind += n_parts
        # the denominator's log-ratio to itself is zero; put that column back
        # in the position it was removed from
        X = np.insert(Y, ind, 0.0, axis=-1)

    # Inverse log and closure operations
    return close(np.exp(X))
3✔
159

160

161
def CLR(X: np.ndarray):
    """
    Centred Log Ratio transformation.

    Parameters
    ---------------
    X : :class:`numpy.ndarray`
        2D array on which to perform the transformation, of shape :code:`(N, D)`.

    Returns
    ---------
    :class:`numpy.ndarray`
        CLR-transformed array, of shape :code:`(N, D)`.
    """
    arr = np.array(X)
    # close each row, then take logs
    closed = np.divide(arr, np.sum(arr, axis=1).reshape(-1, 1))
    logs = np.log(closed)
    # guard against dividing by zero where the array has no columns
    n_parts = max(closed.shape[1], 1)
    centre = (1 / n_parts) * np.nansum(logs, axis=1)[:, np.newaxis]
    return logs - centre
3✔
182

183

184
def inverse_CLR(Y: np.ndarray):
    """
    Inverse Centred Log Ratio transformation.

    Parameters
    ---------------
    Y : :class:`numpy.ndarray`
        Array on which to perform the inverse transformation, of shape :code:`(N, D)`.

    Returns
    ---------
    :class:`numpy.ndarray`
        Inverse-CLR transformed array, of shape :code:`(N, D)`.
    """
    # undo the log operation
    exponentiated = np.exp(Y)
    # close each row, ignoring missing values in the row totals
    row_totals = np.nansum(exponentiated, axis=1)[:, np.newaxis]
    return np.divide(exponentiated, row_totals)
3✔
203

204

205
def ILR(X: np.ndarray, psi=None, **kwargs):
    """
    Isometric Log Ratio transformation.

    Parameters
    ---------------
    X : :class:`numpy.ndarray`
        Array on which to perform the transformation, of shape :code:`(N, D)`.
    psi : :class:`numpy.ndarray`
        Array or matrix representing the ILR basis; optionally specified.
        Where not given, a Helmert basis is generated, with any further
        keyword arguments passed on to the basis function.

    Returns
    --------
    :class:`numpy.ndarray`
        ILR-transformed array, of shape :code:`(N, D-1)`.
    """
    n_parts = X.shape[1]
    clr_coords = CLR(X)
    basis = helmert_basis(D=n_parts, **kwargs) if psi is None else psi
    # the rows of the basis are required to be orthonormal
    assert np.allclose(basis @ basis.T, np.eye(n_parts - 1))
    return clr_coords @ basis.T
3✔
227

228

229
def inverse_ILR(Y: np.ndarray, X: np.ndarray = None, psi=None, **kwargs):
    """
    Inverse Isometric Log Ratio transformation.

    Parameters
    ---------------
    Y : :class:`numpy.ndarray`
        Array on which to perform the inverse transformation, of shape :code:`(N, D-1)`.
    X : :class:`numpy.ndarray`, :code:`None`
        Optional specification for an array from which to derive the orthonormal basis,
        with shape :code:`(N, D)`. Note that this argument is not currently
        read by the function.
    psi : :class:`numpy.ndarray`
        Array or matrix representing the ILR basis; optionally specified.
        Where not given, a Helmert basis is generated, with any further
        keyword arguments passed on to the basis function.

    Returns
    --------
    :class:`numpy.ndarray`
        Inverse-ILR transformed array, of shape :code:`(N, D)`.
    """
    basis = psi
    if basis is None:
        basis = helmert_basis(D=Y.shape[1] + 1, **kwargs)
    # map the coordinates back through the basis, then undo the CLR
    return inverse_CLR(Y @ basis)
3✔
253

254

255
def logratiomean(df, transform=CLR):
    """
    Take a mean of log-ratios along the index of a dataframe.

    Parameters
    -----------
    df : :class:`pandas.DataFrame`
        Dataframe from which to compute a mean along the index.
    transform : :class:`callable`
        Log transform to use; the matching inverse transform is looked up
        via :func:`get_transforms`.

    Returns
    ---------
    :class:`pandas.Series`
        Mean values as a pandas series.
    """
    forward, inverse = get_transforms(transform)
    mean_coords = np.mean(forward(df.values), axis=0)
    # the inverse is given a single-row 2D array, and that row is taken back out
    composition = inverse(mean_coords[np.newaxis, :])[0]
    return pd.Series(composition, index=df.columns)
278

279

280
########################################################################################
281
# Logratio variable naming
282
########################################################################################
283

284

285
def _aggregate_sympy_constants(expr):
    """
    Aggregate constants and symbolic components within a sympy expression to separate
    sub-expressions.

    Parameters
    -----------
    expr : :class:`sympy.core.expr.Expr`
        Expression to aggregate. For matrices, use :func:`~sympy.Matrix.applyfunc`.

    Returns
    -------
    :class:`sympy.core.expr.Expr`
    """
    constant_terms = []
    symbolic_terms = []
    for term in expr.args:
        # terms without free symbols are treated as constants
        if term.free_symbols:
            symbolic_terms.append(term)
        else:
            constant_terms.append(term)

    constant_part = expr.func(*constant_terms)
    symbolic_part = expr.func(*symbolic_terms)
    if not constant_part:
        return sympy.UnevaluatedExpr(symbolic_part)
    return sympy.UnevaluatedExpr(constant_part) * sympy.UnevaluatedExpr(symbolic_part)
×
305

306

307
def get_ALR_labels(df, mode="simple", ind=-1, **kwargs):
    """
    Get symbolic labels for ALR coordinates based on dataframe columns.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Dataframe to generate ALR labels for.
    mode : :class:`str`
        Mode of label to return (:code:`LaTeX`, :code:`simple`).
    ind : :class:`int`
        Index of the column used as the denominator.

    Returns
    -------
    :class:`list`
        List of ALR coordinates corresponding to dataframe columns.

    Notes
    ------
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
    If one of these column names is found, it will be replaced with a title-cased
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').
    """
    names = [
        r"{} / {}".format(
            __sympy_protected_variables__.get(col, col), df.columns[ind]
        )
        for col in df.columns
    ]

    label_mode = mode.lower()
    if label_mode == "simple":
        return ["ALR({})".format(n) for n in names]
    if label_mode != "latex":
        msg = "Label mode {} not recognised.".format(mode)
        raise NotImplementedError(msg)

    # placeholder symbols are used to avoid clashes between element names
    # and latex (e.g. Ge)
    n_parts = df.columns.size
    symbols = [sympy.var("c_{}".format(ix)) for ix in range(n_parts)]
    log_row = sympy.Matrix([[sympy.ln(sym) for sym in symbols]])
    named_row = log_row.subs(dict(zip(symbols, names)))
    return [
        r"${}$".format(sympy.latex(entry, mul_symbol="dot", ln_notation=True))
        for entry in named_row
    ]
3✔
357

358

359
def get_CLR_labels(df, mode="simple", **kwargs):
    """
    Get symbolic labels for CLR coordinates based on dataframe columns.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Dataframe to generate CLR labels for.
    mode : :class:`str`
        Mode of label to return (:code:`LaTeX`, :code:`simple`).

    Returns
    -------
    :class:`list`
        List of CLR coordinates corresponding to dataframe columns.

    Notes
    ------
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
    If one of these column names is found, it will be replaced with a title-cased
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').
    """
    names = [
        r"{} / γ".format(__sympy_protected_variables__.get(col, col))
        for col in df.columns
    ]

    label_mode = mode.lower()
    if label_mode == "simple":
        # simple labels are built from the column names as given
        return ["CLR({}/G)".format(col) for col in df.columns]
    if label_mode != "latex":
        msg = "Label mode {} not recognised.".format(mode)
        raise NotImplementedError(msg)

    # placeholder symbols are used to avoid clashes between element names
    # and latex (e.g. Ge)
    n_parts = df.columns.size
    symbols = [sympy.var("c_{}".format(ix)) for ix in range(n_parts)]
    log_row = sympy.Matrix([[sympy.ln(sym) for sym in symbols]])
    named_row = log_row.subs(dict(zip(symbols, names)))
    return [
        r"${}$".format(sympy.latex(entry, mul_symbol="dot", ln_notation=True))
        for entry in named_row
    ]
3✔
409

410

411
def get_ILR_labels(df, mode="latex", **kwargs):
    """
    Get symbolic labels for ILR coordinates based on dataframe columns.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Dataframe to generate ILR labels for.
    mode : :class:`str`
        Mode of label to return (:code:`LaTeX`, :code:`simple`).

    Returns
    -------
    :class:`list`
        List of ILR coordinates corresponding to dataframe columns.

    Notes
    ------
    Some variable names are protected in :mod:`sympy` and if used can result in errors.
    If one of these column names is found, it will be replaced with a title-cased
    duplicated version of itself (e.g. 'S' will be replaced by 'Ss').

    Further keyword arguments are passed to the symbolic Helmert basis function.
    """
    n_parts = df.columns.size
    # encode the components as placeholder symbols
    symbols = [sympy.var("c_{}".format(ix)) for ix in range(n_parts)]
    log_row = sympy.Matrix([[sympy.ln(sym) for sym in symbols]])

    # this is the CLR --> ILR transform
    basis = symbolic_helmert_basis(n_parts, **kwargs)
    projected = sympy.simplify(log_row @ basis.transpose())
    combined = sympy.simplify(sympy.logcombine(projected, force=True))
    aggregated = combined.applyfunc(_aggregate_sympy_constants)

    # substitute each placeholder with its component over γ
    names = [
        r"{} / γ".format(__sympy_protected_variables__.get(col, col))
        for col in df.columns
    ]
    named_row = aggregated.subs(dict(zip(symbols, names)))

    label_mode = mode.lower()
    if label_mode == "latex":
        return [
            r"${}$".format(sympy.latex(entry, mul_symbol="dot", ln_notation=True))
            for entry in named_row
        ]
    if label_mode == "simple":

        def _symbolic_terms(x):
            # keep only the terms which contain symbols (i.e. drop scaling terms)
            return x.func(*[term for term in x.args if term.free_symbols])

        unscaled = named_row.applyfunc(_symbolic_terms)
        return [str(entry).replace("log", "ILR") for entry in unscaled]

    msg = "Label mode {} not recognised.".format(mode)
    raise NotImplementedError(msg)
3✔
470

471

472
########################################################################################
473
# Box-cox transforms
474
########################################################################################
475

476

477
def boxcox(
    X: np.ndarray,
    lmbda=None,
    lmbda_search_space=(-1, 5),
    search_steps=100,
    return_lmbda=False,
):
    """
    Box-Cox transformation.

    Parameters
    ---------------
    X : :class:`numpy.ndarray`
        Array on which to perform the transformation. A
        :class:`pandas.DataFrame` or :class:`pandas.Series` is also accepted.
    lmbda : :class:`numpy.number`, :code:`None`
        Lambda value used to forward-transform values. If none, it is chosen
        from a grid of candidate values as the one with the highest Box-Cox
        log-likelihood.
    lmbda_search_space : :class:`tuple`
        Range tuple (min, max).
    search_steps : :class:`int`
        Steps for lambda search range.
    return_lmbda : :class:`bool`
        Whether to also return the lambda value.

    Returns
    -------
    :class:`numpy.ndarray` | :class:`numpy.ndarray`(:class:`float`)
        Box-Cox transformed array. If `return_lmbda` is true, tuple contains data and
        lambda value. Where the input is a pandas object, the transformed data
        is returned as the same kind of object with the original labels.

    Notes
    -----
    Where lambda is found by the grid search it is returned as an array of the
    best-scoring candidate value(s), rather than a scalar.
    """
    if isinstance(X, (pd.DataFrame, pd.Series)):
        _X = X.values
    else:
        _X = X.copy()

    if lmbda is None:
        l_search = np.linspace(*lmbda_search_space, search_steps)
        # evaluate the log-likelihood function once per candidate lambda
        llf = np.apply_along_axis(scipy.stats.boxcox_llf, 0, np.array([l_search]), _X.T)
        if llf.shape[0] == 1:
            mean_llf = llf[0]
        else:
            mean_llf = np.nansum(llf, axis=0)

        lmbda = l_search[mean_llf == np.nanmax(mean_llf)]

    if _X.ndim < 2:
        out = scipy.stats.boxcox(_X, lmbda)
    elif _X.shape[0] == 1:
        out = scipy.stats.boxcox(np.squeeze(_X), lmbda)
    else:
        # transform each column separately
        out = np.apply_along_axis(scipy.stats.boxcox, 0, _X, lmbda)

    # re-wrap the output for pandas input; the check is made on the original
    # input, as the working array is always a plain numpy array
    if isinstance(X, pd.DataFrame):
        # a single-row frame is squeezed above, so the original shape is restored
        out = pd.DataFrame(
            np.reshape(out, X.shape), index=X.index, columns=X.columns
        )
    elif isinstance(X, pd.Series):
        out = pd.Series(np.asarray(out), index=X.index, name=X.name)

    if return_lmbda:
        return out, lmbda
    return out
3✔
537

538

539
def inverse_boxcox(Y: np.ndarray, lmbda):
    """
    Inverse Box-Cox transformation.

    Parameters
    ---------------
    Y : :class:`numpy.ndarray`
        Box-Cox transformed array to revert.
    lmbda : :class:`float`
        Lambda value used to forward-transform values.

    Returns
    -------
    :class:`numpy.ndarray`
        Inverse Box-Cox transformed array.
    """
    reverted = scipy.special.inv_boxcox(Y, lmbda)
    return reverted
3✔
556

557

558
########################################################################################
559
# Functions for spherical coordinate transformation of compositional data.
560
########################################################################################
561
"""
562
The functions below were derived from the references below, but should be in line
563
with the work which preceded them.
564

565
Neocleous, T., Aitken, C., Zadora, G., 2011. Transformations for compositional data
566
with zeros with an application to forensic evidence evaluation. Chemometrics and
567
Intelligent Laboratory Systems 109, 77–85. https://doi.org/10.1016/j.chemolab.2011.08.003
568

569
Wang, H., Liu, Q., Mok, H.M.K., Fu, L., Tse, W.M., 2007. A hyperspherical transformation
570
forecasting model for compositional data. European Journal of Operational Research 179,
571
459–468. https://doi.org/10.1016/j.ejor.2006.03.039
572
"""
573

574

575
def sphere(ys):
    r"""
    Spherical coordinate transformation for compositional data.

    Parameters
    ----------
    ys : :class:`numpy.ndarray`
        Compositional data to transform (shape (n, D)).

    Returns
    -------
    θ : :class:`numpy.ndarray`
        Array of angles in radians (:math:`(0, \pi / 2]`), of shape (n, D-1).

    Notes
    -----
    :func:`numpy.arccos` will return angles in the range :math:`(0, \pi)`. This shouldn't be
    an issue for this function given that the input values are all positive.
    """
    p = ys.shape[1] - 1
    roots = np.sqrt(close(ys))  # square roots of the closed composition
    angles = np.ones((ys.shape[0], p))

    # work backwards from the last angle, as each angle depends on those after it
    for ix in range(p, 0, -1):
        if ix == p:
            sin_product = 1
        else:
            # vector - the product of the sines of the angles found so far
            sin_product = np.prod(np.sin(angles[:, ix:]), axis=1)
            # where this evaluates to zero, the composition is all in the first component
            sin_product[np.isclose(sin_product, 0.0)] = 1.0

        ratios = roots[:, ix] / sin_product
        # values marginally above 1 would give NaN from np.arccos, so anything
        # close to 1 is snapped to exactly 1
        ratios[np.isclose(ratios, 1.0)] = 1.0
        angles[:, ix - 1] = np.arccos(ratios)
    return angles
3✔
614

615

616
def inverse_sphere(θ):
    """
    Inverse spherical coordinate transformation to revert back to compositional data
    in the simplex.

    Parameters
    ----------
    θ : :class:`numpy.ndarray`
        Angular coordinates to revert, of shape (n, p).

    Returns
    -------
    ys : :class:`numpy.ndarray`
        Compositional (simplex) coordinates, normalised to 1, of shape (n, p + 1).
    """
    p = θ.shape[1]
    # every column is assigned in the loop below
    y = np.empty((θ.shape[0], p + 1))

    sinθ, cosθ = np.sin(θ), np.cos(θ)

    for ix in range(p + 1):
        # the first component has no cosine term
        C = 1.0 if ix == 0 else cosθ[:, ix - 1]
        # the last component has no sine terms; np.prod is used here because
        # the np.product alias is not available in NumPy 2
        S = 1.0 if ix == p else np.prod(sinθ[:, ix:], axis=1)
        y[:, ix] = C * S

    # the components are the squares of the unit vector coordinates
    return y**2
3✔
652

653

654
################################################################################
655

656

657
def compositional_cosine_distances(arr):
    """
    Calculate a distance matrix corresponding to the angles between a number
    of compositional vectors.

    Parameters
    ----------
    arr: :class:`numpy.ndarray`
        Array of n-dimensional compositions of shape (n_samples, n).

    Returns
    -------
    :class:`numpy.ndarray`
        Array of angular distances of shape (n_samples, n_samples).
    """
    # the square roots of closed compositions are unit vectors
    closed = close(arr)
    as_rows = np.sqrt(np.expand_dims(closed, axis=1))
    as_columns = np.sqrt(np.expand_dims(closed, axis=2))
    # the pairwise dot products give the cosine similarity
    cosine_sim = np.dot(as_rows, as_columns).squeeze()
    # finally, convert the cosines back to angles; clipping keeps rounding
    # errors within the domain of arccos
    return np.arccos(np.clip(cosine_sim, -1.0, 1.0))
×
681

682

683
########################################################################################
684
# Meta-functions for accessing transformations.
685
########################################################################################
686

687

688
def get_transforms(name):
    """
    Lookup a transform-inverse transform pair by name.

    Parameters
    ----------
    name : :class:`str`
        Name of the transform pair (e.g. :code:`'CLR'`). A callable may be
        given instead, in which case its :code:`__name__` is used.

    Returns
    -------
    tfm, inv_tfm : :class:`callable`
        Transform and inverse transform functions.
    """
    key = name.__name__ if callable(name) else name
    tfm, inv_tfm = __TRANSFORMS__.get(key)
    return tfm, inv_tfm
3✔
707

708

709
def _load_transforms():
    """
    Load the transform pairs into the module level variable for later lookup.

    A module-level name is treated as a transform where the name prefixed with
    :code:`inverse_` is also defined at module level.

    Returns
    -------
    :class:`dict`
        Mapping of name to a :code:`(transform, inverse transform)` tuple.
    """
    namespace = globals()
    pairs = {}
    for fname in list(namespace):
        inverse_name = "inverse_{}".format(fname)
        if inverse_name in namespace:
            pairs[fname] = (namespace.get(fname), namespace.get(inverse_name))
    return pairs
722

723

724
# Fill the registry at the end of the module, after the functions above are defined.
__TRANSFORMS__.update(_load_transforms())
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc