• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenCOMPES / sed / 13419398366

19 Feb 2025 06:09PM UTC coverage: 91.6% (-0.6%) from 92.174%
13419398366

Pull #534

github

web-flow
Merge df78f6964 into 6b927a2db
Pull Request #534: Hextof lab loader

71 of 124 new or added lines in 7 files covered. (57.26%)

3 existing lines in 1 file now uncovered.

7731 of 8440 relevant lines covered (91.6%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.62
/src/sed/binning/numba_bin.py
1
"""This file contains code for binning using numba precompiled code for the
2
sed.binning module
3
"""
4
from __future__ import annotations
1✔
5

6
from collections.abc import Sequence
1✔
7
from typing import Any
1✔
8
from typing import cast
1✔
9

10
import numba
1✔
11
import numpy as np
1✔
12

13

14
@numba.jit(nogil=True, nopython=True)
def _hist_from_bin_range(
    sample: np.ndarray,
    bins: Sequence[int],
    ranges: np.ndarray,
) -> np.ndarray:
    """Bin an (N, D) sample onto a regular grid, compiled with Numba.

    Works along the lines of numpy.histogramdd for equally spaced bins, but
    accumulates the counts in an array of unsigned 32 bit integers.

    Args:
        sample (np.ndarray): The data to be histogram'd with shape N,D.
        bins (Sequence[int]): The number of bins for each dimension D.
        ranges (np.ndarray): Array indexed as ``ranges[d, 0]`` / ``ranges[d, 1]``,
            holding the lower and upper outer bin edge of each dimension d.

    Raises:
        ValueError: In case of dimension mismatch.

    Returns:
        np.ndarray: The computed histogram.
    """
    ndims = len(bins)
    if sample.shape[1] != ndims:
        raise ValueError(
            "The dimension of bins is not equal to the dimension of the sample x",
        )

    hist = np.zeros(bins, np.uint32)
    # flat view on the histogram; counts are written through it
    flat_view = hist.ravel()
    # per axis: number of bins per unit of the sampled quantity
    bins_per_unit = np.zeros(ndims, np.float64)
    # per axis: stride of the histogram expressed in elements instead of bytes
    elem_strides = np.zeros(ndims, np.int64)

    for axis in range(ndims):
        bins_per_unit[axis] = 1 / ((ranges[axis, 1] - ranges[axis, 0]) / bins[axis])
        elem_strides[axis] = hist.strides[axis] // hist.itemsize

    n_events = sample.shape[0]
    for row in range(n_events):
        inside = True
        offset = 0
        for axis in range(ndims):
            # fractional bin position; rounding strips off numerical noise
            pos = round((sample[row, axis] - ranges[axis, 0]) * bins_per_unit[axis], 11)
            # values exactly on the upper edge are counted in the last bin
            if pos == bins[axis]:
                pos = bins[axis] - 1
            # one axis out of range is enough to discard the event
            if not (0 <= pos < bins[axis]):
                inside = False
                break
            offset += int(pos) * elem_strides[axis]
        if inside:
            flat_view[offset] += 1

    return hist
70

71

72
@numba.jit(nogil=True, parallel=False, nopython=True)
def binsearch(bins: np.ndarray, val: float) -> int:
    """Bisection index search function.

    Finds the index of the bin with the highest value below val, i.e. the left edge.
    A value equal to the last edge is assigned to the last bin.
    Returns -1 when the value is outside the bin range, when it is NaN, or when
    ``bins`` holds fewer than two edges and therefore defines no bin.

    Args:
        bins (np.ndarray): the array of bin edges on which to search. The bisection
            assumes the edges are sorted in ascending order.
        val (float): value to search for

    Returns:
        int: index of the bin array, returns -1 when value is outside the bins range
    """
    n_edges = len(bins)
    # Fewer than two edges define no bin. The explicit check also keeps the
    # edge lookups below from indexing an empty array.
    if n_edges < 2:
        return -1
    if np.isnan(val):
        return -1

    low, high = 0, n_edges - 1
    # the upper edge is inclusive and belongs to the last bin
    if val == bins[high]:
        return high - 1
    if val < bins[low] or val > bins[high]:
        return -1

    # From here on bins[low] <= val < bins[high] holds on every iteration.
    while True:
        mid = (low + high) // 2
        if val < bins[mid]:
            high = mid
        elif val < bins[mid + 1]:
            return mid
        else:
            low = mid
103

104

105
@numba.jit(nopython=True, nogil=True, parallel=False)
def _hist_from_bins(
    sample: np.ndarray,
    bins: Sequence[np.ndarray],
    shape: tuple,
) -> np.ndarray:
    """Numba powered binning on explicit bin edges, similar to np.histogramdd.

    Every row of the sample is located on each axis with ``binsearch`` and is
    counted only if it falls inside the edges of all axes.

    Args:
        sample (np.ndarray) : the array of shape (N,D) on which to compute the histogram
        bins (Sequence[np.ndarray]): one array of bin edges per dimension D, i.e.
            the desired output axes.
        shape (tuple): shape of the resulting array. It is supplied by the caller
            instead of being built inside the compiled function.

    Raises:
        ValueError: In case of dimension mismatch.

    Returns:
        hist: the computed n-dimensional histogram
    """
    ndims = len(bins)
    if sample.shape[1] != ndims:
        raise ValueError(
            "The dimension of bins is not equal to the dimension of the sample x",
        )

    hist = np.zeros(shape, np.uint32)
    # flat view on the histogram; counts are written through it
    flat_view = hist.ravel()

    # per axis: stride of the histogram expressed in elements instead of bytes
    elem_strides = np.zeros(ndims, np.int64)
    for axis in range(ndims):
        elem_strides[axis] = hist.strides[axis] // hist.itemsize  # pylint: disable=E1136

    n_events = sample.shape[0]
    for row in range(n_events):
        inside = True
        offset = 0
        for axis in range(ndims):
            idx = binsearch(bins[axis], sample[row, axis])
            # binsearch signals "outside the bin range" with -1; one such axis
            # is enough to discard the event
            if idx < 0:
                inside = False
                break
            offset += int(idx) * elem_strides[axis]
        if inside:
            flat_view[offset] += 1

    return hist
151

152

153
def numba_histogramdd(
    sample: np.ndarray,
    bins: int | Sequence[int] | Sequence[np.ndarray] | np.ndarray,
    ranges: Sequence | None = None,
) -> tuple[np.ndarray, list[np.ndarray]]:
    """Multidimensional histogram function, powered by Numba.

    Behaves in total much like numpy.histogramdd. Returns uint32 arrays.
    This was chosen because it has a significant performance improvement over
    uint64 for large binning volumes. Be aware that this can cause overflows
    for very large sample sets exceeding 3E9 counts in a single bin. This
    should never happen in a realistic photoemission experiment with useful bin
    sizes.

    Args:
        sample (np.ndarray): The data to be histogram'd with shape N,D
        bins (int | Sequence[int] | Sequence[np.ndarray] | np.ndarray): The number
            of bins for each dimension D, or a sequence of bin edges on which to calculate
            the histogram. Bin counts may be Python ints or numpy integers of any
            width. All entries must be of the same kind (all counts or all edge arrays).
        ranges (Sequence, optional): The range(s) to use for binning when bins is a
            number or a sequence of numbers. Required in that case and unused when
            bins are arrays of edges. For one-dimensional data a single
            (lower, upper) pair is accepted. Defaults to None.

    Raises:
        ValueError: In case of dimension mismatch.
        TypeError: Wrong type for bins, or a mixture of bin counts and edge arrays.
        ValueError: If ranges is missing or has the wrong number of entries.
        RuntimeError: Internal shape error after binning

    Returns:
        tuple[np.ndarray, list[np.ndarray]]: 2-element tuple of the computed histogram
        and a list of D arrays describing the bin edges for each dimension.

        - **hist**: The computed histogram
        - **edges**: A list of D arrays describing the bin edges for
          each dimension.
    """
    try:
        # Sample is an ND-array.
        _, num_cols = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        _, num_cols = sample.shape

    # bins provided as a single number: use it for every dimension.
    # np.integer covers all numpy integer widths, not only the platform default.
    if isinstance(bins, (int, np.integer)):
        bins = num_cols * [int(bins)]
    num_bins = len(bins)  # Number of dimensions in bins

    if num_bins != num_cols:  # check number of dimensions
        raise ValueError(
            "The dimension of bins must be equal to the dimension of the sample x.",
        )

    if not isinstance(bins[0], (int, np.integer, np.ndarray)):
        raise TypeError(
            f"bins must be int, np.ndarray or a sequence of the two. "
            f"Found {type(bins[0])} instead",
        )

    # method == "array"
    if isinstance(bins[0], np.ndarray):
        # the first entry selects the method, so every entry has to match it
        if not all(isinstance(b, np.ndarray) for b in bins):
            raise TypeError(
                "bins must be either all np.ndarray or all int, not a mixture of the two.",
            )
        bins = cast(list[np.ndarray], list(bins))
        hist = _hist_from_bins(
            sample,
            tuple(bins),
            tuple(b.size - 1 for b in bins),
        )
        return hist, bins

    # method == "int"
    if not all(isinstance(b, (int, np.integer)) for b in bins):
        raise TypeError(
            "bins must be either all np.ndarray or all int, not a mixture of the two.",
        )
    # normalize the range argument
    if ranges is None:
        raise ValueError(
            "must define a value for ranges when bins is the number of bins",
        )
    if num_cols == 1 and isinstance(ranges[0], (int, float, np.number)):
        # a bare (lower, upper) pair for one-dimensional data
        ranges = (ranges,)
    elif len(ranges) != num_cols:
        raise ValueError(
            "range argument must have one entry per dimension",
        )

    # plain Python ints, so the compiled function always sees the same tuple type
    bin_counts = tuple(int(b) for b in cast(Sequence[int], bins))

    # Create edge arrays
    edges: list[np.ndarray] = [
        np.linspace(ranges[i][0], ranges[i][1], bin_counts[i] + 1) for i in range(num_cols)
    ]

    hist = _hist_from_bin_range(sample, bin_counts, np.asarray(ranges))

    # sanity check: one histogram axis per dimension, with the requested length
    if hist.shape != bin_counts:
        raise RuntimeError("Internal Shape Error")

    return hist, edges
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc