• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenCOMPES / sed / 6520232780

14 Oct 2023 10:12PM UTC coverage: 90.267% (-0.3%) from 90.603%
6520232780

Pull #181

github

rettigl
define jitter_amps as single amplitude in default config
Pull Request #181: define jitter_amps as single amplitude in default config

4229 of 4685 relevant lines covered (90.27%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.74
/sed/binning/numba_bin.py
1
"""This file contains code for binning using numba precompiled code for the
2
sed.binning module
3

4
"""
5
from typing import Any
1✔
6
from typing import cast
1✔
7
from typing import List
1✔
8
from typing import Sequence
1✔
9
from typing import Tuple
1✔
10
from typing import Union
1✔
11

12
import numba
1✔
13
import numpy as np
1✔
14

15

16
@numba.jit(nogil=True, nopython=True)
def _hist_from_bin_range(
    sample: np.ndarray,
    bins: Sequence[int],
    ranges: np.ndarray,
) -> np.ndarray:
    """N-dimensional histogram over equally spaced bins, compiled by Numba.

    Works much like numpy.histogramdd, but accumulates the counts in an array
    of unsigned 32 bit integers.

    Args:
        sample (np.ndarray): The data to be histogrammed, indexed as
            ``sample[event, dimension]`` (shape N,D).
        bins (Sequence[int]): The number of bins for each dimension D. It is
            used directly as the shape of the returned histogram.
        ranges (np.ndarray): Array indexed as ``ranges[dimension, 0]`` (lower
            outer edge) and ``ranges[dimension, 1]`` (upper outer edge).

    Raises:
        ValueError: In case of dimension mismatch between ``sample`` and
            ``bins``.

    Returns:
        np.ndarray: The computed histogram.
    """
    num_axes = len(bins)
    if sample.shape[1] != num_axes:
        raise ValueError(
            "The dimension of bins is not equal to the dimension of the sample x",
        )

    hist = np.zeros(bins, np.uint32)
    # flat view onto hist: incrementing it fills the returned array
    counts = hist.ravel()
    inv_width = np.zeros(num_axes, np.float64)
    elem_strides = np.zeros(num_axes, np.int64)

    for axis in range(num_axes):
        # inverse bin width, turns a coordinate into a fractional bin position
        inv_width[axis] = 1 / ((ranges[axis, 1] - ranges[axis, 0]) / bins[axis])
        # strides in units of elements rather than bytes
        elem_strides[axis] = hist.strides[axis] // hist.itemsize  # pylint: disable=E1136

    for row in range(sample.shape[0]):
        inside = True
        offset = 0
        for axis in range(num_axes):
            # rounding to 11 digits strips off numerical rounding errors
            pos = round(
                (sample[row, axis] - ranges[axis, 0]) * inv_width[axis],
                11,
            )
            # values sitting exactly on the last edge go into the last bin
            if pos == bins[axis]:
                pos = bins[axis] - 1
            # no need to look at the remaining axes once one is out of range
            # (NaN positions fail this comparison and are dropped as well)
            if not 0 <= pos < bins[axis]:
                inside = False
                break
            offset += int(pos) * elem_strides[axis]
        if inside:
            counts[offset] += 1

    return hist
1✔
72

73

74
@numba.jit(nogil=True, parallel=False, nopython=True)
def binsearch(bins: np.ndarray, val: float) -> int:
    """Bisection index search function.

    Finds the index of the bin containing ``val``, i.e. the index of the
    highest bin edge that is less than or equal to ``val`` (the left edge of
    the bin). A value equal to the last edge is counted into the last bin.

    Args:
        bins (np.ndarray): 1D array of bin edges. The bisection assumes the
            edges are sorted in ascending order.
        val (float): Value to search for.

    Returns:
        int: Index of the bin containing ``val``. Returns -1 when ``val`` is
        NaN, when it lies outside the range spanned by ``bins``, or when
        ``bins`` holds fewer than two edges and therefore defines no bin.
    """
    # Fewer than two edges define no bin. This also keeps the edge lookups
    # below from indexing into an empty array.
    if len(bins) < 2:
        return -1
    if np.isnan(val):
        return -1

    low, high = 0, len(bins) - 1
    # the last bin is closed on the right
    if val == bins[high]:
        return high - 1
    if val < bins[low] or val > bins[high]:
        return -1

    # from here on bins[low] <= val < bins[high] holds
    mid = high // 2
    while True:
        if val < bins[mid]:
            high = mid
        elif val < bins[mid + 1]:
            return mid
        else:
            low = mid
        mid = (low + high) // 2
×
105

106

107
@numba.jit(nopython=True, nogil=True, parallel=False)
def _hist_from_bins(
    sample: np.ndarray,
    bins: Sequence[np.ndarray],
    shape: Tuple,
) -> np.ndarray:
    """Numba powered binning on pre-defined bin edges, similar to np.histogramdd.

    Each coordinate of each event is located with ``binsearch`` in the edge
    array of its dimension. Events for which any coordinate is reported as
    outside (index -1) are not counted.

    Args:
        sample (np.ndarray): The data to be histogrammed, indexed as
            ``sample[event, dimension]`` (shape N,D).
        bins (Sequence[np.ndarray]): One array of bin edges per dimension D,
            i.e. the desired output axes.
        shape (Tuple): Shape of the resulting array. Workaround for the fact
            numba does not allow to create tuples.

    Raises:
        ValueError: In case of dimension mismatch between ``sample`` and
            ``bins``.

    Returns:
        np.ndarray: The computed n-dimensional histogram (unsigned 32 bit
        integers).
    """
    num_axes = len(bins)
    if sample.shape[1] != num_axes:
        raise ValueError(
            "The dimension of bins is not equal to the dimension of the sample x",
        )

    hist = np.zeros(shape, np.uint32)
    # flat view onto hist: incrementing it fills the returned array
    counts = hist.ravel()

    # strides in units of elements rather than bytes
    elem_strides = np.zeros(num_axes, np.int64)
    for axis in range(num_axes):
        elem_strides[axis] = hist.strides[axis] // hist.itemsize  # pylint: disable=E1136

    for row in range(sample.shape[0]):
        inside = True
        offset = 0
        for axis in range(num_axes):
            idx = binsearch(bins[axis], sample[row, axis])
            # binsearch returns -1 when the value is outside the bin range;
            # no need to look at the remaining axes in that case
            if idx < 0:
                inside = False
                break
            offset += int(idx) * elem_strides[axis]
        if inside:
            counts[offset] += 1

    return hist
1✔
153

154

155
def numba_histogramdd(
    sample: np.ndarray,
    bins: Union[int, Sequence[int], Sequence[np.ndarray], np.ndarray],
    ranges: Union[Sequence, None] = None,
) -> Tuple[np.ndarray, List[np.ndarray]]:
    """Multidimensional histogramming function, powered by Numba.

    Behaves in total much like numpy.histogramdd. Returns uint32 arrays.
    This was chosen because it has a significant performance improvement over
    uint64 for large binning volumes. Be aware that this can cause overflows
    for very large sample sets exceeding 3E9 counts in a single bin. This
    should never happen in a realistic photoemission experiment with useful bin
    sizes.

    Args:
        sample (np.ndarray): The data to be histogrammed with shape N,D. A
            sequence of D one-dimensional arrays is accepted as well.
        bins (Union[int, Sequence[int], Sequence[np.ndarray], np.ndarray]): The number
            of bins for each dimension D (a single integer is used for all
            dimensions), or a sequence of D arrays of bin edges on which to calculate
            the histogram. Python integers and NumPy integers of any width are
            accepted as bin counts.
        ranges (Sequence, optional): The (lower, upper) range of each dimension.
            Required when bins is given as number(s) of bins; for one-dimensional
            data a bare (lower, upper) pair is accepted. Not used when bins is a
            sequence of edge arrays. Defaults to None.

    Raises:
        ValueError: In case of dimension mismatch between bins and sample.
        TypeError: Wrong type for bins, or bins mixes integers and arrays.
        ValueError: If ranges is missing or does not have one entry per dimension
            when bins is given as number(s) of bins.
        RuntimeError: Internal shape error after binning.

    Returns:
        Tuple[np.ndarray, List[np.ndarray]]: 2-element tuple of the computed
        histogram and a list of D arrays describing the bin edges for each
        dimension.

        - **hist**: The computed histogram
        - **edges**: A list of D arrays describing the bin edges for
          each dimension.
    """
    try:
        # Sample is an ND-array.
        _, num_cols = sample.shape
    except (AttributeError, ValueError):
        # Sample is a sequence of 1D arrays.
        sample = np.atleast_2d(sample).T
        _, num_cols = sample.shape

    # np.integer covers every NumPy integer width, not only the platform
    # default np.int_
    if isinstance(bins, (int, np.integer)):  # bins provided as a single number
        bins = num_cols * [bins]
    num_bins = len(bins)  # Number of dimensions in bins

    if num_bins != num_cols:  # check number of dimensions
        raise ValueError(
            "The dimension of bins must be equal to the dimension of the sample x.",
        )

    if not isinstance(bins[0], (int, np.integer, np.ndarray)):
        raise TypeError(
            f"bins must be int, np.ndarray or a sequence of the two. "
            f"Found {type(bins[0])} instead",
        )

    # method == "array"
    if isinstance(bins[0], np.ndarray):
        # fail early with a clear message instead of inside the compiled kernel
        for entry in bins:
            if not isinstance(entry, np.ndarray):
                raise TypeError(
                    f"bins must be int, np.ndarray or a sequence of the two. "
                    f"Found {type(entry)} instead",
                )
        bins = cast(List[np.ndarray], list(bins))
        hist = _hist_from_bins(
            sample,
            tuple(bins),
            tuple(b.size - 1 for b in bins),
        )
        return hist, bins

    # method == "int"
    # explicit check of every entry; an assert would be stripped under -O
    for entry in bins:
        if not isinstance(entry, (int, np.integer)):
            raise TypeError(
                f"bins must be int, np.ndarray or a sequence of the two. "
                f"Found {type(entry)} instead",
            )

    # normalize the range argument
    if ranges is None:
        raise ValueError(
            "must define a value for ranges when bins is the number of bins",
        )
    if num_cols == 1 and isinstance(
        ranges[0],
        (int, float, np.integer, np.floating),
    ):
        # a bare (lower, upper) pair for one-dimensional data
        ranges = (ranges,)
    elif len(ranges) != num_cols:
        raise ValueError(
            "range argument must have one entry per dimension",
        )

    # plain Python ints give the compiled kernel one uniform tuple type
    bins = tuple(int(entry) for entry in bins)

    # Create edge arrays
    edges: List[np.ndarray] = [
        np.linspace(ranges[i][0], ranges[i][1], bins[i] + 1)
        for i in range(num_cols)
    ]

    hist = _hist_from_bin_range(sample, bins, np.asarray(ranges))

    # n + 1 edges enclose n bins along each axis
    if hist.shape != tuple(len(edge) - 1 for edge in edges):
        raise RuntimeError("Internal Shape Error")

    return hist, edges
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc