• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Project-OSmOSE / OSEkit / 22315062293

23 Feb 2026 04:27PM UTC coverage: 98.778% (-0.06%) from 98.838%
22315062293

Pull #341

github

web-flow
Merge 082b02312 into e272be425
Pull Request #341: Soxr resample stream

103 of 105 new or added lines in 7 files covered. (98.1%)

2 existing lines in 1 file now uncovered.

4930 of 4991 relevant lines covered (98.78%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.23
/src/osekit/utils/audio_utils.py
1
from __future__ import annotations
1✔
2

3
import enum
1✔
4
from typing import Literal, Self
1✔
5

6
import numpy as np
1✔
7
import soxr
1✔
8
from pandas import Timedelta
1✔
9

10
from osekit.config import (
1✔
11
    resample_quality_settings,
12
)
13

14

15
def generate_sample_audio(
1✔
16
    nb_files: int,
17
    nb_samples: int,
18
    series_type: Literal["repeat", "increase", "sine", "noise"] = "repeat",
19
    sine_frequency: float = 1000.0,
20
    min_value: float = 0.0,
21
    max_value: float = 1.0,
22
    duration: float = 1.0,
23
    dtype: np.dtype = np.float64,
24
) -> list[np.ndarray]:
25
    """Generate sample audio data.
26

27
    Parameters
28
    ----------
29
    nb_files: int
30
        Number of audio data to generate.
31
    nb_samples: int
32
        Number of samples per audio data.
33
    series_type: Literal["repeat", "increase", "sine"] (Optional)
34
        ``"repeat"``: audio data contain the same linear values from ``min`` to ``max``.
35
        ``"increase"``: audio data contain increasing values from ``min`` to ``max``.
36
        ``"sine"``: audio data contain sine waves with a peak value of ``max_value``.
37
        ``"noise"``: audio data contains white gaussian noise (``mean=0.``, ``std=1.``)
38
        Defaults to ``"repeat"``.
39
    sine_frequency: float (Optional)
40
        Frequency of the sine waves.
41
        Has no effect if ``series_type != "sine"``.
42
    min_value: float
43
        Minimum value of the audio data.
44
    max_value: float
45
        Maximum value of the audio data.
46
    duration: float
47
        Duration of the audio data in seconds.
48
        Used to compute the frequency of sine waves.
49
    dtype: np.dtype
50
        The type of the output array.
51

52
    Returns
53
    -------
54
    list[numpy.ndarray]:
55
        The generated audio data.
56

57
    """
58
    if duration is None:
1✔
59
        duration = Timedelta(seconds=1)
×
60
    if series_type == "repeat":
1✔
61
        return np.split(
1✔
62
            np.tile(
63
                np.linspace(min_value, max_value, nb_samples, dtype=dtype),
64
                nb_files,
65
            ),
66
            nb_files,
67
        )
68
    if series_type == "increase":
1✔
69
        return np.split(
1✔
70
            np.linspace(min_value, max_value, nb_samples * nb_files, dtype=dtype),
71
            nb_files,
72
        )
73
    if series_type == "sine":
1✔
74
        t = np.linspace(0, duration, nb_samples)
1✔
75
        return np.split(
1✔
76
            np.tile(
77
                np.sin(2 * np.pi * sine_frequency * t, dtype=dtype) * max_value,
78
                nb_files,
79
            ),
80
            nb_files,
81
        )
82
    if series_type == "noise":
1✔
83
        generator = np.random.default_rng(seed=1)
1✔
84
        sig = generator.normal(0.0, 1.0, size=nb_samples)
1✔
85
        return np.split(
1✔
86
            sig,
87
            nb_files,
88
        )
89
    return np.split(np.empty(nb_samples * nb_files, dtype=dtype), nb_files)
×
90

91

92
def resample(data: np.ndarray, origin_sr: float, target_sr: float) -> np.ndarray:
    """Change the sampling rate of the audio data with ``soxr``.

    The resampling quality is read from ``resample_quality_settings``:
    the ``"upsample"`` entry is used when ``target_sr`` is strictly greater
    than ``origin_sr``, the ``"downsample"`` entry otherwise.

    Parameters
    ----------
    data: np.ndarray
        The audio data to resample.
    origin_sr:
        The sampling rate of the audio data.
    target_sr:
        The sampling rate of the resampled audio data.

    Returns
    -------
    np.ndarray
        The resampled audio data.

    """
    direction = "upsample" if target_sr > origin_sr else "downsample"
    return soxr.resample(
        data,
        origin_sr,
        target_sr,
        quality=resample_quality_settings[direction],
    )
116

117

118
def normalize_raw(values: np.ndarray) -> np.ndarray:
    """Return the audio data untouched (no normalization).

    Parameters
    ----------
    values: np.ndarray
        The audio data.

    Returns
    -------
    np.ndarray
        The very same object that was passed in.

    """
    return values
121

122

123
def normalize_dc_reject(
1✔
124
    values: np.ndarray,
125
    dc_component: float | None = None,
126
) -> np.ndarray:
127
    """Reject the DC component of the audio data."""
128
    return values - (values.mean() if dc_component is None else dc_component)
1✔
129

130

131
def normalize_peak(values: np.ndarray, peak: float | None = None) -> np.ndarray:
1✔
132
    """Return values normalized so that the peak value is ``1.0``."""
133
    divisor = max(abs(values)) if peak is None else peak
1✔
134
    return values / (divisor if divisor else 1)
1✔
135

136

137
def normalize_zscore(
1✔
138
    values: np.ndarray,
139
    mean: float | None = None,
140
    std: float | None = None,
141
) -> np.ndarray:
142
    """Return normalized zscore from the audio data."""
143
    mean = values.mean() if mean is None else mean
1✔
144
    std = values.std() if std is None else std
1✔
145
    return (values - mean) / (std if std else 1)
1✔
146

147

148
class NormalizationValider(enum.EnumMeta):
1✔
149
    """Metaclass used for validating the normalization flag.
150

151
    This is used because only ``REJECT_DC`` can be combined with (exactly)
152
    one other normalization.
153

154
    """
155

156
    def __call__(cls, *args, **kwargs) -> Self:  # noqa: ANN002, ANN003
1✔
157
        """Overwrite the call dunder."""
158
        instance = super().__call__(*args, **kwargs)
1✔
159

160
        mask = instance.value & ~Normalization.DC_REJECT.value
1✔
161
        if mask & (mask - 1):
1✔
162
            message = (
1✔
163
                "Combined normalizations can only be DC_REJECT combined "
164
                "with exactly one other normalization type."
165
            )
166
            raise ValueError(message)
1✔
167

168
        return instance
1✔
169

170

171
class Normalization(enum.Flag, metaclass=NormalizationValider):
1✔
172
    """Normalization to apply to the audio data.
173

174
    ``RAW``: No normalization is done.
175

176
    ``DC_REJECT``: Reject the DC component of the audio data.
177

178
    ``PEAK``: Divide the data by the absolute peak so that the peak value is ``1.0``.
179

180
    ``ZSCORE``: Normalize the data to a z-score with a mean of ``0.0`` and a
181
    std of ``1.0``.
182

183
    """
184

185
    RAW = enum.auto()
1✔
186
    DC_REJECT = enum.auto()
1✔
187
    PEAK = enum.auto()
1✔
188
    ZSCORE = enum.auto()
1✔
189

190

191
def normalize(
    values: np.ndarray,
    normalization: Normalization,
    mean: float | None = None,
    peak: float | None = None,
    std: float | None = None,
) -> np.ndarray:
    """Normalize the audio data.

    The normalizations flagged in ``normalization`` are applied in a fixed
    order: ``DC_REJECT``, then ``PEAK``, then ``ZSCORE``.
    Flags that are not set leave the data untouched.

    Parameters
    ----------
    values: np.ndarray
        The audio data to normalize.
    normalization: Normalization
        The normalization (or combination of normalizations) to apply.
    mean: float | None
        Mean used as the DC component and as the z-score mean.
        When ``None``, it is computed from the data at each step.
    peak: float | None
        Peak value used by the ``PEAK`` normalization.
        When ``None``, it is computed from the data.
    std: float | None
        Standard deviation used by the ``ZSCORE`` normalization.
        When ``None``, it is computed from the data.

    Returns
    -------
    np.ndarray
        The normalized audio data.

    """
    if Normalization.DC_REJECT in normalization:
        values = normalize_dc_reject(values=values, dc_component=mean)
        if mean is not None:
            # The provided mean has just been removed from the data: the
            # z-score step must not subtract it a second time.
            mean = 0.0
    if Normalization.PEAK in normalization:
        values = normalize_peak(values=values, peak=peak)
    if Normalization.ZSCORE in normalization:
        values = normalize_zscore(values=values, mean=mean, std=std)
    return values
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc