• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Project-OSmOSE / OSEkit / 17982648752

24 Sep 2025 04:08PM UTC coverage: 92.374% (+0.08%) from 92.295%
17982648752

Pull #283

github

web-flow
Merge e6ebd4fae into 99d9299db
Pull Request #283: MSEED files support

39 of 42 new or added lines in 2 files covered. (92.86%)

2 existing lines in 1 file now uncovered.

3404 of 3685 relevant lines covered (92.37%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.15
/src/osekit/core_api/audio_data.py
1
"""AudioData represent audio data scattered through different AudioFiles.
2

3
The AudioData has a collection of AudioItem.
4
The data is accessed via an AudioItem object per AudioFile.
5
"""
6

7
from __future__ import annotations
1✔
8

9
from math import ceil
1✔
10
from typing import TYPE_CHECKING
1✔
11

12
import numpy as np
1✔
13
import soundfile as sf
1✔
14
from pandas import Timedelta, Timestamp
1✔
15

16
from osekit.config import (
1✔
17
    TIMESTAMP_FORMATS_EXPORTED_FILES,
18
)
19
from osekit.core_api.audio_file import AudioFile
1✔
20
from osekit.core_api.audio_item import AudioItem
1✔
21
from osekit.core_api.base_data import BaseData
1✔
22
from osekit.core_api.instrument import Instrument
1✔
23
from osekit.utils.audio_utils import resample, Normalization, normalize
1✔
24

25
if TYPE_CHECKING:
26
    from pathlib import Path
27

28

29
class AudioData(BaseData[AudioItem, AudioFile]):
1✔
30
    """AudioData represent audio data scattered through different AudioFiles.
31

32
    The AudioData has a collection of AudioItem.
33
    The data is accessed via an AudioItem object per AudioFile.
34
    """
35

36
    def __init__(
        self,
        items: list[AudioItem] | None = None,
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        sample_rate: int | None = None,
        instrument: Instrument | None = None,
        normalization: Normalization = Normalization.RAW,
        normalization_values: dict | None = None,
    ) -> None:
        """Create an AudioData from AudioItems, or an empty one from time bounds.

        Parameters
        ----------
        items: list[AudioItem] | None
            AudioItems that make up the AudioData.
        begin: Timestamp | None
            Begin of the empty data.
            Only effective if items is None.
        end: Timestamp | None
            End of the empty data.
            Only effective if items is None.
        sample_rate: int | None
            Sample rate of the audio data.
            If None, the sample rate of the first item that has one is used.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.
        normalization: Normalization
            The type of normalization to apply to the audio data.
        normalization_values: dict | None
            Mean, peak and std values used for normalization.
            A falsy value resets the three of them to None.

        """
        super().__init__(items=items, begin=begin, end=end)
        # The sample rate is resolved once the items are available on self.
        self._set_sample_rate(sample_rate)
        self.instrument = instrument
        self.normalization = normalization
        self.normalization_values = normalization_values
1✔
72

73
    @property
    def nb_channels(self) -> int:
        """Largest channel count among the AudioItem items, never less than 1."""
        # Only exact AudioItem instances are taken into account.
        channel_counts = [
            item.nb_channels for item in self.items if type(item) is AudioItem
        ]
        return max([1, *channel_counts])
79

80
    @property
    def shape(self) -> tuple[int, ...] | int:
        """Shape of the audio data.

        A single int (number of frames) when there is at most one channel,
        a (number of frames, number of channels) tuple otherwise.
        """
        nb_frames = round(self.sample_rate * self.duration.total_seconds())
        if self.nb_channels > 1:
            return nb_frames, self.nb_channels
        return nb_frames
1✔
85

86
    @property
    def normalization(self) -> Normalization:
        """Kind of normalization applied to the audio data by get_value()."""
        return self._normalization
1✔
90

91
    @normalization.setter
    def normalization(self, value: Normalization) -> None:
        # Stored as-is: no validation or conversion is performed here.
        self._normalization = value
1✔
94

95
    @property
    def normalization_values(self) -> dict:
        """Dictionary of the mean, peak and std values used for normalization."""
        return self._normalization_values
1✔
99

100
    @normalization_values.setter
    def normalization_values(self, value: dict | None) -> None:
        # Any falsy value (None or an empty dict) falls back to unset statistics.
        if not value:
            value = {"mean": None, "peak": None, "std": None}
        self._normalization_values = value
111

112
    def get_normalization_values(self) -> dict:
        """Compute the normalization statistics from the raw audio data.

        Returns
        -------
        dict:
            The "mean", "peak" and "std" of the values returned by
            get_raw_value(). The peak is the maximum of the values
            (not of their absolute values).

        """
        raw = self.get_raw_value()
        mean = raw.mean()
        peak = raw.max()
        std = raw.std()
        return {"mean": mean, "peak": peak, "std": std}
119

120
    def __eq__(self, other: object) -> bool:
        """Return True if other is an equal AudioData.

        Two AudioData are equal when they share the same sample rate and
        the parent class considers them equal.

        Parameters
        ----------
        other: object
            Object to compare this AudioData with.

        Returns
        -------
        bool
            Result of the comparison. NotImplemented is returned for objects
            that are not AudioData, so that Python falls back to its default
            handling instead of failing on a missing sample_rate attribute.

        """
        if not isinstance(other, AudioData):
            return NotImplemented
        return self.sample_rate == other.sample_rate and super().__eq__(other)
1✔
123

124
    def _set_sample_rate(self, sample_rate: int | None = None) -> None:
        """Set the AudioData sample rate.

        An explicitly given sample_rate always wins.
        Otherwise, the sample rate of the first item that has one is used.
        If no usable sample rate is found, the sample rate is set to None.
        """
        if sample_rate is None:
            item_rates = (
                item.sample_rate for item in self.items if item.sample_rate is not None
            )
            # A falsy candidate (no item rate, or a rate of 0) collapses to None.
            sample_rate = next(item_rates, None) or None
        self.sample_rate = sample_rate
1✔
142

143
    def get_raw_value(self) -> np.ndarray:
        """Return the raw value of the audio data before normalization.

        The data from the audio file will be resampled if necessary.
        The values of the items are written one after the other in an array
        of shape self.shape; frames that no item fills are left at 0.

        Returns
        -------
        np.ndarray:
            The value of the audio data.

        """
        # np.zeros rather than np.empty: if the items provide fewer frames than
        # the expected shape, the trailing frames must not expose uninitialised
        # memory.
        data = np.zeros(shape=self.shape)
        idx = 0
        for item in self.items:
            item_data = self._get_item_value(item)
            # Never write past the end of the pre-allocated array.
            remaining_frames = data.shape[0] - idx
            item_data = item_data[:remaining_frames]
            data[idx : idx + len(item_data)] = item_data
            idx += len(item_data)
        return data
1✔
162

163
    def get_value(self) -> np.ndarray:
        """Return the normalized value of the audio data.

        The raw data (resampled if necessary) is passed to normalize() along
        with the normalization type and the normalization values of this
        AudioData.

        Returns
        -------
        np.ndarray:
            The value of the audio data.

        """
        raw_values = self.get_raw_value()
        return normalize(
            values=raw_values,
            normalization=self.normalization,
            **self.normalization_values,
        )
179

180
    def get_value_calibrated(self) -> np.ndarray:
        """Return the value of the audio data scaled by the calibration factor.

        The calibration factor is the end_to_end attribute of the instrument.
        If the instrument parameter of the audio data is not None, the returned
        value is calibrated in units of Pa. Without instrument, a factor of 1.0
        is used.

        Returns
        -------
        np.ndarray:
            The calibrated value of the audio data.

        """
        values = self.get_value()
        if self.instrument is None:
            return values * 1.0
        return values * self.instrument.end_to_end
1✔
197

198
    def write(
        self,
        folder: Path,
        subtype: str | None = None,
        link: bool = False,
    ) -> None:
        """Write the audio data to a wav file named after str(self).

        Parameters
        ----------
        folder: pathlib.Path
            Folder in which to write the audio file.
            It is passed to create_directories() before writing.
        subtype: str | None
            Subtype as provided by the soundfile module.
            Defaulted as the default 16-bit PCM for WAV audio files.
        link: bool
            If True, the AudioData will be bound to the written file.
            Its items will be replaced with a single item, which will match the whole
            new AudioFile.

        """
        super().create_directories(path=folder)
        output_path = folder / f"{self}.wav"
        sf.write(output_path, self.get_value(), self.sample_rate, subtype=subtype)
        if not link:
            return
        self.link(folder=folder)
1✔
228

229
    def link(self, folder: Path) -> None:
        """Bind the AudioData to the AudioFile it was written to.

        The given folder should contain a file named "str(self).wav".
        Linking is intended for AudioData objects that have already been written.
        After linking, the items of the AudioData are replaced with the items
        of an AudioData built from that single file.

        Parameters
        ----------
        folder: Path
            Folder in which is located the AudioFile to which the AudioData instance
            should be linked.

        """
        written_file = AudioFile(
            path=folder / f"{self}.wav",
            strptime_format=TIMESTAMP_FORMATS_EXPORTED_FILES,
        )
        linked_data = AudioData.from_files([written_file])
        self.items = linked_data.items
1✔
249

250
    def _get_item_value(self, item: AudioItem) -> np.ndarray:
        """Return the data of the audio item at the sample rate of the AudioData.

        The value of an empty item is repeated to cover the item duration.
        The value of any other item is resampled if its sample rate differs
        from the sample rate of the AudioData.
        """
        values = item.get_value()
        if item.is_empty:
            nb_frames = round(item.duration.total_seconds() * self.sample_rate)
            return values.repeat(nb_frames)
        if item.sample_rate == self.sample_rate:
            return values
        return resample(values, item.sample_rate, self.sample_rate)
1✔
260

261
    def split(self, nb_subdata: int = 2) -> list[AudioData]:
        """Split the audio data object in the specified number of audio subdata.

        Every subdata shares the sample rate, instrument, normalization and
        normalization values of this AudioData. If none of the normalization
        values is set, they are computed from this AudioData beforehand.

        Parameters
        ----------
        nb_subdata: int
            Number of subdata in which to split the data.

        Returns
        -------
        list[AudioData]
            The list of AudioData subdata objects.

        """
        if any(self.normalization_values.values()):
            normalization_values = self.normalization_values
        else:
            normalization_values = self.get_normalization_values()

        base_parts = super().split(nb_subdata)
        return [
            AudioData.from_base_data(
                data=part,
                sample_rate=self.sample_rate,
                instrument=self.instrument,
                normalization=self.normalization,
                normalization_values=normalization_values,
            )
            for part in base_parts
        ]
290

291
    def split_frames(self, start_frame: int = 0, stop_frame: int = -1) -> AudioData:
        """Return a new AudioData from a subpart of this AudioData's data.

        Parameters
        ----------
        start_frame: int
            First frame included in the new AudioData.
        stop_frame: int
            First frame after the last frame included in the new AudioData.
            -1 (the default) means that the new AudioData stops at the end
            of this AudioData.

        Returns
        -------
        AudioData
            A new AudioData which data is included between start_frame and stop_frame.

        Raises
        ------
        ValueError
            If start_frame is negative, or if stop_frame is lower than -1 or
            greater than the number of frames of the data.

        """
        if start_frame < 0:
            raise ValueError("Start_frame must be greater than or equal to 0.")

        # self.shape is a (frames, channels) tuple for multichannel data:
        # comparing an int against that tuple raises a TypeError, so only the
        # frame count is used for the bound check.
        shape = self.shape
        nb_frames = shape[0] if isinstance(shape, tuple) else shape
        if stop_frame < -1 or stop_frame > nb_frames:
            raise ValueError("Stop_frame must be lower than the length of the data.")

        # The start offset is rounded up to the next nanosecond.
        start_timestamp = self.begin + Timedelta(
            seconds=ceil(start_frame / self.sample_rate * 1e9) / 1e9,
        )
        stop_timestamp = (
            self.end
            if stop_frame == -1
            else self.begin + Timedelta(seconds=stop_frame / self.sample_rate)
        )
        # If no normalization value is set, they are computed from this
        # AudioData before being handed to the new one.
        normalization_values = (
            self.normalization_values
            if any(self.normalization_values.values())
            else self.get_normalization_values()
        )
        return AudioData.from_files(
            list(self.files),
            start_timestamp,
            stop_timestamp,
            sample_rate=self.sample_rate,
            instrument=self.instrument,
            normalization=self.normalization,
            normalization_values=normalization_values,
        )
334

335
    def to_dict(self) -> dict:
        """Serialize an AudioData to a dictionary.

        The dictionary of the parent class is extended with the "instrument",
        "sample_rate", "normalization" and "normalization_values" entries.

        Returns
        -------
        dict:
            The serialized dictionary representing the AudioData.

        """
        serialized_base = super().to_dict()
        serialized_instrument = (
            self.instrument.to_dict() if self.instrument is not None else None
        )
        return {
            **serialized_base,
            "instrument": serialized_instrument,
            "sample_rate": self.sample_rate,
            "normalization": self.normalization.value,
            "normalization_values": self.normalization_values,
        }
359

360
    @classmethod
    def from_dict(cls, dictionary: dict) -> AudioData:
        """Deserialize an AudioData from a dictionary.

        Parameters
        ----------
        dictionary: dict
            The serialized dictionary representing the AudioData.
            The "instrument", "sample_rate", "normalization" and
            "normalization_values" keys are read from it.

        Returns
        -------
        AudioData
            The deserialized AudioData.

        """
        base_data = BaseData.from_dict(dictionary)
        serialized_instrument = dictionary["instrument"]
        if serialized_instrument is None:
            instrument = None
        else:
            instrument = Instrument.from_dict(serialized_instrument)
        return cls.from_base_data(
            data=base_data,
            sample_rate=dictionary["sample_rate"],
            normalization=Normalization(dictionary["normalization"]),
            normalization_values=dictionary["normalization_values"],
            instrument=instrument,
        )
388

389
    @classmethod
    def from_files(
        cls,
        files: list[AudioFile],
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        sample_rate: float | None = None,
        instrument: Instrument | None = None,
        normalization: Normalization = Normalization.RAW,
        normalization_values: dict | None = None,
    ) -> AudioData:
        """Build an AudioData object from a list of AudioFiles.

        Parameters
        ----------
        files: list[AudioFile]
            List of AudioFiles containing the data.
        begin: Timestamp | None
            Begin of the data object.
            Defaulted to the begin of the first file.
        end: Timestamp | None
            End of the data object.
            Defaulted to the end of the last file.
        sample_rate: float | None
            Sample rate of the AudioData.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.
        normalization: Normalization
            The type of normalization to apply to the audio data.
        normalization_values: dict | None
            Mean, peak and std values with which to normalize the data.

        Returns
        -------
        AudioData:
            The AudioData object.

        """
        # The files are first gathered in a BaseData, then converted.
        base_data = BaseData.from_files(files, begin, end)
        return cls.from_base_data(
            data=base_data,
            sample_rate=sample_rate,
            instrument=instrument,
            normalization=normalization,
            normalization_values=normalization_values,
        )
435

436
    @classmethod
    def from_base_data(
        cls,
        data: BaseData,
        sample_rate: float | None = None,
        instrument: Instrument | None = None,
        normalization: Normalization = Normalization.RAW,
        normalization_values: dict | None = None,
    ) -> AudioData:
        """Build an AudioData object from a BaseData object.

        Each item of the BaseData is converted to an AudioItem.

        Parameters
        ----------
        data: BaseData
            BaseData object to convert to AudioData.
        sample_rate: float | None
            Sample rate of the AudioData.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.
        normalization: Normalization
            The type of normalization to apply to the audio data.
        normalization_values: dict | None
            Mean, peak and std values with which to normalize the data.

        Returns
        -------
        AudioData:
            The AudioData object.

        """
        audio_items = [AudioItem.from_base_item(base_item) for base_item in data.items]
        return cls(
            items=audio_items,
            sample_rate=sample_rate,
            instrument=instrument,
            normalization=normalization,
            normalization_values=normalization_values,
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc