• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Project-OSmOSE / OSEkit / 23139359410

16 Mar 2026 10:33AM UTC coverage: 98.79% (+0.002%) from 98.788%
23139359410

Pull #350

github

web-flow
Merge 715866291 into 184a36d18
Pull Request #350: [DRAFT] Read dataset metadata

10 of 12 new or added lines in 3 files covered. (83.33%)

2 existing lines in 1 file now uncovered.

4979 of 5040 relevant lines covered (98.79%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.7
/src/osekit/core_api/audio_data.py
1
"""AudioData represent audio data scattered through different AudioFiles.
2

3
The AudioData has a collection of AudioItem.
4
The data is accessed via an AudioItem object per AudioFile.
5
"""
6

7
from __future__ import annotations
1✔
8

9
from collections.abc import Generator
1✔
10
from math import ceil
1✔
11
from typing import TYPE_CHECKING, Self
1✔
12

13
import numpy as np
1✔
14
import soundfile as sf
1✔
15
import soxr
1✔
16
from pandas import Timedelta, Timestamp
1✔
17

18
from osekit.config import resample_quality_settings
1✔
19
from osekit.core_api.audio_file import AudioFile
1✔
20
from osekit.core_api.audio_item import AudioItem
1✔
21
from osekit.core_api.base_data import BaseData
1✔
22
from osekit.core_api.instrument import Instrument
1✔
23
from osekit.utils.audio_utils import Normalization, normalize
1✔
24

25
if TYPE_CHECKING:
26
    from pathlib import Path
27

28

29
class AudioData(BaseData[AudioItem, AudioFile]):
1✔
30
    """``AudioData`` represent audio data scattered through different ``AudioFiles``.
31

32
    The ``AudioData`` has a collection of ``AudioItem``.
33
    The data is accessed via an ``AudioItem`` object per ``AudioFile``.
34
    """
35

36
    item_cls = AudioItem
1✔
37

38
    def __init__(
        self,
        items: list[AudioItem] | None = None,
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        name: str | None = None,
        sample_rate: int | None = None,
        instrument: Instrument | None = None,
        normalization: Normalization = Normalization.RAW,
        normalization_values: dict | None = None,
    ) -> None:
        """Build an ``AudioData`` out of a list of ``AudioItems``.

        Parameters
        ----------
        items: list[AudioItem] | None
            The ``AudioItems`` that make up the ``AudioData``.
        begin: Timestamp | None
            Begin of the empty data.
            Only taken into account when items is None.
        end: Timestamp | None
            End of the empty data.
            Only taken into account when items is None.
        name: str | None
            Name given to the exported files.
        sample_rate: int | None
            Sample rate of the audio data.
            When None, the sample rate of the first item that has one is used.
        instrument: Instrument | None
            Instrument that might be used to convert the wav audio data
            to acoustic pressure.
        normalization: Normalization
            Kind of normalization applied to the audio data.
        normalization_values: dict | None
            Mean, peak and std values used for normalization.
            A falsy value is replaced with a dict in which the three are None.

        """
        super().__init__(items=items, begin=begin, end=end, name=name)
        # The sample rate may be read from the items, which the parent
        # constructor has just set up.
        self._set_sample_rate(sample_rate=sample_rate)
        self.normalization_values = normalization_values
        self.normalization = normalization
        self.instrument = instrument
77

78
    @property
1✔
79
    def nb_channels(self) -> int:
1✔
80
        """Number of channels of the audio data."""
81
        return max(
1✔
82
            [1] + [item.nb_channels for item in self.items if type(item) is AudioItem],
83
        )
84

85
    @property
1✔
86
    def shape(self) -> tuple[int, int]:
1✔
87
        """Shape of the audio data.
88

89
        First element is the number of data point in each channel,
90
        second element is the number of channels.
91

92
        """
93
        return self.length, self.nb_channels
1✔
94

95
    @property
1✔
96
    def length(self) -> int:
1✔
97
        """Number of data points in each channel."""
98
        return round(self.sample_rate * self.duration.total_seconds())
1✔
99

100
    @property
1✔
101
    def normalization(self) -> Normalization:
1✔
102
        """The type of normalization to apply to the audio data."""
103
        return self._normalization
1✔
104

105
    @normalization.setter
1✔
106
    def normalization(self, value: Normalization) -> None:
1✔
107
        self._normalization = value
1✔
108

109
    @property
1✔
110
    def normalization_values(self) -> dict:
1✔
111
        """Mean, peak and std values used for normalization."""
112
        return self._normalization_values
1✔
113

114
    @normalization_values.setter
1✔
115
    def normalization_values(self, value: dict | None) -> None:
1✔
116
        self._normalization_values = (
1✔
117
            value
118
            if value
119
            else {
120
                "mean": None,
121
                "peak": None,
122
                "std": None,
123
            }
124
        )
125

126
    @classmethod
    def _make_item(
        cls,
        file: AudioFile | None = None,
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
    ) -> AudioItem:
        """Build the ``AudioItem`` of an ``AudioFile`` between two timestamps.

        Parameters
        ----------
        file: AudioFile | None
            ``AudioFile`` the item points to.
        begin: Timestamp | None
            Begin of the item.
        end: Timestamp | None
            End of the item.

        Returns
        -------
        AudioItem:
            The item built from ``file``, ``begin`` and ``end``.

        """
        item = AudioItem(file=file, begin=begin, end=end)
        return item
150

151
    @classmethod
    def _make_file(cls, path: Path, begin: Timestamp) -> AudioFile:
        """Build an ``AudioFile`` out of a path and a begin timestamp.

        Parameters
        ----------
        path: Path
            Location of the file.
        begin: Timestamp
            Timestamp at which the file begins.

        Returns
        -------
        AudioFile:
            The ``AudioFile`` built from ``path`` and ``begin``.

        """
        audio_file = AudioFile(path=path, begin=begin)
        return audio_file
169

170
    def get_normalization_values(self) -> dict:
1✔
171
        """Return the values used for normalizing the audio data.
172

173
        Returns
174
        -------
175
        dict:
176
            "mean": mean value to substract to center values on 0.
177
            "peak": peak value for PEAK normalization
178
            "std": standard deviation used for z-score normalization
179

180
        """
181
        values = np.array(self.get_raw_value())
1✔
182
        return {
1✔
183
            "mean": values.mean(),
184
            "peak": values.max(),
185
            "std": values.std(),
186
        }
187

188
    def __eq__(self, other: AudioData) -> bool:
1✔
189
        """Override __eq__."""
190
        return self.sample_rate == other.sample_rate and super().__eq__(other)
1✔
191

192
    def _set_sample_rate(self, sample_rate: int | None = None) -> None:
1✔
193
        """Set the ``AudioFile`` sample rate.
194

195
        If the sample_rate is specified, it is set.
196
        If it is not specified, it is set to the sampling rate of the
197
        first item that has one.
198
        Else, it is set to None.
199
        """
200
        if sample_rate is not None:
1✔
201
            self.sample_rate = sample_rate
1✔
202
            return
1✔
203
        if sr := next(
1✔
204
            (item.sample_rate for item in self.items if item.sample_rate is not None),
205
            None,
206
        ):
207
            self.sample_rate = sr
1✔
208
            return
1✔
209
        self.sample_rate = None
1✔
210

211
    def get_raw_value(self) -> np.ndarray:
1✔
212
        """Return the raw value of the audio data before normalization.
213

214
        The data from the audio file will be resampled if necessary.
215

216
        Returns
217
        -------
218
        np.ndarray:
219
            The value of the audio data.
220

221
        """
222
        return np.vstack(list(self.stream()))
1✔
223

224
    @staticmethod
1✔
225
    def _flush(
1✔
226
        resampler: soxr.ResampleStream,
227
        remaining_samples: int,
228
    ) -> np.ndarray:
229
        flush = resampler.resample_chunk(np.array([]), last=True)
1✔
230
        if len(flush) == 0:
1✔
NEW
231
            return np.array([])[:, None]
×
232
        if not remaining_samples:
1✔
NEW
233
            return np.array([])[:, None]
×
234
        flush = flush[:remaining_samples]
1✔
235
        return flush[:, None] if flush.ndim == 1 else flush
1✔
236

237
    def stream(self, chunk_size: int = 8192) -> Generator[np.ndarray, None, None]:
        """Stream the audio data in chunks.

        Items are read in order:

        - An empty item is rendered by repeating ``item.get_value()`` along
          the first axis for the duration of the item. These chunks are not
          trimmed.
        - The chunks of other items are passed through a
          ``soxr.ResampleStream`` if the sample rate of the item differs from
          ``self.sample_rate``. The resampler is flushed and rebuilt each time
          the input sample rate changes, and flushed one last time once all
          items have been read.

        Item chunks and flushes are trimmed so that the total number of
        produced samples does not exceed ``self.length``. The generator stops
        as soon as that number is reached after an item chunk.

        Parameters
        ----------
        chunk_size: int
            Size of the chunks requested from the ``stream()`` of each item.

        Yields
        ------
        np.ndarray:
            Successive chunks of the streamed audio data, with samples along
            the first axis. Some chunks might be empty.

        """
        resampler = None
        input_sr = None
        produced_samples = 0
        total_samples = self.length

        for item in self.items:
            if item.is_empty:
                silence_length = round(item.duration.total_seconds() * self.sample_rate)
                yield item.get_value().repeat(
                    silence_length,
                    axis=0,
                )
                produced_samples += silence_length
                continue

            if (resampler is None) or (input_sr != item.sample_rate):
                if resampler:
                    # Drain the previous resampler before switching input rate.
                    flush = self._flush(
                        resampler=resampler,
                        remaining_samples=total_samples - produced_samples,
                    )
                    yield flush
                    # The flush has one row per sample: count rows, not columns.
                    # This is also safe when the flush is empty.
                    produced_samples += len(flush)
                input_sr = item.sample_rate
                quality = resample_quality_settings[
                    "downsample" if input_sr > self.sample_rate else "upsample"
                ]
                resampler = soxr.ResampleStream(
                    in_rate=input_sr,
                    out_rate=self.sample_rate,
                    num_channels=self.nb_channels,
                    quality=quality,
                    dtype=np.float64,
                )

            for chunk in item.stream(chunk_size=chunk_size):
                y = chunk
                if item.sample_rate != self.sample_rate:
                    y = resampler.resample_chunk(x=chunk)

                # Never produce more samples than the expected total.
                remaining = total_samples - produced_samples
                y = y[:remaining]
                produced_samples += len(y)

                yield y

                if produced_samples >= total_samples:
                    return

        if resampler is None:
            return

        yield self._flush(
            resampler=resampler,
            remaining_samples=total_samples - produced_samples,
        )
308

309
    def get_value(self) -> np.ndarray:
        """Return the normalized value of the audio data.

        The raw value is fetched with ``get_raw_value()`` and passed to
        ``normalize`` along with the normalization type and the normalization
        values of the ``AudioData``.

        Returns
        -------
        np.ndarray:
            The value of the audio data.

        """
        raw_values = self.get_raw_value()
        return normalize(
            values=raw_values,
            normalization=self.normalization,
            **self.normalization_values,
        )
325

326
    def get_value_calibrated(self) -> np.ndarray:
1✔
327
        """Return the value of the audio data accounting for the calibration factor.
328

329
        If the instrument parameter of the audio data is not None, the returned value is
330
        calibrated in units of Pa.
331

332
        Returns
333
        -------
334
        np.ndarray:
335
            The calibrated value of the audio data.
336

337
        """
338
        raw_data = self.get_value()
1✔
339
        calibration_factor = (
1✔
340
            1.0 if self.instrument is None else self.instrument.end_to_end
341
        )
342
        return raw_data * calibration_factor
1✔
343

344
    def write(
        self,
        folder: Path,
        *,
        subtype: str | None = None,
        link: bool = False,
    ) -> None:
        """Export the audio data to a wav file.

        The file is named ``"{self}.wav"`` and contains the value returned by
        ``get_value()``.

        Parameters
        ----------
        folder: pathlib.Path
            Folder in which the audio file is written.
        subtype: str | None
            Subtype as provided by the soundfile module.
            Defaulted as the default 16-bit PCM for WAV audio files.
        link: bool
            If True, the ``AudioData`` is bound to the written file:
            its items are replaced with a single item that matches the whole
            new ``AudioFile``.

        """
        super().create_directories(path=folder)
        destination = folder / f"{self}.wav"
        sf.write(destination, self.get_value(), self.sample_rate, subtype=subtype)
        if not link:
            return
        self.link(folder=folder)
375

376
    def link(self, folder: Path) -> None:
        """Bind the ``AudioData`` to an ``AudioFile`` located in the folder.

        The folder is expected to contain a file named ``"str(self).wav"``:
        linking is meant for ``AudioData`` objects that have already been
        written. Once linked, the items of the ``AudioData`` are those of an
        ``AudioData`` built from that single ``AudioFile``.

        Parameters
        ----------
        folder: Path
            Folder containing the ``AudioFile`` to which the ``AudioData``
            instance should be linked.

        """
        linked_file = AudioFile(path=folder / f"{self}.wav", begin=self.begin)
        linked_data = AudioData.from_files([linked_file])
        self.items = linked_data.items
396

397
    def split(
        self,
        nb_subdata: int = 2,
        *,
        pass_normalization: bool = True,
    ) -> list[Self]:
        """Split the audio data object in the specified number of audio subdata.

        Parameters
        ----------
        nb_subdata: int
            Number of subdata in which to split the data.
        pass_normalization: bool
            If True, normalization values (mean, std, peak) are passed to the
            split chunks. The values already stored in the ``AudioData`` are
            reused if at least one of them is not None. Otherwise they are
            computed from the original audio data, which might lead to a RAM
            saturation if the original ``AudioData`` is very long.
            If False, no normalization values are passed.

        Returns
        -------
        list[AudioData]
            The list of ``AudioData`` subdata objects.

        """
        if not pass_normalization:
            normalization_values = None
        elif any(value is not None for value in self.normalization_values.values()):
            # Compare against None rather than rely on truthiness: a stored
            # value of 0.0 is a valid value and must not trigger a recompute.
            normalization_values = self.normalization_values
        else:
            normalization_values = self.get_normalization_values()
        return super().split(
            nb_subdata=nb_subdata,
            normalization_values=normalization_values,
        )
431

432
    def _make_split_data(
        self,
        files: list[AudioFile],
        begin: Timestamp,
        end: Timestamp,
        **kwargs: tuple[float, float, float],
    ) -> AudioData:
        """Build one of the ``AudioData`` produced by an ``AudioData.split()`` call.

        The sample rate, instrument and normalization type of the original
        ``AudioData`` are passed on to the new object.

        Parameters
        ----------
        files: list[AudioFile]
            The ``AudioFiles`` of the original ``AudioData``.
        begin: Timestamp
            Begin timestamp of the split ``AudioData``.
        end: Timestamp
            End timestamp of the split ``AudioData``.
        kwargs:
            normalization_values:
                Values used for normalizing the split ``AudioData``.
                This key is required.

        Returns
        -------
        AudioData:
            The ``AudioData`` instance.

        """
        inherited_settings = {
            "sample_rate": self.sample_rate,
            "instrument": self.instrument,
            "normalization": self.normalization,
            "normalization_values": kwargs["normalization_values"],
        }
        return AudioData.from_files(
            files=files,
            begin=begin,
            end=end,
            **inherited_settings,
        )
468

469
    def split_frames(
1✔
470
        self,
471
        start_frame: int = 0,
472
        stop_frame: int = -1,
473
        *,
474
        pass_normalization: bool = True,
475
    ) -> AudioData:
476
        """Return a new ``AudioData`` from a subpart of this ``AudioData``'s data.
477

478
        Parameters
479
        ----------
480
        start_frame: int
481
            First frame included in the new ``AudioData``.
482
        stop_frame: int
483
            First frame after the last frame included in the new ``AudioData``.
484
        pass_normalization: bool
485
            If ``True``, the normalization values (mean, std, peak) will be computed
486
            from the original audio data and passed to the split chunks.
487
            If the original ``AudioData`` is very long, this might lead to
488
            a RAM saturation.
489

490
        Returns
491
        -------
492
        AudioData
493
            A new ``AudioData`` which data is included between start_frame and stop_frame.
494

495
        """
496
        if start_frame < 0:
1✔
497
            msg = "Start_frame must be greater than or equal to 0."
1✔
498
            raise ValueError(msg)
1✔
499
        if stop_frame < -1 or stop_frame > self.length:
1✔
500
            msg = "Stop_frame must be lower than the length of the data."
1✔
501
            raise ValueError(msg)
1✔
502

503
        start_timestamp = self.begin + Timedelta(
1✔
504
            seconds=ceil(start_frame / self.sample_rate * 1e9) / 1e9,
505
        )
506
        stop_timestamp = (
1✔
507
            self.end
508
            if stop_frame == -1
509
            else self.begin + Timedelta(seconds=stop_frame / self.sample_rate)
510
        )
511
        if not pass_normalization:
1✔
512
            normalization_values = None
1✔
513
        elif any(self.normalization_values.values()):
1✔
514
            normalization_values = self.normalization_values
1✔
515
        else:
516
            normalization_values = self.get_normalization_values()
1✔
517
        return AudioData.from_files(
1✔
518
            list(self.files),
519
            start_timestamp,
520
            stop_timestamp,
521
            sample_rate=self.sample_rate,
522
            instrument=self.instrument,
523
            normalization=self.normalization,
524
            normalization_values=normalization_values,
525
        )
526

527
    def to_dict(self) -> dict:
        """Serialize an ``AudioData`` to a dictionary.

        The dictionary of the parent class is completed with the serialized
        instrument (or None), the sample rate, the value of the normalization
        type and the normalization values.

        Returns
        -------
        dict:
            The serialized dictionary representing the ``AudioData``.

        """
        return {
            **super().to_dict(),
            "instrument": (
                None if self.instrument is None else self.instrument.to_dict()
            ),
            "sample_rate": self.sample_rate,
            "normalization": self.normalization.value,
            "normalization_values": self.normalization_values,
        }
551

552
    @classmethod
    def _from_base_dict(
        cls,
        dictionary: dict,
        files: list[AudioFile],
        begin: Timestamp,
        end: Timestamp,
        **kwargs,  # noqa: ANN003
    ) -> AudioData:
        """Deserialize the ``AudioData``-specific parts of a Data dictionary.

        This method is called within the ``BaseData.from_dict()`` method, which
        deserializes the base files, begin and end parameters.

        Parameters
        ----------
        dictionary: dict
            The serialized dictionary representing the ``AudioData``.
            The ``"instrument"``, ``"sample_rate"``, ``"normalization"`` and
            ``"normalization_values"`` keys are read.
        files: list[AudioFile]
            The list of deserialized ``AudioFiles``.
        begin: Timestamp
            The deserialized begin timestamp.
        end: Timestamp
            The deserialized end timestamp.
        kwargs:
            Unused.

        Returns
        -------
        AudioData
            The deserialized ``AudioData``.

        """
        serialized_instrument = dictionary["instrument"]
        if serialized_instrument is None:
            instrument = None
        else:
            instrument = Instrument.from_dict(serialized_instrument)
        audio_settings = {
            "instrument": instrument,
            "sample_rate": dictionary["sample_rate"],
            "normalization": Normalization(dictionary["normalization"]),
            "normalization_values": dictionary["normalization_values"],
        }
        return cls.from_files(files=files, begin=begin, end=end, **audio_settings)
599

600
    @classmethod
    def from_files(
        cls,
        files: list[AudioFile],  # The method is redefined just to specify the type
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        name: str | None = None,
        **kwargs,  # noqa: ANN003
    ) -> AudioData:
        """Build an ``AudioData`` object from a list of ``AudioFiles``.

        All arguments are forwarded to the parent class ``from_files()``.

        Parameters
        ----------
        files: list[AudioFile]
            List of ``AudioFiles`` containing the data.
        begin: Timestamp | None
            Begin of the data object.
            Defaulted to the begin of the first file.
        end: Timestamp | None
            End of the data object.
            Defaulted to the end of the last file.
        name: str | None
            Name of the exported files.
        kwargs
            Keyword arguments that are passed to the cls constructor.

            sample_rate: int
            The sample rate of the audio data.

            instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.

            normalization: Normalization
            The type of normalization to apply to the audio data.

        Returns
        -------
        AudioData:
            The ``AudioData`` object.

        """
        # Redefined only to narrow the type of ``files``.
        audio_data = super().from_files(
            files=files,
            begin=begin,
            end=end,
            name=name,
            **kwargs,
        )
        return audio_data
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc