• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

gabyfle / SoundML / 32

14 May 2025 11:22PM UTC coverage: 43.312% (-11.7%) from 54.982%
32

push

github

gabyfle
STFT implementation, and various other things

- Modified the testing framework
- Added cosine_sum window function and now using it to compute various window functions
- Added STFT computation (this should close https://github.com/gabyfle/SoundML/issues/8)
- Added tests for STFT to compare results against librosa

50 of 71 new or added lines in 3 files covered. (70.42%)

9 existing lines in 1 file now uncovered.

204 of 471 relevant lines covered (43.31%)

35314.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

13.33
/src/utils.ml
1
(*****************************************************************************)
2
(*                                                                           *)
3
(*                                                                           *)
4
(*  Copyright (C) 2023-2025                                                  *)
5
(*    Gabriel Santamaria                                                     *)
6
(*                                                                           *)
7
(*                                                                           *)
8
(*  Licensed under the Apache License, Version 2.0 (the "License");          *)
9
(*  you may not use this file except in compliance with the License.         *)
10
(*  You may obtain a copy of the License at                                  *)
11
(*                                                                           *)
12
(*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13
(*                                                                           *)
14
(*  Unless required by applicable law or agreed to in writing, software      *)
15
(*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16
(*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17
(*  See the License for the specific language governing permissions and      *)
18
(*  limitations under the License.                                           *)
19
(*                                                                           *)
20
(*****************************************************************************)
21

22
open Owl
23
open Bigarray
24

25
module Convert = struct
26
  let mel_to_hz ?(htk : bool = false) mels =
×
27
    let open Audio.G in
×
28
    if htk then 700. $* (10. $** (mels /$ 2595.) -$ 1.)
×
29
    else
30
      let f_min = 0.0 in
×
31
      let f_sp = 200.0 /. 3. in
32
      let min_log_hz = 1000. in
33
      let min_log_mel = (min_log_hz -. f_min) /. f_sp in
34
      let logstep = Maths.log 6.4 /. 27.0 in
×
35
      let linear_mask = mels <=.$ min_log_mel in
36
      let log_mask = mels >=.$ min_log_mel in
×
37
      let linear_result = f_min $+ (f_sp $* mels) in
×
38
      let log_result = min_log_hz $* exp (logstep $* mels -$ min_log_mel) in
×
39
      (linear_mask * linear_result) + (log_mask * log_result)
×
40

41
  let hz_to_mel ?(htk : bool = false) freqs =
×
42
    let open Audio.G in
×
43
    if htk then 2595. $* log10 (1. $+ freqs /$ 700.)
×
44
    else
45
      let f_min = 0.0 in
×
46
      let f_sp = 200.0 /. 3. in
47
      let min_log_hz = 1000. in
48
      let min_log_mel = (min_log_hz -. f_min) /. f_sp in
49
      let logstep = Maths.log 6.4 /. 27.0 in
×
50
      let linear_mask = freqs <=.$ min_log_hz in
51
      let log_mask = freqs >=.$ min_log_hz in
×
52
      let linear_result = (freqs -$ f_min) /$ f_sp in
×
53
      let log_result = min_log_mel $+ log (freqs /$ min_log_hz) /$ logstep in
×
54
      (linear_mask * linear_result)
×
55
      (* we need to filter out possible nans here since log can result in
56
         nans *)
57
      + map (fun x -> if Float.is_nan x then 0.0 else x) (log_mask * log_result)
×
58

59
  type reference =
60
    | RefFloat of float
61
    | RefFunction of ((float, float32_elt) Audio.G.t -> float)
62

63
  let power_to_db ?(amin = 1e-10) ?(top_db : float option = Some 80.)
×
64
      (ref : reference) (s : (float, float32_elt) Audio.G.t) =
65
    assert (amin > 0.) ;
×
66
    let ref = match ref with RefFloat x -> x | RefFunction f -> f s in
×
67
    let amin = Audio.G.(init (kind s) (shape s) (fun _ -> amin)) in
×
68
    let ref = Audio.G.(init (kind s) (shape s) (fun _ -> ref)) in
×
69
    let log_spec = Audio.G.(10.0 $* log10 (max2 amin s)) in
×
70
    Audio.G.(log_spec -= (10.0 $* log10 (max2 amin ref))) ;
×
71
    let res =
72
      match top_db with
73
      | None ->
×
74
          log_spec
75
      | Some top_db ->
×
76
          assert (top_db >= 0.0) ;
×
77
          let max_spec = Audio.G.max' log_spec in
78
          let max_spec = max_spec -. top_db in
×
79
          let max_spec =
80
            Audio.G.(init (kind log_spec) (shape log_spec) (fun _ -> max_spec))
×
81
          in
82
          Audio.G.max2 log_spec max_spec
×
83
    in
84
    res
85

86
  let db_to_power ?(amin = 1e-10) (ref : reference)
×
87
      (s : ('a, Bigarray.float32_elt) Audio.G.t) =
88
    assert (amin > 0.) ;
×
89
    let ref = match ref with RefFloat x -> x | RefFunction f -> f s in
×
90
    let amin = Audio.G.(init (kind s) (shape s) (fun _ -> amin)) in
×
91
    let ref = Audio.G.(init (kind s) (shape s) (fun _ -> ref)) in
×
92
    let spec = Audio.G.(10.0 $* s /$ 10.0) in
×
93
    Audio.G.(spec += (10.0 $* log10 (max2 amin ref))) ;
×
94
    Audio.G.(exp10 spec)
95
end
96

97
let pad_center (data : ('a, 'b) Audio.G.t) (target_size : int) (value : 'a) :
98
    ('a, 'b) Audio.G.t =
99
  (*if Audio.G.numel data = 0 then data else*)
100
  let size = Audio.G.shape data |> fun s -> s.(0) in
8✔
101
  if size = target_size then data
2✔
102
  else if size > target_size then
6✔
103
    raise
2✔
104
      (Invalid_argument
105
         "An error occured while trying to pad: current_size > target_size" )
106
  else
107
    let pad_total = target_size - size in
4✔
108
    let pad_left = pad_total / 2 in
109
    let pad_right = pad_total - pad_left in
110
    let padding = [[pad_left; pad_right]] in
111
    Audio.G.pad ~v:value padding data
112

113
let frame (x : ('a, 'b) Audio.G.t) (frame_size : int) (hop_size : int)
114
    (axis : int) : ('a, 'b) Audio.G.t =
NEW
115
  if frame_size <= 0 then invalid_arg "frame_size must be positive" ;
×
NEW
116
  if hop_size <= 0 then invalid_arg "hop_size must be positive" ;
×
117
  let shape_in = Audio.G.shape x in
24✔
118
  let num_dims_in = Array.length shape_in in
24✔
119
  let len_axis = shape_in.(axis) in
24✔
120
  let num_frames =
24✔
NEW
121
    if len_axis < frame_size then 0
×
122
    else ((len_axis - frame_size) / hop_size) + 1
24✔
123
  in
124
  let shape_out_list =
125
    let prefix_dims = Array.to_list (Array.sub shape_in 0 axis) in
24✔
126
    let suffix_dims =
24✔
127
      if axis + 1 < num_dims_in then
NEW
128
        Array.to_list (Array.sub shape_in (axis + 1) (num_dims_in - (axis + 1)))
×
129
      else []
24✔
130
    in
131
    prefix_dims @ [num_frames; frame_size] @ suffix_dims
132
  in
133
  let shape_out = Array.of_list shape_out_list in
NEW
134
  if num_frames <= 0 then Audio.G.empty (Audio.G.kind x) shape_out
×
135
  else
136
    let get_value_for_output_indices (out_indices : int array) : 'a =
24✔
137
      let in_indices = Array.make num_dims_in 0 in
2,076,672✔
138
      for d = 0 to axis - 1 do
2,076,672✔
NEW
139
        in_indices.(d) <- out_indices.(d)
×
140
      done ;
141
      let frame_idx = out_indices.(axis) in
142
      let offset_in_frame = out_indices.(axis + 1) in
2,076,672✔
143
      in_indices.(axis) <- (frame_idx * hop_size) + offset_in_frame ;
2,076,672✔
144
      for d = axis + 1 to num_dims_in - 1 do
2,076,672✔
NEW
145
        in_indices.(d) <- out_indices.(d + 1)
×
146
      done ;
147
      Audio.G.get x in_indices
148
    in
149
    Audio.G.init_nd (Audio.G.kind x) shape_out get_value_for_output_indices
24✔
150

151
let fftfreq (n : int) (d : float) =
152
  let nslice = ((n - 1) / 2) + 1 in
×
153
  let fhalf =
154
    Audio.G.linspace Bigarray.Float32 0. (float_of_int nslice) nslice
×
155
  in
156
  let shalf =
×
157
    Audio.G.linspace Bigarray.Float32 (-.float_of_int nslice) (-1.) nslice
×
158
  in
159
  let v = Audio.G.concatenate ~axis:0 [|fhalf; shalf|] in
×
160
  Arr.(1. /. (d *. float_of_int n) $* v)
×
161

162
let rfftfreq (n : int) (d : float) =
163
  let nslice = n / 2 in
×
164
  let res =
165
    Audio.G.linspace Bigarray.Float32 0. (float_of_int nslice) (nslice + 1)
×
166
  in
167
  Arr.(1. /. (d *. float_of_int n) $* res)
×
168

169
let melfreq ?(nmels : int = 128) ?(fmin : float = 0.) ?(fmax : float = 11025.)
×
170
    ?(htk : bool = false) =
×
171
  let open Audio.G in
×
172
  let bounds =
173
    of_array Bigarray.Float32 [|fmin; fmax|] [|2|] |> Convert.hz_to_mel ~htk
×
174
  in
175
  let mel_f =
×
176
    linspace Bigarray.Float32 (get bounds [|0|]) (get bounds [|1|]) nmels
×
177
  in
178
  Convert.mel_to_hz mel_f ~htk
×
179
[@@warning "-unerasable-optional-argument"]
180

181
let roll (x : ('a, 'b) Audio.G.t) (shift : int) =
182
  let n = Array.get (Audio.G.shape x) 0 in
×
183
  let shift = if n = 0 then 0 else shift mod n in
×
184
  if shift = 0 then x
×
185
  else
186
    let shift = if shift < 0 then shift + n else shift in
×
187
    let result = Audio.G.copy x in
188
    Audio.G.set_slice_ ~out:result
×
189
      [[shift; n - 1]]
190
      result
191
      (Audio.G.get_slice [[0; n - shift - 1]; []] x) ;
×
192
    Audio.G.set_slice_ ~out:result
×
193
      [[0; shift - 1]]
194
      result
195
      (Audio.G.get_slice [[n - shift; n - 1]; []] x) ;
×
196
    result
×
197

198
let _float_typ_elt : type a b. (a, b) kind -> float -> a = function
199
  | Float32 ->
×
200
      fun a -> a
×
201
  | Float64 ->
×
202
      fun a -> a
×
203
  | Complex32 ->
×
204
      fun a -> Complex.{re= a; im= 0.}
×
205
  | Complex64 ->
×
206
      fun a -> Complex.{re= a; im= 0.}
×
207
  | Int8_signed ->
×
208
      int_of_float
209
  | Int8_unsigned ->
×
210
      int_of_float
211
  | Int16_signed ->
×
212
      int_of_float
213
  | Int16_unsigned ->
×
214
      int_of_float
215
  | Int32 ->
×
216
      fun a -> int_of_float a |> Int32.of_int
×
217
  | Int64 ->
×
218
      fun a -> int_of_float a |> Int64.of_int
×
219
  | _ ->
×
220
      failwith "_float_typ_elt: unsupported operation"
221

222
let cov ?(b : ('a, 'b) Audio.G.t option) ~(a : ('a, 'b) Audio.G.t) =
223
  let a =
×
224
    match b with
225
    | Some b ->
×
226
        let na = Audio.G.numel a in
227
        let nb = Audio.G.numel b in
×
228
        assert (na = nb) ;
×
229
        let a = Audio.G.reshape a [|na; 1|] in
230
        let b = Audio.G.reshape b [|nb; 1|] in
×
231
        Audio.G.concat_horizontal a b
×
232
    | None ->
×
233
        a
234
  in
235
  let mu = Audio.G.mean ~axis:0 ~keep_dims:true a in
236
  let a = Audio.G.sub a mu in
×
237
  let a' = Audio.G.transpose a in
×
238
  let c = Audio.G.dot a' a in
×
239
  let n =
×
240
    Audio.G.row_num a - 1
×
241
    |> Stdlib.max 1 |> float_of_int
×
242
    |> _float_typ_elt (Genarray.kind a)
×
243
  in
244
  Audio.G.div_scalar c n
×
245
[@@warning "-unerasable-optional-argument"]
246

247
let unwrap ?(discont = None) ?(axis = -1) ?(period = 2. *. Owl.Const.pi)
×
248
    (p : (float, 'a) Owl.Dense.Ndarray.Generic.t) =
249
  let nd = Audio.G.num_dims p in
×
250
  let dd = Audio.G.diff ~axis p in
×
251
  let discont = match discont with Some d -> d | None -> period /. 2. in
×
252
  let slices = Array.init nd (fun _ -> R [0; -1]) in
×
253
  slices.(axis) <- R [1; -1] ;
×
254
  let boundary_ambiguous, interval_high =
×
255
    if Float.is_integer period then (mod_float period 2. = 0., period /. 2.)
×
256
    else (true, period /. 2.)
×
257
  in
258
  let open Audio.G in
259
  let interval_low = -.interval_high in
260
  let ddmod = (dd -$ interval_low) %$ period in
×
261
  let mask = ddmod <.$ 0. in
×
262
  ddmod += (mask *$ period) ;
×
263
  ddmod +$= interval_low ;
×
264
  if boundary_ambiguous then (
×
265
    let mask = (ddmod =.$ interval_low) * (dd >.$ 0.) in
×
266
    ddmod *= (1. $- mask) ;
×
267
    ddmod += (mask *$ interval_high) ) ;
×
268
  let ph_correct = ddmod - dd in
×
269
  let mask = abs dd >.$ discont in
×
270
  let ph_correct = ph_correct * mask in
×
271
  let up = copy p in
272
  set_fancy_ext slices up (get_fancy_ext slices p + cumsum ~axis ph_correct) ;
×
273
  up
×
274

275
let outer (op : ('a, 'b) Audio.G.t -> ('a, 'b) Audio.G.t -> ('a, 'b) Audio.G.t)
276
    (x : ('a, 'b) Audio.G.t) (y : ('a, 'b) Audio.G.t) =
277
  let nx = (Audio.G.shape x).(0) in
×
278
  let ny = (Audio.G.shape y).(0) in
×
279
  let x = Audio.G.reshape x [|nx; 1|] in
×
280
  let y = Audio.G.reshape y [|1; ny|] in
×
281
  op x y
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc