18967319638

Committed 31 Oct 2025 08:41AM UTC coverage: 96.711% (+4.1%) from 92.572%

Build # 18967319638

Build Type

Pull #281

github

Committed by

web-flow

Commit Message

Merge 60f877ab4 into 1406de2c3

Pull Request #281: [DRAFT] Job rework

Run Details

536 of 548 new or added lines in 6 files covered. (97.81%)

10 existing lines in 1 file now uncovered.

3822 of 3952 relevant lines covered (96.71%)

0.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.21

/src/osekit/public_api/export_analysis.py

"""Module that provides scripts for running public API analyses."""

from __future__ import annotations

import argparse
import logging
import os
from pathlib import Path

from osekit import config, setup_logging
from osekit.config import global_logging_context as glc
from osekit.core_api.audio_dataset import AudioDataset
from osekit.core_api.spectro_dataset import SpectroDataset
from osekit.public_api.analysis import AnalysisType
from osekit.public_api.dataset import Dataset


def write_analysis(
    analysis_type: AnalysisType,
    ads: AudioDataset | None,
    sds: SpectroDataset | None,
    subtype: str | None = None,
    matrix_folder_path: Path | None = None,
    spectrogram_folder_path: Path | None = None,
    welch_folder_path: Path | None = None,
    first: int = 0,
    last: int | None = None,
    logger: logging.Logger | None = None,
    *,
    link: bool = True,
) -> None:
    """Export the outputs selected by ``analysis_type`` to disk.

    Audio files are written first (when requested), then the spectral
    outputs (matrices, spectrograms, welches). If no spectral output is
    requested, the function returns right after the audio export.

    Parameters
    ----------
    analysis_type: AnalysisType
        Flags that specify which outputs should be exported.
        See Analysis.AnalysisType docstring for more info.
    ads: AudioDataset | None
        The AudioDataset of which the data should be written.
        It is used only if AnalysisType.AUDIO is in analysis_type.
    sds: SpectroDataset | None
        The SpectroDataset of which the data should be written.
        It is used only if MATRIX, SPECTROGRAM or WELCH is in analysis_type.
    subtype: str | None
        Subtype of the written audio files as provided by the soundfile module.
        Defaulted as the default 16-bit PCM for WAV audio files.
        This parameter has no effect if AnalysisType.AUDIO is not in analysis_type.
    matrix_folder_path: Path | None
        The folder in which the matrix npz files should be written.
    spectrogram_folder_path: Path | None
        The folder in which the spectrogram png files should be written.
    welch_folder_path: Path | None
        The folder in which the welch npz files should be written.
    first: int
        Index of the first data object to write.
    last: int | None
        Index after the last data object to write.
    logger: logging.Logger | None
        Logger to use to log the analysis steps.
        If None, the logger of the global logging context is used.
    link: bool
        Forwarded as the ``link`` argument of the audio and matrix writers,
        so that the data gets linked to the exported files.

    """
    if logger is None:
        logger = glc.logger

    logger.info("Running analysis...")

    export_audio = AnalysisType.AUDIO in analysis_type
    export_matrix = AnalysisType.MATRIX in analysis_type
    export_spectrogram = AnalysisType.SPECTROGRAM in analysis_type
    export_welch = AnalysisType.WELCH in analysis_type

    # Every export step works on the same [first, last) range of data objects.
    data_range = {"first": first, "last": last}

    if export_audio:
        logger.info("Writing audio files...")
        ads.write(folder=ads.folder, subtype=subtype, link=link, **data_range)
        ads.write_json(ads.folder)

    if not (export_matrix or export_spectrogram or export_welch):
        # Audio-only (or empty) analysis: the SpectroDataset is not touched.
        return

    if export_audio:
        # Avoid re-computing the reshaped audio
        sds.link_audio_dataset(ads, **data_range)

    if export_matrix and export_spectrogram:
        logger.info("Computing and writing spectrum matrices and spectrograms...")
        sds.save_all(
            matrix_folder=matrix_folder_path,
            spectrogram_folder=spectrogram_folder_path,
            link=link,
            **data_range,
        )
    elif export_spectrogram:
        logger.info("Computing and writing spectrograms...")
        sds.save_spectrogram(folder=spectrogram_folder_path, **data_range)
    elif export_matrix:
        logger.info("Computing and writing spectrum matrices...")
        sds.write(folder=matrix_folder_path, link=link, **data_range)

    if export_welch:
        logger.info("Computing and writing welches...")
        sds.write_welch(folder=welch_folder_path, **data_range)

    # Update the sds from the JSON in case it has already been modified in another job
    sds.update_json_audio_data(**data_range)
    logger.info("Analysis done!")


def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser of the export script.

    Only ``--analysis`` is required. Every other option is optional and
    falls back to the default given below when it is absent from the
    command line.

    Returns
    -------
    argparse.ArgumentParser
        The parser, with all the export options registered.

    """
    cli = argparse.ArgumentParser(description="Export audio/spectro datasets.")

    def optional(*flags: str, kind: type, fallback: object, text: str) -> None:
        """Register a non-required option on the parser."""
        cli.add_argument(
            *flags,
            required=False,
            type=kind,
            default=fallback,
            help=text,
        )

    cli.add_argument(
        "--analysis",
        "-a",
        required=True,
        type=int,
        help="Flags representing which files to export. See AnalysisType doc for more info.",
    )

    # Datasets to export.
    optional(
        "--ads-json",
        "-ads",
        kind=str,
        fallback=None,
        text="Path to the JSON of the AudioDataset to export.",
    )
    optional(
        "--sds-json",
        "-sds",
        kind=str,
        fallback=None,
        text="Path to the JSON of the SpectroDataset to export.",
    )
    optional(
        "--subtype",
        "-sbtp",
        kind=str,
        fallback=None,
        text="The subtype format of the audio files to export.",
    )

    # Output folders.
    optional(
        "--matrix-folder-path",
        "-mf",
        kind=str,
        fallback=None,
        text="The path of the folder in which the npz matrix files are written.",
    )
    optional(
        "--spectrogram-folder-path",
        "-sf",
        kind=str,
        fallback=None,
        text="The path of the folder in which the png spectrogram files are written.",
    )
    optional(
        "--welch-folder-path",
        "-wf",
        kind=str,
        fallback=None,
        text="The path of the folder in which the npz welch files are written.",
    )

    # Range of the data objects to export.
    optional(
        "--first",
        "-f",
        kind=int,
        fallback=0,
        text="The index of the first file to export.",
    )
    optional(
        "--last",
        "-l",
        kind=int,
        fallback=-1,
        text="The index after the last file to export.",
    )

    # Resampling quality presets.
    optional(
        "--downsampling-quality",
        "-dq",
        kind=str,
        fallback=None,
        text="The downsampling quality preset as specified in the soxr library.",
    )
    optional(
        "--upsampling-quality",
        "-uq",
        kind=str,
        fallback=None,
        text="The upsampling quality preset as specified in the soxr library.",
    )

    # Runtime settings. Boolean-like switches are passed as strings.
    optional(
        "--umask",
        kind=int,
        fallback=0o002,
        text="The umask to apply on the created file permissions.",
    )
    optional(
        "--tqdm-disable",
        kind=str,
        fallback="true",
        text="Disable TQDM progress bars.",
    )
    optional(
        "--multiprocessing",
        kind=str,
        fallback="false",
        text="Turn multiprocessing on or off.",
    )
    optional(
        "--use-logging-setup",
        kind=str,
        fallback="false",
        text="Call osekit.setup_logging() before running the analysis.",
    )
    optional(
        "--nb-processes",
        kind=str,
        fallback=None,
        text="Set the number of processes to use.",
    )
    optional(
        "--dataset-json-path",
        "-p",
        kind=str,
        fallback=None,
        text="The path to the Dataset JSON file of which to use the logger.",
    )

    return cli


def _none_if_unset(value: str | None) -> str | None:
    """Return ``None`` for an absent or literal "none" CLI value.

    The parser defaults most optional string arguments to ``None``, and
    callers may also pass the string "none" (any case) to mean
    "no value". Both are mapped to ``None``; any other value is returned
    unchanged.
    """
    if value is None or value.lower() == "none":
        return None
    return value


def _optional_path(value: str | None) -> Path | None:
    """Return ``value`` as a ``Path``, or ``None`` if the option was not given."""
    return None if value is None else Path(value)


def main() -> None:
    """Export an analysis from the command line.

    Parses the arguments defined in ``create_parser``, applies the runtime
    settings (TQDM environment variable, optional logging setup,
    multiprocessing configuration, umask, resampling quality presets),
    loads the logger and the datasets from their JSON files, then runs
    ``write_analysis`` with ``link=True``.

    Optional arguments that are absent from the command line are handled
    the same way as the literal "none" value: the matching dataset,
    subtype or folder is passed to ``write_analysis`` as ``None``.
    """
    args = create_parser().parse_args()

    os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)

    if args.use_logging_setup.lower() == "true":
        setup_logging()

    config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"
    if (nb_processes := args.nb_processes) is not None:
        config.multiprocessing["nb_processes"] = (
            None if nb_processes.lower() == "none" else int(nb_processes)
        )

    os.umask(args.umask)

    if args.downsampling_quality is not None:
        config.resample_quality_settings["downsample"] = args.downsampling_quality
    if args.upsampling_quality is not None:
        config.resample_quality_settings["upsample"] = args.upsampling_quality

    # Without a Dataset JSON, fall back to the root logger.
    dataset_json_path = _none_if_unset(args.dataset_json_path)
    logger = (
        logging.getLogger()
        if dataset_json_path is None
        else Dataset.from_json(Path(dataset_json_path)).logger
    )

    # These arguments default to None in the parser: they must not be
    # dereferenced (.lower(), Path(...)) before being checked.
    ads_json = _none_if_unset(args.ads_json)
    ads = None if ads_json is None else AudioDataset.from_json(Path(ads_json))

    sds_json = _none_if_unset(args.sds_json)
    sds = None if sds_json is None else SpectroDataset.from_json(Path(sds_json))

    subtype = _none_if_unset(args.subtype)

    analysis_type = AnalysisType(args.analysis)

    # NOTE(review): --last defaults to -1 whereas write_analysis documents
    # `last` as the index *after* the last object, with None as its default.
    # Confirm that -1 does not exclude the final data object downstream.
    write_analysis(
        analysis_type=analysis_type,
        ads=ads,
        sds=sds,
        subtype=subtype,
        matrix_folder_path=_optional_path(args.matrix_folder_path),
        spectrogram_folder_path=_optional_path(args.spectrogram_folder_path),
        welch_folder_path=_optional_path(args.welch_folder_path),
        first=args.first,
        last=args.last,
        link=True,
        logger=logger,
    )


# Run the export only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()

1	"""Module that provides scripts for running public API analyses."""
2
3	from __future__ import annotations	1✔
4
5	import argparse	1✔
6	import logging	1✔
7	import os	1✔
8	from pathlib import Path	1✔
9
10	from osekit import config, setup_logging	1✔
11	from osekit.config import global_logging_context as glc	1✔
12	from osekit.core_api.audio_dataset import AudioDataset	1✔
13	from osekit.core_api.spectro_dataset import SpectroDataset	1✔
14	from osekit.public_api.analysis import AnalysisType	1✔
15	from osekit.public_api.dataset import Dataset	1✔
16
17
18	def write_analysis(	1✔
19	analysis_type: AnalysisType,
20	ads: AudioDataset \| None,
21	sds: SpectroDataset \| None,
22	subtype: str \| None = None,
23	matrix_folder_path: Path \| None = None,
24	spectrogram_folder_path: Path \| None = None,
25	welch_folder_path: Path \| None = None,
26	first: int = 0,
27	last: int \| None = None,
28	logger: logging.Logger \| None = None,
29	*,
30	link: bool = True,
31	) -> None:
32	"""Write SpectroDataset output files to disk.
33
34	Parameters
35	----------
36	analysis_type: AnalysisType
37	Flags that should be use to specify the type of analysis to run.
38	See Analysis.AnalysisType docstring for more info.
39	subtype: str \| None
40	Subtype of the written audio files as provided by the soundfile module.
41	Defaulted as the default 16-bit PCM for WAV audio files.
42	This parameter has no effect if Analysis.AUDIO is not in analysis.
43	ads: AudioDataset
44	The AudioDataset of which the data should be written.
45	sds: SpectroDataset
46	The SpectroDataset of which the data should be written.
47	matrix_folder_path: Path
48	The folder in which the matrix npz files should be written.
49	spectrogram_folder_path: Path
50	The folder in which the spectrogram png files should be written.
51	welch_folder_path: Path
52	The folder in which the welch npz files should be written.
53	link: bool
54	If set to True, the ads data will be linked to the exported files.
55	first: int
56	Index of the first data object to write.
57	last: int\|None
58	Index after the last data object to write.
59	logger: logging.Logger \| None
60	Logger to use to log the analysis steps.
61
62	"""
63	logger = glc.logger if logger is None else logger	1✔
64
65	logger.info("Running analysis...")	1✔
66
67	if AnalysisType.AUDIO in analysis_type:	1✔
68	logger.info("Writing audio files...")	1✔
69	ads.write(	1✔
70	folder=ads.folder,
71	subtype=subtype,
72	link=link,
73	first=first,
74	last=last,
75	)
76	ads.write_json(ads.folder)	1✔
77
78	if (	1✔
79	AnalysisType.MATRIX not in analysis_type
80	and AnalysisType.SPECTROGRAM not in analysis_type
81	and AnalysisType.WELCH not in analysis_type
82	):
83	return	1✔
84
85	# Avoid re-computing the reshaped audio
86	if AnalysisType.AUDIO in analysis_type:	1✔
87	sds.link_audio_dataset(ads, first=first, last=last)	1✔
88
89	if (	1✔
90	AnalysisType.MATRIX in analysis_type
91	and AnalysisType.SPECTROGRAM in analysis_type
92	):
93	logger.info("Computing and writing spectrum matrices and spectrograms...")	1✔
94	sds.save_all(	1✔
95	matrix_folder=matrix_folder_path,
96	spectrogram_folder=spectrogram_folder_path,
97	link=link,
98	first=first,
99	last=last,
100	)
101	elif AnalysisType.SPECTROGRAM in analysis_type:	1✔
102	logger.info("Computing and writing spectrograms...")	1✔
103	sds.save_spectrogram(	1✔
104	folder=spectrogram_folder_path,
105	first=first,
106	last=last,
107	)
108	elif AnalysisType.MATRIX in analysis_type:	×
109	logger.info("Computing and writing spectrum matrices...")	×
110	sds.write(	×
111	folder=matrix_folder_path,
112	link=link,
113	first=first,
114	last=last,
115	)
116	if AnalysisType.WELCH in analysis_type:	1✔
117	logger.info("Computing and writing welches...")	×
118	sds.write_welch(	×
119	folder=welch_folder_path,
120	first=first,
121	last=last,
122	)
123
124	# Update the sds from the JSON in case it has already been modified in another job
125	sds.update_json_audio_data(first=first, last=last)	1✔
126	logger.info("Analysis done!")	1✔
127
128
129	def create_parser() -> argparse.ArgumentParser:	1✔
130	"""Create the argument parser."""
131	parser = argparse.ArgumentParser(description="Export audio/spectro datasets.")	1✔
132
133	parser.add_argument(	1✔
134	"--analysis",
135	"-a",
136	required=True,
137	help="Flags representing which files to export. See AnalysisType doc for more info.",
138	type=int,
139	)
140
141	parser.add_argument(	1✔
142	"--ads-json",
143	"-ads",
144	required=False,
145	help="Path to the JSON of the AudioDataset to export.",
146	type=str,
147	default=None,
148	)
149
150	parser.add_argument(	1✔
151	"--sds-json",
152	"-sds",
153	required=False,
154	help="Path to the JSON of the SpectroDataset to export.",
155	type=str,
156	default=None,
157	)
158
159	parser.add_argument(	1✔
160	"--subtype",
161	"-sbtp",
162	required=False,
163	help="The subtype format of the audio files to export.",
164	type=str,
165	default=None,
166	)
167
168	parser.add_argument(	1✔
169	"--matrix-folder-path",
170	"-mf",
171	required=False,
172	help="The path of the folder in which the npz matrix files are written.",
173	type=str,
174	default=None,
175	)
176
177	parser.add_argument(	1✔
178	"--spectrogram-folder-path",
179	"-sf",
180	required=False,
181	help="The path of the folder in which the png spectrogram files are written.",
182	type=str,
183	default=None,
184	)
185
186	parser.add_argument(	1✔
187	"--welch-folder-path",
188	"-wf",
189	required=False,
190	help="The path of the folder in which the npz welch files are written.",
191	type=str,
192	default=None,
193	)
194
195	parser.add_argument(	1✔
196	"--first",
197	"-f",
198	required=False,
199	help="The index of the first file to export.",
200	type=int,
201	default=0,
202	)
203
204	parser.add_argument(	1✔
205	"--last",
206	"-l",
207	required=False,
208	help="The index after the last file to export.",
209	type=int,
210	default=-1,
211	)
212
213	parser.add_argument(	1✔
214	"--downsampling-quality",
215	"-dq",
216	required=False,
217	help="The downsampling quality preset as specified in the soxr library.",
218	type=str,
219	default=None,
220	)
221
222	parser.add_argument(	1✔
223	"--upsampling-quality",
224	"-uq",
225	required=False,
226	help="The upsampling quality preset as specified in the soxr library.",
227	type=str,
228	default=None,
229	)
230
231	parser.add_argument(	1✔
232	"--umask",
233	required=False,
234	type=int,
235	default=0o002,
236	help="The umask to apply on the created file permissions.",
237	)
238
239	parser.add_argument(	1✔
240	"--tqdm-disable",
241	required=False,
242	type=str,
243	default="true",
244	help="Disable TQDM progress bars.",
245	)
246
247	parser.add_argument(	1✔
248	"--multiprocessing",
249	required=False,
250	type=str,
251	default="false",
252	help="Turn multiprocessing on or off.",
253	)
254
255	parser.add_argument(	1✔
256	"--use-logging-setup",
257	required=False,
258	type=str,
259	default="false",
260	help="Call osekit.setup_logging() before running the analysis.",
261	)
262
263	parser.add_argument(	1✔
264	"--nb-processes",
265	required=False,
266	type=str,
267	default=None,
268	help="Set the number of processes to use.",
269	)
270
271	parser.add_argument(	1✔
272	"--dataset-json-path",
273	"-p",
274	required=False,
275	help="The path to the Dataset JSON file of which to use the logger.",
276	type=str,
277	default=None,
278	)
279
280	return parser	1✔
281
282
283	def main() -> None:	1✔
284	"""Export an analysis."""
285	args = create_parser().parse_args()	1✔
286
287	os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)	1✔
288
289	if args.use_logging_setup.lower() == "true":	1✔
290	setup_logging()	1✔
291
292	config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"	1✔
293	if (nb_processes := args.nb_processes) is not None:	1✔
294	config.multiprocessing["nb_processes"] = (	1✔
295	None if nb_processes.lower() == "none" else int(nb_processes)
296	)
297
298	os.umask(args.umask)	1✔
299
300	if args.downsampling_quality is not None:	1✔
301	config.resample_quality_settings["downsample"] = args.downsampling_quality	1✔
302	if args.upsampling_quality is not None:	1✔
303	config.resample_quality_settings["upsample"] = args.upsampling_quality	1✔
304
305	logger = (	1✔
306	logging.getLogger()
307	if (args.dataset_json_path is None or args.dataset_json_path.lower() == "none")
308	else Dataset.from_json(Path(args.dataset_json_path)).logger
309	)
310
311	ads = (	1✔
312	AudioDataset.from_json(Path(args.ads_json))
313	if args.ads_json.lower() != "none"
314	else None
315	)
316	sds = (	1✔
317	SpectroDataset.from_json(Path(args.sds_json))
318	if args.sds_json.lower() != "none"
319	else None
320	)
321
322	subtype = None if args.subtype.lower() == "none" else args.subtype	1✔
323
324	analysis_type = AnalysisType(args.analysis)	1✔
325
326	write_analysis(	1✔
327	analysis_type=analysis_type,
328	ads=ads,
329	sds=sds,
330	subtype=subtype,
331	matrix_folder_path=Path(args.matrix_folder_path),
332	spectrogram_folder_path=Path(args.spectrogram_folder_path),
333	welch_folder_path=Path(args.welch_folder_path),
334	first=args.first,
335	last=args.last,
336	link=True,
337	logger=logger,
338	)
339
340
341	if __name__ == "__main__":	1✔
NEW 342	main()	×

Project-OSmOSE / OSEkit / 18967319638

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous