18938733032

Committed 30 Oct 2025 11:13AM UTC coverage: 96.203% (+3.6%) from 92.572%

Build # 18938733032

Build Type

Pull #281

github

Committed by

web-flow

Commit Message

Merge 646d9d6f8 into 1406de2c3

Pull Request Pull Request #281: [DRAFT] Job rework

Run Details

445 of 464 new or added lines in 5 files covered. (95.91%)

11 existing lines in 2 files now uncovered.

3724 of 3871 relevant lines covered (96.2%)

0.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.74

/src/osekit/public_api/export_analysis.py

"""Module that provides scripts for running public API analyses."""

from __future__ import annotations

import argparse
import logging
import os
from pathlib import Path

from osekit import config, setup_logging
from osekit.config import global_logging_context as glc
from osekit.config import resample_quality_settings
from osekit.core_api.audio_dataset import AudioDataset
from osekit.core_api.spectro_dataset import SpectroDataset
from osekit.public_api.analysis import AnalysisType
from osekit.public_api.dataset import Dataset


def write_analysis(
    analysis_type: AnalysisType,
    ads: AudioDataset | None,
    sds: SpectroDataset | None,
    subtype: str | None,
    matrix_folder_name: str,
    spectrogram_folder_name: str,
    welch_folder_name: str,
    first: int = 0,
    last: int | None = None,
    logger: logging.Logger | None = None,
    *,
    link: bool = True,
) -> None:
    """Write the audio and/or spectral outputs selected by ``analysis_type``.

    Audio files are written first (if requested). If no spectral output
    (matrix, spectrogram or welch) is requested, the function returns right
    after the audio export.

    Parameters
    ----------
    analysis_type: AnalysisType
        Flags that should be used to specify the type of analysis to run.
        See Analysis.AnalysisType docstring for more info.
    ads: AudioDataset | None
        The AudioDataset of which the data should be written.
        Required if AnalysisType.AUDIO is in analysis_type.
    sds: SpectroDataset | None
        The SpectroDataset of which the data should be written.
        Required if any of AnalysisType.MATRIX, AnalysisType.SPECTROGRAM or
        AnalysisType.WELCH is in analysis_type.
    subtype: str | None
        Subtype of the written audio files as provided by the soundfile module.
        Forwarded as-is to ``ads.write``.
        This parameter has no effect if AnalysisType.AUDIO is not in
        analysis_type.
    matrix_folder_name: str
        Name of the folder (joined to ``sds.folder``) in which the matrix
        npz files should be written.
    spectrogram_folder_name: str
        Name of the folder (joined to ``sds.folder``) in which the spectrogram
        png files should be written.
    welch_folder_name: str
        Name of the folder (joined to ``sds.folder``) in which the welch
        npz files should be written.
    first: int
        Index of the first data object to write.
    last: int | None
        Index after the last data object to write.
    logger: logging.Logger | None
        Logger to use to log the analysis steps.
        If None, the logger of the global logging context is used.
    link: bool
        If set to True, the data will be linked to the exported files.
        Forwarded to the audio, matrix and matrix+spectrogram exports.

    Raises
    ------
    ValueError
        If a dataset required by ``analysis_type`` is None.

    """
    logger = glc.logger if logger is None else logger

    logger.info("Running analysis...")

    export_audio = AnalysisType.AUDIO in analysis_type
    export_matrix = AnalysisType.MATRIX in analysis_type
    export_spectrogram = AnalysisType.SPECTROGRAM in analysis_type
    export_welch = AnalysisType.WELCH in analysis_type

    if export_audio:
        # Fail with a clear message rather than an AttributeError on None.
        if ads is None:
            msg = "An AudioDataset is required to export audio files."
            raise ValueError(msg)
        logger.info("Writing audio files...")
        ads.write(
            folder=ads.folder,
            subtype=subtype,
            link=link,
            first=first,
            last=last,
        )
        ads.write_json(ads.folder)

    # Audio-only analysis: nothing spectral to compute.
    if not (export_matrix or export_spectrogram or export_welch):
        return

    if sds is None:
        msg = (
            "A SpectroDataset is required to export matrices, "
            "spectrograms or welches."
        )
        raise ValueError(msg)

    # Avoid re-computing the reshaped audio
    if export_audio:
        sds.link_audio_dataset(ads, first=first, last=last)

    if export_matrix and export_spectrogram:
        # Both outputs are produced by a single call.
        logger.info("Computing and writing spectrum matrices and spectrograms...")
        sds.save_all(
            matrix_folder=sds.folder / matrix_folder_name,
            spectrogram_folder=sds.folder / spectrogram_folder_name,
            link=link,
            first=first,
            last=last,
        )
    elif export_spectrogram:
        logger.info("Computing and writing spectrograms...")
        sds.save_spectrogram(
            folder=sds.folder / spectrogram_folder_name,
            first=first,
            last=last,
        )
    elif export_matrix:
        logger.info("Computing and writing spectrum matrices...")
        sds.write(
            folder=sds.folder / matrix_folder_name,
            link=link,
            first=first,
            last=last,
        )

    # Welch export is independent of the matrix/spectrogram branch above.
    if export_welch:
        logger.info("Computing and writing welches...")
        sds.write_welch(
            folder=sds.folder / welch_folder_name,
            first=first,
            last=last,
        )

    # Update the sds from the JSON in case it has already been modified in another job
    sds.update_json_audio_data(first=first, last=last)
    logger.info("Analysis done!")


def create_parser() -> argparse.ArgumentParser:
    """Create the argument parser of the export script.

    Several string options use the literal ``"none"`` / ``"true"`` /
    ``"false"`` as sentinels; they are interpreted by the script entry point,
    not by the parser.

    Returns
    -------
    argparse.ArgumentParser
        The parser, with ``--analysis`` as the only required option.

    """
    parser = argparse.ArgumentParser(description="Export audio/spectro datasets.")

    parser.add_argument(
        "--analysis",
        "-a",
        required=True,
        help="Flags representing which files to export. See AnalysisType doc for more info.",
        type=int,
    )

    parser.add_argument(
        "--ads-json",
        "-ads",
        required=False,
        help="Path to the JSON of the AudioDataset to export.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--sds-json",
        "-sds",
        required=False,
        help="Path to the JSON of the SpectroDataset to export.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--subtype",
        "-sbtp",
        required=False,
        help="The subtype format of the audio files to export.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--matrix-folder-path",
        "-mf",
        required=False,
        help="The path of the folder in which the npz matrix files are written.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--spectrogram-folder-path",
        "-sf",
        required=False,
        help="The path of the folder in which the png spectrogram files are written.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--welch-folder-path",
        "-wf",
        required=False,
        help="The path of the folder in which the npz welch files are written.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--first",
        "-f",
        required=False,
        help="The index of the first file to export.",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--last",
        "-l",
        required=False,
        help="The index after the last file to export.",
        type=int,
        default=-1,
    )

    parser.add_argument(
        "--downsampling-quality",
        "-dq",
        required=False,
        help="The downsampling quality preset as specified in the soxr library.",
        type=str,
        default=None,
    )

    parser.add_argument(
        "--upsampling-quality",
        "-uq",
        required=False,
        help="The upsampling quality preset as specified in the soxr library.",
        type=str,
        default=None,
    )

    # NOTE: the value is parsed as a base-10 integer (type=int).
    parser.add_argument(
        "--umask",
        required=False,
        type=int,
        default=0o002,
        help="The umask to apply on the created file permissions.",
    )

    parser.add_argument(
        "--tqdm-disable",
        required=False,
        type=str,
        default="true",
        help="Disable TQDM progress bars.",
    )

    parser.add_argument(
        "--multiprocessing",
        required=False,
        type=str,
        default="false",
        help="Turn multiprocessing on or off.",
    )

    parser.add_argument(
        "--use-logging-setup",
        required=False,
        type=str,
        default="false",
        help="Call osekit.setup_logging() before running the analysis.",
    )

    # Kept as a string: the script entry point accepts the literal "none"
    # in addition to an integer, and converts the value itself.
    parser.add_argument(
        "--nb-processes",
        required=False,
        type=str,
        default=None,
        help="Set the number of processes to use.",
    )

    parser.add_argument(
        "--dataset-json-path",
        "-p",
        required=False,
        help="The path to the Dataset JSON file of which to use the logger.",
        type=str,
        default=None,
    )

    return parser


if __name__ == "__main__":
    # Script entry point: parse the command line, apply the global
    # configuration, load the datasets and run the requested analysis.
    args = create_parser().parse_args()

    def _is_unset(value: object) -> bool:
        """Return True if a CLI value is None or the string "none" (any case)."""
        return value is None or str(value).lower() == "none"

    os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)

    if args.use_logging_setup.lower() == "true":
        setup_logging()

    config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"
    if (nb_processes := args.nb_processes) is not None:
        # The value may be an int or a string (possibly "none") depending on
        # how the parser declares the option: handle both.
        config.nb_processes = None if _is_unset(nb_processes) else int(nb_processes)

    os.umask(args.umask)

    if args.downsampling_quality is not None:
        resample_quality_settings["downsample"] = args.downsampling_quality
    if args.upsampling_quality is not None:
        resample_quality_settings["upsample"] = args.upsampling_quality

    # Use the Dataset logger when a Dataset JSON is provided, the root logger otherwise.
    logger = (
        logging.getLogger()
        if _is_unset(args.dataset_json_path)
        else Dataset.from_json(Path(args.dataset_json_path)).logger
    )

    # The dataset options default to None: omitting one must not crash.
    ads = None if _is_unset(args.ads_json) else AudioDataset.from_json(Path(args.ads_json))
    sds = (
        None
        if _is_unset(args.sds_json)
        else SpectroDataset.from_json(Path(args.sds_json))
    )

    subtype = None if _is_unset(args.subtype) else args.subtype

    analysis_type = AnalysisType(args.analysis)

    # The parser options are named --*-folder-path, hence the *_folder_path
    # attributes on the namespace.
    # NOTE(review): --last defaults to -1 whereas write_analysis defaults to
    # None — confirm that -1 is handled as intended downstream.
    write_analysis(
        analysis_type=analysis_type,
        ads=ads,
        sds=sds,
        subtype=subtype,
        matrix_folder_name=args.matrix_folder_path,
        spectrogram_folder_name=args.spectrogram_folder_path,
        welch_folder_name=args.welch_folder_path,
        first=args.first,
        last=args.last,
        link=True,
        logger=logger,
    )

1	"""Module that provides scripts for running public API analyses."""
2
3	from __future__ import annotations	1✔
4
5	import argparse	1✔
6	import logging	1✔
7	import os	1✔
8	from pathlib import Path	1✔
9
10	from osekit import config, setup_logging	1✔
11	from osekit.config import global_logging_context as glc	1✔
12	from osekit.config import resample_quality_settings	1✔
13	from osekit.core_api.audio_dataset import AudioDataset	1✔
14	from osekit.core_api.spectro_dataset import SpectroDataset	1✔
15	from osekit.public_api.analysis import AnalysisType	1✔
16	from osekit.public_api.dataset import Dataset	1✔
17
18
19	def write_analysis(	1✔
20	analysis_type: AnalysisType,
21	ads: AudioDataset \| None,
22	sds: SpectroDataset \| None,
23	subtype: str,
24	matrix_folder_name: str,
25	spectrogram_folder_name: str,
26	welch_folder_name: str,
27	first: int = 0,
28	last: int \| None = None,
29	logger: logging.Logger \| None = None,
30	*,
31	link: bool = True,
32	) -> None:
33	"""Write SpectroDataset output files to disk.
34
35	Parameters
36	----------
37	analysis_type: AnalysisType
38	Flags that should be use to specify the type of analysis to run.
39	See Analysis.AnalysisType docstring for more info.
40	subtype: str \| None
41	Subtype of the written audio files as provided by the soundfile module.
42	Defaulted as the default 16-bit PCM for WAV audio files.
43	This parameter has no effect if Analysis.AUDIO is not in analysis.
44	ads: AudioDataset
45	The AudioDataset of which the data should be written.
46	sds: SpectroDataset
47	The SpectroDataset of which the data should be written.
48	matrix_folder_name: Path
49	The folder in which the matrix npz files should be written.
50	spectrogram_folder_name: Path
51	The folder in which the spectrogram png files should be written.
52	welch_folder_name: Path
53	The folder in which the welch npz files should be written.
54	link: bool
55	If set to True, the ads data will be linked to the exported files.
56	first: int
57	Index of the first data object to write.
58	last: int\|None
59	Index after the last data object to write.
60	logger: logging.Logger \| None
61	Logger to use to log the analysis steps.
62
63	"""
64	logger = glc.logger if logger is None else logger	1✔
65
66	logger.info("Running analysis...")	1✔
67
68	if AnalysisType.AUDIO in analysis_type:	1✔
69	logger.info("Writing audio files...")	1✔
70	ads.write(	1✔
71	folder=ads.folder,
72	subtype=subtype,
73	link=link,
74	first=first,
75	last=last,
76	)
77	ads.write_json(ads.folder)	1✔
78
79	if (	1✔
80	AnalysisType.MATRIX not in analysis_type
81	and AnalysisType.SPECTROGRAM not in analysis_type
82	and AnalysisType.WELCH not in analysis_type
83	):
84	return	1✔
85
86	# Avoid re-computing the reshaped audio
87	if AnalysisType.AUDIO in analysis_type:	1✔
88	sds.link_audio_dataset(ads, first=first, last=last)	1✔
89
90	if (	1✔
91	AnalysisType.MATRIX in analysis_type
92	and AnalysisType.SPECTROGRAM in analysis_type
93	):
94	logger.info("Computing and writing spectrum matrices and spectrograms...")	1✔
95	sds.save_all(	1✔
96	matrix_folder=sds.folder / matrix_folder_name,
97	spectrogram_folder=sds.folder / spectrogram_folder_name,
98	link=link,
99	first=first,
100	last=last,
101	)
102	elif AnalysisType.SPECTROGRAM in analysis_type:	1✔
103	logger.info("Computing and writing spectrograms...")	1✔
104	sds.save_spectrogram(	1✔
105	folder=sds.folder / spectrogram_folder_name,
106	first=first,
107	last=last,
108	)
109	elif AnalysisType.MATRIX in analysis_type:	×
110	logger.info("Computing and writing spectrum matrices...")	×
111	sds.write(	×
112	folder=sds.folder / matrix_folder_name,
113	link=link,
114	first=first,
115	last=last,
116	)
117	if AnalysisType.WELCH in analysis_type:	1✔
118	logger.info("Computing and writing welches...")	×
119	sds.write_welch(	×
120	folder=sds.folder / welch_folder_name,
121	first=first,
122	last=last,
123	)
124
125	# Update the sds from the JSON in case it has already been modified in another job
126	sds.update_json_audio_data(first=first, last=last)	1✔
127	logger.info("Analysis done!")	1✔
128
129
130	def create_parser() -> argparse.ArgumentParser:	1✔
131	"""Create the argument parser."""
132	parser = argparse.ArgumentParser(description="Export audio/spectro datasets.")	1✔
133
134	parser.add_argument(	1✔
135	"--analysis",
136	"-a",
137	required=True,
138	help="Flags representing which files to export. See AnalysisType doc for more info.",
139	type=int,
140	)
141
142	parser.add_argument(	1✔
143	"--ads-json",
144	"-ads",
145	required=False,
146	help="Path to the JSON of the AudioDataset to export.",
147	type=str,
148	default=None,
149	)
150
151	parser.add_argument(	1✔
152	"--sds-json",
153	"-sds",
154	required=False,
155	help="Path to the JSON of the SpectroDataset to export.",
156	type=str,
157	default=None,
158	)
159
160	parser.add_argument(	1✔
161	"--subtype",
162	"-sbtp",
163	required=False,
164	help="The subtype format of the audio files to export.",
165	type=str,
166	default=None,
167	)
168
169	parser.add_argument(	1✔
170	"--matrix-folder-path",
171	"-mf",
172	required=False,
173	help="The path of the folder in which the npz matrix files are written.",
174	type=str,
175	default=None,
176	)
177
178	parser.add_argument(	1✔
179	"--spectrogram-folder-path",
180	"-sf",
181	required=False,
182	help="The path of the folder in which the png spectrogram files are written.",
183	type=str,
184	default=None,
185	)
186
187	parser.add_argument(	1✔
188	"--welch-folder-path",
189	"-wf",
190	required=False,
191	help="The path of the folder in which the npz welch files are written.",
192	type=str,
193	default=None,
194	)
195
196	parser.add_argument(	1✔
197	"--first",
198	"-f",
199	required=False,
200	help="The index of the first file to export.",
201	type=int,
202	default=0,
203	)
204
205	parser.add_argument(	1✔
206	"--last",
207	"-l",
208	required=False,
209	help="The index after the last file to export.",
210	type=int,
211	default=-1,
212	)
213
214	parser.add_argument(	1✔
215	"--downsampling-quality",
216	"-dq",
217	required=False,
218	help="The downsampling quality preset as specified in the soxr library.",
219	type=str,
220	default=None,
221	)
222
223	parser.add_argument(	1✔
224	"--upsampling-quality",
225	"-uq",
226	required=False,
227	help="The upsampling quality preset as specified in the soxr library.",
228	type=str,
229	default=None,
230	)
231
232	parser.add_argument(	1✔
233	"--umask",
234	required=False,
235	type=int,
236	default=0o002,
237	help="The umask to apply on the created file permissions.",
238	)
239
240	parser.add_argument(	1✔
241	"--tqdm-disable",
242	required=False,
243	type=str,
244	default="true",
245	help="Disable TQDM progress bars.",
246	)
247
248	parser.add_argument(	1✔
249	"--multiprocessing",
250	required=False,
251	type=str,
252	default="false",
253	help="Turn multiprocessing on or off.",
254	)
255
256	parser.add_argument(	1✔
257	"--use-logging-setup",
258	required=False,
259	type=str,
260	default="false",
261	help="Call osekit.setup_logging() before running the analysis.",
262	)
263
264	parser.add_argument(	1✔
265	"--nb-processes",
266	required=False,
267	type=int,
268	default=None,
269	help="Set the number of processes to use.",
270	)
271
272	parser.add_argument(	1✔
273	"--dataset-json-path",
274	"-p",
275	required=False,
276	help="The path to the Dataset JSON file of which to use the logger.",
277	type=str,
278	default=None,
279	)
280
281	return parser	1✔
282
283
284	if __name__ == "__main__":	1✔
NEW 285	args = create_parser().parse_args()	×
286
287	os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)	×
288
NEW 289	if args.use_logging_setup.lower() == "true":	×
NEW 290	setup_logging()	×
291
292	config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"	×
NEW 293	if (nb_processes := args.nb_processes) is not None:	×
NEW 294	config.nb_processes = (	×
295	None if nb_processes.lower() == "none" else int(nb_processes)
296	)
297
298	os.umask(args.umask)	×
299
300	if args.downsampling_quality is not None:	×
301	resample_quality_settings["downsample"] = args.downsampling_quality	×
302	if args.upsampling_quality is not None:	×
303	resample_quality_settings["upsample"] = args.upsampling_quality	×
304
NEW 305	logger = (	×
306	logging.getLogger()
307	if (args.dataset_json_path is None or args.dataset_json_path.lower() == "none")
308	else Dataset.from_json(Path(args.dataset_json_path)).logger
309	)
310
NEW 311	ads = (	×
312	AudioDataset.from_json(Path(args.ads_json))
313	if args.ads_json.lower() != "none"
314	else None
315	)
NEW 316	sds = (	×
317	SpectroDataset.from_json(Path(args.sds_json))
318	if args.sds_json.lower() != "none"
319	else None
320	)
321
UNCOV 322	subtype = None if args.subtype.lower() == "none" else args.subtype	×
323
324	analysis_type = AnalysisType(args.analysis)	×
325
326	write_analysis(	×
327	analysis_type=analysis_type,
328	ads=ads,
329	sds=sds,
330	subtype=subtype,
331	matrix_folder_name=args.matrix_folder_name,
332	spectrogram_folder_name=args.spectrogram_folder_name,
333	welch_folder_name=args.welch_folder_name,
334	first=args.first,
335	last=args.last,
336	link=True,
337	logger=logger,
338	)

Project-OSmOSE / OSEkit / 18938733032

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous