18915983740

Committed 29 Oct 2025 05:01PM UTC coverage: 95.657% (+3.1%) from 92.572%

Build # 18915983740

Build Type

Pull #281

github

Committed by

web-flow

Commit Message

Merge a4cb85148 into 1406de2c3

Pull Request #281: [DRAFT] Job rework

Run Details

383 of 403 new or added lines in 4 files covered. (95.04%)

12 existing lines in 2 files now uncovered.

3656 of 3822 relevant lines covered (95.66%)

0.96 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

44.0

/src/osekit/public_api/export_analysis.py

"""Module that provides scripts for running public API analyses."""

from __future__ import annotations

import argparse
import logging
import os
from pathlib import Path

from osekit import config, setup_logging
from osekit.config import global_logging_context as glc
from osekit.config import resample_quality_settings
from osekit.core_api.audio_dataset import AudioDataset
from osekit.core_api.spectro_dataset import SpectroDataset
from osekit.public_api.analysis import AnalysisType
from osekit.public_api.dataset import Dataset


def write_analysis(
    analysis_type: AnalysisType,
    ads: AudioDataset | None,
    sds: SpectroDataset | None,
    subtype: str | None,
    matrix_folder_name: str,
    spectrogram_folder_name: str,
    welch_folder_name: str,
    first: int = 0,
    last: int | None = None,
    logger: logging.Logger | None = None,
    *,
    link: bool = True,
) -> None:
    """Write the output files requested by an analysis to disk.

    Depending on the flags set in ``analysis_type``, this writes the audio
    files of ``ads`` and/or the spectrum matrices, spectrograms and welches
    of ``sds``.

    Parameters
    ----------
    analysis_type: AnalysisType
        Flags that should be used to specify the type of analysis to run.
        See Analysis.AnalysisType docstring for more info.
    ads: AudioDataset | None
        The AudioDataset of which the data should be written.
        Must not be None if AnalysisType.AUDIO is in analysis_type.
    sds: SpectroDataset | None
        The SpectroDataset of which the data should be written.
        Must not be None if any of AnalysisType.MATRIX,
        AnalysisType.SPECTROGRAM or AnalysisType.WELCH is in analysis_type.
    subtype: str | None
        Subtype of the written audio files as provided by the soundfile module.
        Defaulted as the default 16-bit PCM for WAV audio files.
        This parameter has no effect if AnalysisType.AUDIO is not in
        analysis_type.
    matrix_folder_name: str
        Name of the folder (within sds.folder) in which the matrix npz files
        should be written.
    spectrogram_folder_name: str
        Name of the folder (within sds.folder) in which the spectrogram png
        files should be written.
    welch_folder_name: str
        Name of the folder (within sds.folder) in which the welch npz files
        should be written.
    first: int
        Index of the first data object to write.
    last: int | None
        Index after the last data object to write.
    logger: logging.Logger | None
        Logger to use to log the analysis steps.
        If None, the logger of the global logging context is used.
    link: bool
        If set to True, the ads data will be linked to the exported files.

    Raises
    ------
    ValueError
        If a dataset required by one of the flags of analysis_type is None.

    """
    logger = glc.logger if logger is None else logger

    export_audio = AnalysisType.AUDIO in analysis_type
    export_matrix = AnalysisType.MATRIX in analysis_type
    export_spectrogram = AnalysisType.SPECTROGRAM in analysis_type
    export_welch = AnalysisType.WELCH in analysis_type
    export_spectral = export_matrix or export_spectrogram or export_welch

    # Fail before writing anything: a missing dataset would otherwise surface
    # as an AttributeError on None, possibly after the audio has been written.
    if export_audio and ads is None:
        msg = "An AudioDataset is required to run an AUDIO analysis."
        raise ValueError(msg)
    if export_spectral and sds is None:
        msg = (
            "A SpectroDataset is required to run a MATRIX, SPECTROGRAM "
            "or WELCH analysis."
        )
        raise ValueError(msg)

    logger.info("Running analysis...")

    if export_audio:
        logger.info("Writing audio files...")
        ads.write(
            folder=ads.folder,
            subtype=subtype,
            link=link,
            first=first,
            last=last,
        )
        ads.write_json(ads.folder)

    if not export_spectral:
        return

    # Avoid re-computing the reshaped audio
    if export_audio:
        sds.link_audio_dataset(ads, first=first, last=last)

    if export_matrix and export_spectrogram:
        logger.info("Computing and writing spectrum matrices and spectrograms...")
        sds.save_all(
            matrix_folder=sds.folder / matrix_folder_name,
            spectrogram_folder=sds.folder / spectrogram_folder_name,
            link=link,
            first=first,
            last=last,
        )
    elif export_spectrogram:
        logger.info("Computing and writing spectrograms...")
        sds.save_spectrogram(
            folder=sds.folder / spectrogram_folder_name,
            first=first,
            last=last,
        )
    elif export_matrix:
        logger.info("Computing and writing spectrum matrices...")
        sds.write(
            folder=sds.folder / matrix_folder_name,
            link=link,
            first=first,
            last=last,
        )

    # Welches are independent of the matrix/spectrogram exports above.
    if export_welch:
        logger.info("Computing and writing welches...")
        sds.write_welch(
            folder=sds.folder / welch_folder_name,
            first=first,
            last=last,
        )

    # Update the sds from the JSON in case it has already been modified in another job
    sds.update_json_audio_data(first=first, last=last)
    logger.info("Analysis done!")


# Command-line entry point: parse the job arguments, apply the global
# configuration they describe, load the datasets from their JSON files and
# run write_analysis() on them.
# Several string arguments accept the literal "none" (case-insensitive)
# as a stand-in for a missing value.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    required = parser.add_argument_group("required arguments")
    required.add_argument(
        "--analysis",
        "-a",
        required=True,
        help="Flags representing which files to export during this analysis.",
        type=int,
    )
    required.add_argument(
        "--ads-json",
        "-ads",
        required=True,
        help="Path to the JSON of the AudioDataset to export during this analysis.",
        type=str,
    )
    required.add_argument(
        "--sds-json",
        "-sds",
        required=True,
        help="Path to the JSON of the SpectroDataset to export during this analysis.",
        type=str,
    )
    parser.add_argument(
        "--subtype",
        "-sbtp",
        required=False,
        help="The subtype format of the audio files to export.",
        type=str,
        default=None,
    )
    required.add_argument(
        "--matrix-folder-name",
        "-mfn",
        required=True,
        help="The name of the folder in which the npz matrix files are written.",
        type=str,
    )
    required.add_argument(
        "--spectrogram-folder-name",
        "-sfn",
        required=True,
        help="The name of the folder in which the png spectrogram files are written.",
        type=str,
    )
    required.add_argument(
        "--welch-folder-name",
        "-wfn",
        required=True,
        help="The name of the folder in which the npz welch files are written.",
        type=str,
    )
    # NOTE: --first and --last are required, so their defaults are never used.
    required.add_argument(
        "--first",
        "-f",
        required=True,
        help="The index of the first file to export.",
        type=int,
        default=0,
    )
    required.add_argument(
        "--last",
        "-l",
        required=True,
        help="The index after the last file to export.",
        type=int,
        default=-1,
    )
    parser.add_argument(
        "--downsampling-quality",
        "-dq",
        required=False,
        help="The downsampling quality preset as specified in the soxr library.",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--upsampling-quality",
        "-uq",
        required=False,
        help="The upsampling quality preset as specified in the soxr library.",
        type=str,
        default=None,
    )
    # NOTE(review): type=int parses the command-line value in base 10, so an
    # octal-looking string such as "022" is read as decimal 22 - confirm that
    # callers pass the decimal value of the mask.
    parser.add_argument(
        "--umask",
        type=int,
        default=0o002,
        help="The umask to apply on the created file permissions.",
    )
    parser.add_argument(
        "--tqdm-disable",
        type=int,
        default=1,
        help="Disable TQDM progress bars.",
    )
    parser.add_argument(
        "--multiprocessing",
        type=str,
        default="false",
        help="Turn multiprocessing on or off.",
    )
    parser.add_argument(
        "--use-logging-setup",
        type=str,
        default="false",
        help="Call osekit.setup_logging() before running the analysis.",
    )
    parser.add_argument(
        "--nb-processes",
        type=str,
        default=None,
        help="Set the number of processes to use.",
    )
    parser.add_argument(
        "--dataset-json-path",
        "-p",
        help="The path to the Dataset JSON file of which to use the logger.",
        type=str,
    )

    args = parser.parse_args()

    # A falsy value (0) is exported as an empty string, any other value as-is.
    os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)

    if args.use_logging_setup.lower() == "true":
        setup_logging()

    config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"
    # config.nb_processes is left untouched when the argument is omitted, and
    # explicitly reset to None when the literal "none" is passed.
    if (nb_processes := args.nb_processes) is not None:
        config.nb_processes = (
            None if nb_processes.lower() == "none" else int(nb_processes)
        )

    os.umask(args.umask)

    if args.downsampling_quality is not None:
        resample_quality_settings["downsample"] = args.downsampling_quality
    if args.upsampling_quality is not None:
        resample_quality_settings["upsample"] = args.upsampling_quality

    # Use the root logger unless a Dataset JSON is provided.
    logger = (
        logging.getLogger()
        if (args.dataset_json_path is None or args.dataset_json_path.lower() == "none")
        else Dataset.from_json(Path(args.dataset_json_path)).logger
    )

    ads = (
        AudioDataset.from_json(Path(args.ads_json))
        if args.ads_json.lower() != "none"
        else None
    )
    sds = (
        SpectroDataset.from_json(Path(args.sds_json))
        if args.sds_json.lower() != "none"
        else None
    )

    # --subtype is optional and defaults to None: guard against it before
    # calling .lower(), otherwise omitting the flag raises an AttributeError.
    subtype = (
        None
        if (args.subtype is None or args.subtype.lower() == "none")
        else args.subtype
    )

    analysis_type = AnalysisType(args.analysis)

    write_analysis(
        analysis_type=analysis_type,
        ads=ads,
        sds=sds,
        subtype=subtype,
        matrix_folder_name=args.matrix_folder_name,
        spectrogram_folder_name=args.spectrogram_folder_name,
        welch_folder_name=args.welch_folder_name,
        first=args.first,
        last=args.last,
        link=True,
        logger=logger,
    )

1	"""Module that provides scripts for running public API analyses."""
2
3	from __future__ import annotations	1✔
4
5	import argparse	1✔
6	import logging	1✔
7	import os	1✔
8	from pathlib import Path	1✔
9
10	from osekit import config, setup_logging	1✔
11	from osekit.config import global_logging_context as glc	1✔
12	from osekit.config import resample_quality_settings	1✔
13	from osekit.core_api.audio_dataset import AudioDataset	1✔
14	from osekit.core_api.spectro_dataset import SpectroDataset	1✔
15	from osekit.public_api.analysis import AnalysisType	1✔
16	from osekit.public_api.dataset import Dataset	1✔
17
18
19	def write_analysis(	1✔
20	analysis_type: AnalysisType,
21	ads: AudioDataset \| None,
22	sds: SpectroDataset \| None,
23	subtype: str,
24	matrix_folder_name: str,
25	spectrogram_folder_name: str,
26	welch_folder_name: str,
27	first: int = 0,
28	last: int \| None = None,
29	logger: logging.Logger \| None = None,
30	*,
31	link: bool = True,
32	) -> None:
33	"""Write SpectroDataset output files to disk.
34
35	Parameters
36	----------
37	analysis_type: AnalysisType
38	Flags that should be use to specify the type of analysis to run.
39	See Analysis.AnalysisType docstring for more info.
40	subtype: str \| None
41	Subtype of the written audio files as provided by the soundfile module.
42	Defaulted as the default 16-bit PCM for WAV audio files.
43	This parameter has no effect if Analysis.AUDIO is not in analysis.
44	ads: AudioDataset
45	The AudioDataset of which the data should be written.
46	sds: SpectroDataset
47	The SpectroDataset of which the data should be written.
48	matrix_folder_name: Path
49	The folder in which the matrix npz files should be written.
50	spectrogram_folder_name: Path
51	The folder in which the spectrogram png files should be written.
52	welch_folder_name: Path
53	The folder in which the welch npz files should be written.
54	link: bool
55	If set to True, the ads data will be linked to the exported files.
56	first: int
57	Index of the first data object to write.
58	last: int\|None
59	Index after the last data object to write.
60	logger: logging.Logger \| None
61	Logger to use to log the analysis steps.
62
63	"""
64	logger = glc.logger if logger is None else logger	1✔
65
66	logger.info("Running analysis...")	1✔
67
68	if AnalysisType.AUDIO in analysis_type:	1✔
69	logger.info("Writing audio files...")	1✔
70	ads.write(	1✔
71	folder=ads.folder,
72	subtype=subtype,
73	link=link,
74	first=first,
75	last=last,
76	)
77	ads.write_json(ads.folder)	1✔
78
79	if (	1✔
80	AnalysisType.MATRIX not in analysis_type
81	and AnalysisType.SPECTROGRAM not in analysis_type
82	and AnalysisType.WELCH not in analysis_type
83	):
84	return	1✔
85
86	# Avoid re-computing the reshaped audio
87	if AnalysisType.AUDIO in analysis_type:	1✔
88	sds.link_audio_dataset(ads, first=first, last=last)	1✔
89
90	if (	1✔
91	AnalysisType.MATRIX in analysis_type
92	and AnalysisType.SPECTROGRAM in analysis_type
93	):
94	logger.info("Computing and writing spectrum matrices and spectrograms...")	1✔
95	sds.save_all(	1✔
96	matrix_folder=sds.folder / matrix_folder_name,
97	spectrogram_folder=sds.folder / spectrogram_folder_name,
98	link=link,
99	first=first,
100	last=last,
101	)
102	elif AnalysisType.SPECTROGRAM in analysis_type:	1✔
103	logger.info("Computing and writing spectrograms...")	1✔
104	sds.save_spectrogram(	1✔
105	folder=sds.folder / spectrogram_folder_name,
106	first=first,
107	last=last,
108	)
109	elif AnalysisType.MATRIX in analysis_type:	×
110	logger.info("Computing and writing spectrum matrices...")	×
111	sds.write(	×
112	folder=sds.folder / matrix_folder_name,
113	link=link,
114	first=first,
115	last=last,
116	)
117	if AnalysisType.WELCH in analysis_type:	1✔
118	logger.info("Computing and writing welches...")	×
119	sds.write_welch(	×
120	folder=sds.folder / welch_folder_name,
121	first=first,
122	last=last,
123	)
124
125	# Update the sds from the JSON in case it has already been modified in another job
126	sds.update_json_audio_data(first=first, last=last)	1✔
127	logger.info("Analysis done!")	1✔
128
129
130	if __name__ == "__main__":	1✔
131	parser = argparse.ArgumentParser()	×
132
133	required = parser.add_argument_group("required arguments")	×
134	required.add_argument(	×
135	"--analysis",
136	"-a",
137	required=True,
138	help="Flags representing which files to export during this analysis.",
139	type=int,
140	)
141	required.add_argument(	×
142	"--ads-json",
143	"-ads",
144	required=True,
145	help="Path to the JSON of the AudioDataset to export during this analysis.",
146	type=str,
147	)
148	required.add_argument(	×
149	"--sds-json",
150	"-sds",
151	required=True,
152	help="Path to the JSON of the SpectroDataset to export during this analysis.",
153	type=str,
154	)
155	parser.add_argument(	×
156	"--subtype",
157	"-sbtp",
158	required=False,
159	help="The subtype format of the audio files to export.",
160	type=str,
161	default=None,
162	)
163	required.add_argument(	×
164	"--matrix-folder-name",
165	"-mfn",
166	required=True,
167	help="The name of the folder in which the npz matrix files are written.",
168	type=str,
169	)
170	required.add_argument(	×
171	"--spectrogram-folder-name",
172	"-sfn",
173	required=True,
174	help="The name of the folder in which the png spectrogram files are written.",
175	type=str,
176	)
177	required.add_argument(	×
178	"--welch-folder-name",
179	"-wfn",
180	required=True,
181	help="The name of the folder in which the npz welch files are written.",
182	type=str,
183	)
184	required.add_argument(	×
185	"--first",
186	"-f",
187	required=True,
188	help="The index of the first file to export.",
189	type=int,
190	default=0,
191	)
192	required.add_argument(	×
193	"--last",
194	"-l",
195	required=True,
196	help="The index after the last file to export.",
197	type=int,
198	default=-1,
199	)
200	parser.add_argument(	×
201	"--downsampling-quality",
202	"-dq",
203	required=False,
204	help="The downsampling quality preset as specified in the soxr library.",
205	type=str,
206	default=None,
207	)
208	parser.add_argument(	×
209	"--upsampling-quality",
210	"-uq",
211	required=False,
212	help="The upsampling quality preset as specified in the soxr library.",
213	type=str,
214	default=None,
215	)
216	parser.add_argument(	×
217	"--umask",
218	type=int,
219	default=0o002,
220	help="The umask to apply on the created file permissions.",
221	)
222	parser.add_argument(	×
223	"--tqdm-disable",
224	type=int,
225	default=1,
226	help="Disable TQDM progress bars.",
227	)
228	parser.add_argument(	×
229	"--multiprocessing",
230	type=str,
231	default="false",
232	help="Turn multiprocessing on or off.",
233	)
NEW 234	parser.add_argument(	×
235	"--use-logging-setup",
236	type=str,
237	default="false",
238	help="Call osekit.setup_logging() before running the analysis.",
239	)
UNCOV 240	parser.add_argument(	×
241	"--nb-processes",
242	type=str,
243	default=None,
244	help="Set the number of processes to use.",
245	)
NEW 246	parser.add_argument(	×
247	"--dataset-json-path",
248	"-p",
249	help="The path to the Dataset JSON file of which to use the logger.",
250	type=str,
251	)
252
253	args = parser.parse_args()	×
254
255	os.environ["DISABLE_TQDM"] = "" if not args.tqdm_disable else str(args.tqdm_disable)	×
256
NEW 257	if args.use_logging_setup.lower() == "true":	×
NEW 258	setup_logging()	×
259
260	config.multiprocessing["is_active"] = args.multiprocessing.lower() == "true"	×
NEW 261	if (nb_processes := args.nb_processes) is not None:	×
NEW 262	config.nb_processes = (	×
263	None if nb_processes.lower() == "none" else int(nb_processes)
264	)
265
266	os.umask(args.umask)	×
267
268	if args.downsampling_quality is not None:	×
269	resample_quality_settings["downsample"] = args.downsampling_quality	×
270	if args.upsampling_quality is not None:	×
271	resample_quality_settings["upsample"] = args.upsampling_quality	×
272
NEW 273	logger = (	×
274	logging.getLogger()
275	if (args.dataset_json_path is None or args.dataset_json_path.lower() == "none")
276	else Dataset.from_json(Path(args.dataset_json_path)).logger
277	)
278
NEW 279	ads = (	×
280	AudioDataset.from_json(Path(args.ads_json))
281	if args.ads_json.lower() != "none"
282	else None
283	)
NEW 284	sds = (	×
285	SpectroDataset.from_json(Path(args.sds_json))
286	if args.sds_json.lower() != "none"
287	else None
288	)
289
UNCOV 290	subtype = None if args.subtype.lower() == "none" else args.subtype	×
291
292	analysis_type = AnalysisType(args.analysis)	×
293
294	write_analysis(	×
295	analysis_type=analysis_type,
296	ads=ads,
297	sds=sds,
298	subtype=subtype,
299	matrix_folder_name=args.matrix_folder_name,
300	spectrogram_folder_name=args.spectrogram_folder_name,
301	welch_folder_name=args.welch_folder_name,
302	first=args.first,
303	last=args.last,
304	link=True,
305	logger=logger,
306	)

Project-OSmOSE / OSEkit / 18915983740

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous