• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

neurospin-deepinsight / brainprep / 23547419711

25 Mar 2026 02:52PM UTC coverage: 74.094% (-1.0%) from 75.06%
23547419711

push

github

AGrigis
brainprep: fix CI.

1 of 1 new or added line in 1 file covered. (100.0%)

358 existing lines in 17 files now uncovered.

1410 of 1903 relevant lines covered (74.09%)

0.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.92
/brainprep/utils/utils.py
1
##########################################################################
2
# NSAp - Copyright (C) CEA, 2021 - 2025
3
# Distributed under the terms of the CeCILL-B license, as published by
4
# the CEA-CNRS-INRIA. Refer to the LICENSE file or to
5
# http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html
6
# for details.
7
##########################################################################
8

9
"""
10
Module that contains some utility functions.
11
"""
12

13
import inspect
1✔
14
import json
1✔
15
import re
1✔
16
import uuid
1✔
17
from collections.abc import Callable, Iterable
1✔
18
from pathlib import Path
1✔
19
from typing import (
1✔
20
    Any,
21
    Union,
22
    get_args,
23
    get_origin,
24
)
25

26
from .._version import __version__
1✔
27
from ..typing import (
1✔
28
    Directory,
29
    File,
30
)
31
from .color import (
1✔
32
    print_info,
33
    print_warn,
34
)
35

36

37
def coerce_to_list(
        value: Any,
        expected_type: type) -> Any:
    """
    Turn a value into a list when its annotation is a list or a tuple.

    Parameters
    ----------
    value : Any
        The input value to be coerced.
    expected_type : type
        The expected type annotation (e.g., `list[File]`,
        `tuple[str, ...]`). A coercion only happens when the origin of
        this annotation is `list` or `tuple`.

    Returns
    -------
    typed_value : Any
        A `list` built from `value` when the annotation origin is `list`
        or `tuple`; the untouched `value` otherwise (including `None`).

    Notes
    -----
    - Strings containing a comma (e.g., ``"a,b,c"``) are split on commas.
    - Existing lists or tuples are copied into a new list.
    - Any other single value is wrapped into a one-element list.
    """
    wants_sequence = get_origin(expected_type) in (list, tuple)
    if value is None or not wants_sequence:
        return value

    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str) and "," in value:
        return value.split(",")
    return [value]
76

77

78
def coerce_to_path(
        value: Any,
        expected_type: type) -> Any:
    """
    Recursively convert values to `pathlib.Path` based on expected type
    annotations.

    Parameters
    ----------
    value : Any
        The input value to be coerced.
    expected_type : type
        The expected type annotation (e.g., `File`, `list[File]`,
        `dict[str, Directory]`, `Union[str, Directory]`,
        `Directory | None`).

    Returns
    -------
    typed_value : Any
        The coerced value, with `File` and `Directory` converted to
        resolved `pathlib.Path` objects. `None` and values whose annotation
        does not involve `File` or `Directory` are returned unchanged.

    Notes
    -----
    - Unions are handled whether written as `Union[X, Y]` or with the
      PEP 604 `X | Y` syntax.
    - For `list`, `tuple` and `set` annotations only the first type argument
      is used to coerce every item of the container.
    - For `dict` annotations only the values are coerced, keys are kept.
    """
    # Local import: keeps this block independent of the module import list.
    import types

    if value is None:
        return value

    origin = get_origin(expected_type)
    args = get_args(expected_type)

    if expected_type in {File, Directory}:
        return Path(value).resolve()

    # `X | Y` annotations report `types.UnionType` as origin on Python
    # versions where it is distinct from `typing.Union`; the attribute is
    # missing on interpreters older than 3.10.
    pep604_union = getattr(types, "UnionType", None)
    is_union = origin is Union or (
        pep604_union is not None and origin is pep604_union)
    if is_union and (File in args or Directory in args):
        return Path(value).resolve()

    if origin in {list, tuple, set} and args:
        container_type = origin
        inner_type = args[0]
        return container_type(coerce_to_path(inner_value, inner_type)
                              for inner_value in value)

    if origin is dict and len(args) == 2:
        _, val_type = args
        return {key: coerce_to_path(val, val_type)
                for key, val in value.items()}

    return value
123

124

125
def parse_bids_keys(
        bids_path: File,
        full_path: bool = False,
        check_run: bool = False) -> dict[str]:
    """
    Parse BIDS entities and modality from a filename or path with validation.

    This function extracts BIDS entities (e.g., subject, session, task,
    run) from a BIDS-compliant filename or full path. It also identifies the
    modality and applies default values when certain entities are missing.

    When the `ses` entity is absent, it defaults to "01". This ensures
    consistent downstream file handling.

    When the `run` entity is absent, a deterministic 5-digit identifier is
    generated from the parsed name using `make_run_id`, so that the same
    name always yields the same default run value.

    Parameters
    ----------
    bids_path : File
        The BIDS file to parse.
    full_path : bool
        If True, extract entities from the full input path rather than
        only the filename. Default is False.
    check_run : bool
        If True, checks with `check_run_fn` whether the current run value
        is unique, falls back to the generated identifier when a parsed run
        value is not, and warns if the final value is still not unique.
        Default is False.

    Returns
    -------
    entities : dict[str]
        A dictionary containing the parsed BIDS entities and the detected
        modality (stored under ``"modality"``, and under ``"mod"`` when no
        `mod` entity was parsed). Missing `ses` and `run` entities are
        filled with default values.

    Notes
    -----
    If `check_run` is False and the parsed name does not contain the `run`
    key, a warning message is displayed.
    """
    # Parse either the whole path or only its last component
    filename = str(bids_path) if full_path else bids_path.name

    # Collect every "<entity>-<value>" pair, the last occurrence wins
    entity_pattern = (
        r"(?P<entity>(sub|ses|task|acq|run|echo|rec|dir|mod|ce|part|space|res|"
        r"recording))"
        r"-(?P<value>[^_/]+)"
    )
    entities = {
        match.group("entity"): match.group("value")
        for match in re.finditer(entity_pattern, filename)
    }

    # The modality is the suffix located right before a known extension
    suffix_pattern = (
        r"_(?P<modality>[a-zA-Z0-9]+)(?=\.(nii|nii\.gz|json|tsv|edf|vhdr"
        r"|eeg|bvec|bval|csv|mat|xml))"
    )
    modality_match = re.search(suffix_pattern, filename)
    if modality_match is not None:
        entities["modality"] = modality_match.group("modality")
        # An explicit "mod" entity takes precedence over the suffix
        entities.setdefault("mod", entities["modality"])

    # Remember whether the run was parsed before injecting the defaults
    fallback_run = make_run_id(filename)[1]
    has_run = "run" in entities
    entities.setdefault("ses", "01")
    entities.setdefault("run", fallback_run)

    if not check_run:
        if not has_run:
            print_warn(
                f"BIDS file name does not contain run key: {filename}"
            )
        return entities

    # Integrity check, retried once with the generated identifier
    is_unique = check_run_fn(bids_path, entities, full_path)
    if has_run and not is_unique:
        print_info(
            "Multiple files with same run ID detected, using UUID instead."
        )
        entities["run"] = fallback_run
        is_unique = check_run_fn(bids_path, entities, full_path)
    if not is_unique:
        print_warn(
            f"The generated UUID is not unique: {bids_path}"
        )

    return entities
225

226

227
def check_run_fn(
        bids_path: File,
        entities: dict[str],
        full_path: bool = False) -> bool:
    """
    Scan the folder containing a BIDS file and verify that the run entity
    associated with the file appears at most once among all matching files.

    The files of the parent folder matching ``*sub-*<modality>*<ext>`` are
    listed, where ``<ext>`` is the concatenation of all suffixes of
    `bids_path`. A run value is parsed from each of them, or generated from
    that file's own name with `make_run_id` when no `run` entity is found.

    Parameters
    ----------
    bids_path : File
        A BIDS file.
    entities : dict[str]
        Dictionary of parsed BIDS entities for the file. It must contain
        the ``"modality"`` and ``"run"`` keys.
    full_path : bool
        If True, extract entities from the full path instead of only the
        filename. Default False.

    Returns
    -------
    bool
        True if the run identifier occurs exactly once among all matching
        files in the folder (or zero for virtual data), False otherwise.
    """
    ext = "".join(bids_path.suffixes)
    entity_pattern = (
        r"(?P<entity>(run))"
        r"-(?P<value>[^_/]+)"
    )
    pattern = f"*sub-*{entities['modality']}*{ext}"

    all_run_ids = []
    for sibling_path in bids_path.parent.glob(pattern):
        # Parse the same text as requested by `full_path`
        sibling_name = str(sibling_path) if full_path else sibling_path.name

        # Extract run entity if present, the last occurrence wins
        run_id = None
        for match in re.finditer(entity_pattern, sibling_name):
            run_id = match.group("value")

        # If run is missing, derive it from this sibling's own name: using
        # the reference file name here would give every run-less sibling
        # the same identifier and report spurious duplicates.
        if run_id is None:
            run_id = make_run_id(sibling_name)[1]

        all_run_ids.append(run_id)

    # Count how many times the current file's run appears
    return all_run_ids.count(entities["run"]) in (0, 1)
282

283

284
def make_run_id(
        filename: str) -> tuple[str, str]:
    """
    Build a reproducible identifier and its 5-digit prefix from a filename.

    A UUIDv5 is derived from the URL namespace and the given filename, then
    expressed as a decimal integer string. Since UUIDv5 is a pure function
    of its inputs, the same filename always yields the same pair of values.

    Parameters
    ----------
    filename : str
        The filename used as the seed for generating the identifiers.

    Returns
    -------
    code : str
        The decimal integer representation of the UUIDv5 derived from the
        filename.
    short_code : str
        The first five characters of `code`, used as a compact ID.
    """
    seeded_uuid = uuid.uuid5(uuid.NAMESPACE_URL, filename)
    full_code = f"{seeded_uuid.int}"
    short_code = full_code[:5]
    return full_code, short_code
310

311

312
def sidecar_from_file(
        image_file: File) -> File:
    """
    Infers the corresponding JSON sidecar file for a given NIfTI image file.

    This function checks that the input file has a ``.nii.gz`` extension and
    attempts to locate a sidecar ``.json`` file with the same base name. If
    either condition fails, it raises a ValueError.

    Parameters
    ----------
    image_file : File
        The NIfTI image file for which to infer the JSON sidecar.

    Returns
    -------
    sidecar_file : File
        Path to the inferred JSON sidecar file.

    Raises
    ------
    ValueError
        If the input file does not have a `.nii.gz` extension or if the
        corresponding JSON sidecar file does not exist.

    Examples
    --------
    >>> from pathlib import Path
    >>> from brainprep.utils import sidecar_from_file
    >>>
    >>> image_file = Path("/tmp/sub-01_T1w.nii.gz")
    >>> sidecar_file = Path("/tmp/sub-01_T1w.json")
    >>> sidecar_file.touch()
    >>>
    >>> sidecar_from_file(image_file)
    PosixPath('/tmp/sub-01_T1w.json')
    """
    extension = ".nii.gz"
    image_path = str(image_file)
    if not image_path.endswith(extension):
        raise ValueError(
            f"Input image file must be in NIIGZ format: {image_file}"
        )
    # Swap only the trailing extension: a plain `str.replace` would also
    # rewrite any ".nii.gz" found earlier in the path (e.g. in a folder name).
    sidecar_file = Path(image_path[:-len(extension)] + ".json")
    if not sidecar_file.is_file():
        raise ValueError(
            f"Sidecar inferred from input image file not found: {sidecar_file}"
        )
    return sidecar_file
359

360

361
def find_stack_level() -> int:
    """
    Count the stack frames to skip to get out of the ``brainprep`` package.

    Starting from the frame of this function, the call stack is walked
    backward until a frame is found whose file path does not start with the
    ``brainprep`` package directory. Files whose names start with ``test_``
    are always treated as external. The result can be passed as the
    ``stacklevel`` of a warning so that it points to user code rather than
    to internal library frames.

    Returns
    -------
    int
        The number of internal frames to skip before reaching user code.

    Notes
    -----
    Adapted from the pandas codebase.

    Examples
    --------
    >>> import warnings
    >>> from brainprep.utils import find_stack_level
    >>>
    >>> def load_data(path):
    ...     if not path.exists():
    ...         warnings.warn(
    ...             "The provided path does not exist.",
    ...             stacklevel=find_stack_level()
    ...         )
    """
    import brainprep

    package_root = str(Path(brainprep.__file__).parent)

    # Walking frame objects by hand avoids the cost of `inspect.stack()`:
    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    depth = 0
    frame = inspect.currentframe()
    try:
        while frame:
            source_file = inspect.getfile(frame)
            is_internal = source_file.startswith(package_root)
            if Path(source_file).name.startswith("test_") or not is_internal:
                break
            frame = frame.f_back
            depth += 1
    finally:
        # Drop the frame reference explicitly, see note in
        # https://docs.python.org/3/library/inspect.html#inspect.Traceback
        del frame
    return depth
415

416

417
def find_first_occurrence(
        input_file: Path,
        target: str) -> Path:
    """
    Look up the nearest ancestor directory named `target`.

    Parameters
    ----------
    input_file : Path
        Starting path (file or directory). Only its parents are inspected,
        not the path itself.
    target : str
        Name of the directory to search for.

    Returns
    -------
    Path
        The closest parent of `input_file` whose name equals `target`.

    Raises
    ------
    ValueError
        If no parent directory with the given name is found.
    """
    # `Path.parents` goes from the closest ancestor up to the root
    matching_parents = (
        ancestor
        for ancestor in input_file.parents
        if ancestor.name == target
    )
    closest = next(matching_parents, None)
    if closest is None:
        raise ValueError(
            f"Unable to find target '{target}' in parents of {input_file}"
        )
    return closest
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc