• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / xclim / 20307480662

17 Dec 2025 03:09PM UTC coverage: 92.066% (-0.04%) from 92.101%
20307480662

push

github

web-flow
Bump filelock from 3.18.0 to 3.20.1 in /CI in the pip group across 1 directory (#2287)

7844 of 8520 relevant lines covered (92.07%)

5.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.69
/src/xclim/testing/utils.py
1
"""
2
Testing and Tutorial Utilities' Module
3
======================================
4
"""
5

6
from __future__ import annotations
6✔
7

8
import importlib.metadata as ilm
6✔
9
import importlib.resources as ilr
6✔
10
import logging
6✔
11
import os
6✔
12
import platform
6✔
13
import re
6✔
14
import sys
6✔
15
import time
6✔
16
import warnings
6✔
17
from collections.abc import Callable, Iterable, Sequence
6✔
18
from datetime import datetime as dt
6✔
19
from functools import wraps
6✔
20
from importlib.metadata import PackageNotFoundError
6✔
21
from io import StringIO
6✔
22
from pathlib import Path
6✔
23
from shutil import copytree
6✔
24
from typing import IO, Any, TextIO
6✔
25
from urllib.error import HTTPError, URLError
6✔
26
from urllib.parse import urljoin, urlparse
6✔
27
from urllib.request import urlretrieve
6✔
28

29
from filelock import FileLock
6✔
30
from packaging.version import Version
6✔
31
from xarray import Dataset
6✔
32
from xarray import open_dataset as _open_dataset
6✔
33

34
import xclim
6✔
35
from xclim import __version__ as __xclim_version__
6✔
36

37
try:
6✔
38
    import pytest
6✔
39
    from pytest_socket import SocketBlockedError
6✔
40
except ImportError:
×
41
    pytest = None
×
42
    SocketBlockedError = None
×
43

44
try:
6✔
45
    import pooch
6✔
46
except ImportError:
×
47
    warnings.warn("The `pooch` library is not installed. The default cache directory for testing data will not be set.")
×
48
    pooch = None
×
49

50

51
logger = logging.getLogger("xclim")
6✔
52

53

54
__all__ = [
6✔
55
    "TESTDATA_BRANCH",
56
    "TESTDATA_CACHE_DIR",
57
    "TESTDATA_REPO_URL",
58
    "audit_url",
59
    "default_testdata_cache",
60
    "default_testdata_repo_url",
61
    "default_testdata_version",
62
    "gather_testing_data",
63
    "list_input_variables",
64
    "nimbus",
65
    "open_dataset",
66
    "populate_testing_data",
67
    "publish_release_notes",
68
    "run_doctests",
69
    "show_versions",
70
    "testing_setup_warnings",
71
]
72

73
default_testdata_version = "v2025.4.29"
"""Version tag of the testing data fetched when no branch is requested."""

default_testdata_repo_url = "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/"
"""Repository URL the testing data is fetched from when no other URL is requested."""

try:
    default_testdata_cache = Path(pooch.os_cache("xclim-testdata"))
    """Default location for the testing data cache."""
except (AttributeError, TypeError):
    # `pooch` is None when the library is not installed: no default cache can be derived.
    default_testdata_cache = None

TESTDATA_REPO_URL = str(os.environ.get("XCLIM_TESTDATA_REPO_URL", default_testdata_repo_url))
"""
URL of the testing data repository used when fetching datasets.

Read from the ``XCLIM_TESTDATA_REPO_URL`` environment variable, falling back to
``default_testdata_repo_url``.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata" pytest
"""

TESTDATA_BRANCH = str(os.environ.get("XCLIM_TESTDATA_BRANCH", default_testdata_version))
"""
Branch of the testing data repository used when fetching datasets.

Read from the ``XCLIM_TESTDATA_BRANCH`` environment variable, falling back to
``default_testdata_version``.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_BRANCH="my_testing_branch"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_BRANCH="my_testing_branch" pytest
"""

TESTDATA_CACHE_DIR = os.environ.get("XCLIM_TESTDATA_CACHE_DIR", default_testdata_cache)
"""
Directory in which the testing datasets are stored.

Read from the ``XCLIM_TESTDATA_CACHE_DIR`` environment variable. If not set, the default location
is used (based on ``platformdirs``, see :func:`pooch.os_cache`).

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data" pytest
"""
143

144

145
def list_input_variables(submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None) -> dict:
    """
    List all possible variables names used in xclim's indicators.

    Made for development purposes. Walks the indicator registry and collects every parameter whose kind is
    :py:attr:`xclim.core.utils.InputKind.VARIABLE` or `OPTIONAL_VARIABLE`.

    Parameters
    ----------
    submodules : Sequence of str, optional
        Restrict the output to indicators of a list of submodules only.
        When empty or None, every public attribute name of `xclim.indicators` is accepted.
    realms : Sequence of str, optional
        Restrict the output to indicators of a list of realms only.
        When empty or None, the realms "atmos", "ocean", "land" and "seaIce" are accepted.

    Returns
    -------
    dict
        A mapping from variable name to the list of registry entries that use it.
    """
    from collections import defaultdict  # pylint: disable=import-outside-toplevel

    from xclim import indicators  # pylint: disable=import-outside-toplevel
    from xclim.core.indicator import registry  # pylint: disable=import-outside-toplevel
    from xclim.core.utils import InputKind  # pylint: disable=import-outside-toplevel

    if not submodules:
        submodules = [attr for attr in dir(indicators) if not attr.startswith("__")]
    if not realms:
        realms = ["atmos", "ocean", "land", "seaIce"]
    variable_kinds = (InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE)

    collected = defaultdict(list)
    for key, indicator in registry.items():
        # Keys holding a dot are prefixed with their submodule name;
        # for all other entries the realm doubles as the submodule.
        owner = key.split(".")[0] if "." in key else indicator.realm
        if owner not in submodules or indicator.realm not in realms:
            continue

        for param_name, param in indicator._all_parameters.items():
            if param.kind in variable_kinds:
                # The parameter default, when truthy, is used as the variable name.
                collected[param.default or param_name].append(indicator)

    return collected
195

196

197
# Publishing Tools ###
198

199

200
def publish_release_notes(
6✔
201
    style: str = "md",
202
    file: os.PathLike[str] | StringIO | TextIO | None = None,
203
    changes: str | os.PathLike[str] | None = None,
204
) -> str | None:
205
    """
206
    Format release notes in Markdown or ReStructuredText.
207

208
    Parameters
209
    ----------
210
    style : {"rst", "md"}
211
        Use ReStructuredText formatting or Markdown. Default: Markdown.
212
    file : {os.PathLike, StringIO, TextIO}, optional
213
        If provided, prints to the given file-like object. Otherwise, returns a string.
214
    changes : str or os.PathLike[str], optional
215
        If provided, manually points to the file where the changelog can be found.
216
        Assumes a relative path otherwise.
217

218
    Returns
219
    -------
220
    str, optional
221
        If `file` not provided, the formatted release notes.
222

223
    Notes
224
    -----
225
    This function is used solely for development and packaging purposes.
226
    """
227
    if isinstance(changes, str | Path):
6✔
228
        changes_file = Path(changes).absolute()
6✔
229
    else:
230
        changes_file = Path(__file__).absolute().parents[3].joinpath("CHANGELOG.rst")
×
231

232
    if not changes_file.exists():
6✔
233
        raise FileNotFoundError("Changelog file not found in xclim folder tree.")
6✔
234

235
    with open(changes_file, encoding="utf-8") as hf:
6✔
236
        changes = hf.read()
6✔
237

238
    if style == "rst":
6✔
239
        hyperlink_replacements = {
6✔
240
            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xclim/issues/\1>`_",
241
            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xclim/pull/\>`_",
242
            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
243
        }
244
    elif style == "md":
6✔
245
        hyperlink_replacements = {
6✔
246
            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xclim/issues/\1)",
247
            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xclim/pull/\1)",
248
            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
249
        }
250
    else:
251
        msg = f"Formatting style not supported: {style}"
6✔
252
        raise NotImplementedError(msg)
6✔
253

254
    for search, replacement in hyperlink_replacements.items():
6✔
255
        changes = re.sub(search, replacement, changes)
6✔
256

257
    if style == "md":
6✔
258
        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
6✔
259

260
        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
6✔
261
        for title_expression, level in titles.items():
6✔
262
            found = re.findall(title_expression, changes)
6✔
263
            for grouping in found:
6✔
264
                fixed_grouping = str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
6✔
265
                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
6✔
266
                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
6✔
267
                changes = re.sub(search, replacement, changes)
6✔
268

269
        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
6✔
270
        found = re.findall(link_expressions, changes)
6✔
271
        for grouping in found:
6✔
272
            search = rf"`{grouping[0]} <.+>`\_"
6✔
273
            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
6✔
274
            changes = re.sub(search, replacement, changes)
6✔
275

276
    if not file:
6✔
277
        return changes
6✔
278
    if isinstance(file, Path | os.PathLike):
6✔
279
        with open(file, "w", encoding="utf-8") as f:
6✔
280
            print(changes, file=f)
6✔
281
    else:
282
        print(changes, file=file)
×
283
    return None
6✔
284

285

286
_xclim_deps = [
6✔
287
    "xclim",
288
    "xarray",
289
    "statsmodels",
290
    "scikit-learn",
291
    "scipy",
292
    "pint",
293
    "pandas",
294
    "numpy",
295
    "numba",
296
    "lmoments3",
297
    "jsonpickle",
298
    "flox",
299
    "dask",
300
    "cf_xarray",
301
    "cftime",
302
    "clisops",
303
    "click",
304
    "bottleneck",
305
    "boltons",
306
]
307

308

309
def show_versions(
6✔
310
    file: os.PathLike | StringIO | TextIO | None = None,
311
    deps: Iterable[str] | None = None,
312
) -> str | None:
313
    """
314
    Print the versions of xclim and its dependencies.
315

316
    Parameters
317
    ----------
318
    file : {os.PathLike, StringIO, TextIO}, optional
319
        If provided, prints to the given file-like object. Otherwise, returns a string.
320
    deps : iterable of str, optional
321
        An iterable of dependencies to gather and print version information from.
322
        Otherwise, prints `xclim` dependencies.
323

324
    Returns
325
    -------
326
    str or None
327
        If `file` not provided, the versions of xclim and its dependencies.
328
    """
329
    dependencies: list[str]
330
    if deps is None:
6✔
331
        dependencies = _xclim_deps
6✔
332
    else:
333
        dependencies = deps
×
334

335
    dependency_versions = {}
6✔
336
    for d in dependencies:
6✔
337
        try:
6✔
338
            _version = ilm.version(d)
6✔
339
        except PackageNotFoundError:
6✔
340
            _version = None
6✔
341
        dependency_versions[d] = _version
6✔
342

343
    modules_versions = "\n".join([f"{k}: {stat}" for k, stat in sorted(dependency_versions.items())])
6✔
344

345
    installed_versions = [
6✔
346
        "INSTALLED VERSIONS",
347
        "------------------",
348
        f"python: {platform.python_version()}",
349
        f"{modules_versions}",
350
        f"Anaconda-based environment: {'yes' if Path(sys.base_prefix).joinpath('conda-meta').exists() else 'no'}",
351
    ]
352

353
    message = "\n".join(installed_versions)
6✔
354

355
    if not file:
6✔
356
        return message
6✔
357
    if isinstance(file, Path | os.PathLike):
6✔
358
        with open(file, "w", encoding="utf-8") as f:
6✔
359
            print(message, file=f)
6✔
360
    else:
361
        print(message, file=file)
×
362
    return None
6✔
363

364

365
# Test Data Utilities ###
366

367

368
def run_doctests():
    """
    Run the doctests for the module.

    Invokes `pytest` with xdoctest enabled on the directory one level above this module's folder,
    then exits the interpreter with the status returned by `pytest`.

    Raises
    ------
    ImportError
        If `pytest` is not installed.
    """
    if pytest is None:
        raise ImportError(
            "The `pytest` package is required to run the doctests. "
            "You can install it with `pip install pytest` or `pip install xclim[dev]`."
        )

    here = Path(__file__).absolute()
    pytest_args = [
        f"--rootdir={here.parent}",
        "--numprocesses=0",
        "--xdoctest",
        f"{here.parents[1]}",
    ]

    status = pytest.main(pytest_args)
    sys.exit(status)
384

385

386
def testing_setup_warnings():
    """
    Warn users about potential incompatibilities between xclim and xclim-testdata versions.

    Two independent checks are performed:

    - A stable release of `xclim` (version of the form ``X.Y.Z``) running against a non-default
      testing data branch emits a warning, unless the ``CI`` or ``READTHEDOCS`` environment variable is set.
    - When the testing data branch looks like a version tag (``vX.Y.Z``), it is compared with a calendar
      version built from the last modification time of the installed `xclim` package file; a warning is
      emitted if the testing data is newer.
    """
    if re.match(r"^\d+\.\d+\.\d+$", __xclim_version__) and TESTDATA_BRANCH != default_testdata_version:
        # This does not need to be emitted on GitHub Workflows and ReadTheDocs
        if not os.getenv("CI") and not os.getenv("READTHEDOCS"):
            warnings.warn(
                f"`xclim` stable ({__xclim_version__}) is running tests against a non-default "
                f"branch of the testing data. It is possible that changes to the testing data may "
                f"be incompatible with some assertions in this version. "
                f"Please be sure to check {TESTDATA_REPO_URL} for more information.",
            )

    if re.match(r"^v\d+\.\d+\.\d+", TESTDATA_BRANCH):
        # Find the date of last modification of xclim source files to generate a calendar version.
        # The timestamp is converted directly: formatting it with `time.ctime` and parsing it back
        # with `strptime` depends on the active LC_TIME locale using English day and month names.
        install_date = dt.fromtimestamp(os.path.getmtime(xclim.__file__))
        install_calendar_version = f"{install_date.year}.{install_date.month}.{install_date.day}"

        if Version(TESTDATA_BRANCH) > Version(install_calendar_version):
            warnings.warn(
                f"The installation date of `xclim` ({install_date.ctime()}) "
                f"predates the last release of testing data ({TESTDATA_BRANCH}). "
                "It is very likely that the testing data is incompatible with this build of `xclim`.",
            )
412

413

414
def load_registry(branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL) -> dict[str, str]:
    """
    Load the registry file for the test data.

    For the default repository and branch, the registry file shipped in the `xclim` package is read.
    For any other repository or branch, the remote ``data/registry.txt`` is first downloaded
    into the package's ``testing`` folder and then read.

    Parameters
    ----------
    branch : str
        Branch of the repository to use when fetching testing datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.

    Returns
    -------
    dict
        Dictionary of filenames and hashes.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist locally.
    """
    if not repo.endswith("/"):
        repo = f"{repo}/"
    remote_registry = audit_url(
        urljoin(
            urljoin(repo, branch if branch.endswith("/") else f"{branch}/"),
            "data/registry.txt",
        )
    )

    if repo != default_testdata_repo_url:
        # External repository: the local copy is named after the repository and branch.
        external_repo_name = urlparse(repo).path.split("/")[-2]
        external_branch_name = branch.split("/")[-1]
        registry_file = Path(
            str(ilr.files("xclim").joinpath(f"testing/registry.{external_repo_name}.{external_branch_name}.txt"))
        )
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    elif branch != default_testdata_version:
        # Default repository, custom branch: the local copy lives in a folder named after the branch.
        custom_registry_folder = Path(str(ilr.files("xclim").joinpath(f"testing/{branch}")))
        custom_registry_folder.mkdir(parents=True, exist_ok=True)
        registry_file = custom_registry_folder.joinpath("registry.txt")
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    else:
        registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))

    if not registry_file.exists():
        raise FileNotFoundError(f"Registry file not found: {registry_file}")

    # Load the registry file: one "<filename> <hash>" entry per line.
    registry = {}
    with registry_file.open(encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            # Blank lines and comment lines carry no entry.
            if not fields or fields[0].startswith("#"):
                continue
            registry[fields[0]] = fields[1]
    return registry
463

464

465
def nimbus(
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    cache_dir: str | Path = TESTDATA_CACHE_DIR,
    allow_updates: bool = True,
):
    """
    Pooch registry instance for xclim test data.

    Parameters
    ----------
    repo : str
        URL of the repository to use when fetching testing datasets.
    branch : str
        Branch of repository to use when fetching testing datasets.
    cache_dir : str or Path
        The path to the directory where the data files are stored.
    allow_updates : bool
        If True, allow updates to the data files. Default is True.

    Returns
    -------
    pooch.Pooch
        The Pooch instance for accessing the xclim testing data.

    Raises
    ------
    ImportError
        If the `pooch` package is not installed.

    Notes
    -----
    There are three environment variables that can be used to control the behaviour of this registry:
        - ``XCLIM_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the
          base directory to store the data files.
          The directory should be an absolute path (i.e., it should start with ``/``).
          Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`).
        - ``XCLIM_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of
          the repository to use when fetching datasets. Otherwise, the default repository will be used.
        - ``XCLIM_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of
          the repository to use when fetching datasets. Otherwise, the default branch will be used.

    The ``fetch`` method of the returned instance is replaced by a wrapper that defaults to a downloader
    sending an ``xclim`` user-agent header, and that raises `FileNotFoundError` when a download is attempted
    while socket connections are blocked by `pytest-socket`.

    Examples
    --------
    Using the registry to download a file:

    .. code-block:: python

        import xarray as xr
        from xclim.testing.utils import nimbus

        example_file = nimbus().fetch("example.nc")
        data = xr.open_dataset(example_file)
    """
    if pooch is None:
        raise ImportError(
            "The `pooch` package is required to fetch the xclim testing data. "
            "You can install it with `pip install pooch` or `pip install xclim[dev]`."
        )
    if not repo.endswith("/"):
        repo = f"{repo}/"
    remote = audit_url(urljoin(urljoin(repo, branch if branch.endswith("/") else f"{branch}/"), "data"))

    _nimbus = pooch.create(
        path=cache_dir,
        base_url=remote,
        version=default_testdata_version,
        version_dev=branch,
        allow_updates=allow_updates,
        registry=load_registry(branch=branch, repo=repo),
    )

    # `SocketBlockedError` is None when `pytest-socket` is not installed. Catching None would itself
    # raise a TypeError and hide the original error, so an empty tuple (matching nothing) is used instead.
    blocked_socket_errors = () if SocketBlockedError is None else (SocketBlockedError,)

    # Add a custom fetch method to the Pooch instance
    # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763
    # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202)
    _nimbus.fetch_diversion = _nimbus.fetch

    # Overload the fetch method to add user-agent headers
    @wraps(_nimbus.fetch_diversion)
    def _fetch(*args, **kwargs: bool | Callable) -> str:  # numpydoc ignore=GL08  # *args: str
        def _downloader(
            url: str,
            output_file: str | IO,
            poocher: pooch.Pooch,
            check_only: bool | None = False,
        ) -> None:
            """Download the file from the URL to `output_file`, sending an xclim user-agent header."""
            headers = {"User-Agent": f"xclim ({__xclim_version__})"}
            downloader = pooch.HTTPDownloader(headers=headers)
            return downloader(url, output_file, poocher, check_only=check_only)

        # default to our http/s downloader with user-agent headers
        kwargs.setdefault("downloader", _downloader)
        try:
            return _nimbus.fetch_diversion(*args, **kwargs)
        except blocked_socket_errors as err:
            raise FileNotFoundError(
                "File was not found in the testing data cache and remote socket connections are disabled. "
                "You may need to download the testing data using `xclim prefetch_testing_data`."
            ) from err

    # Replace the fetch method with the custom fetch method
    _nimbus.fetch = _fetch

    return _nimbus
565

566

567
def open_dataset(name: str, nimbus_kwargs: dict[str, Path | str | bool] | None = None, **xr_kwargs: Any) -> Dataset:
    r"""
    Open a dataset from the xclim testing data.

    The file is fetched through the Pooch instance built by `nimbus`, then opened with `xarray.open_dataset`.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
    nimbus_kwargs : dict, optional
        Keyword arguments passed to the nimbus function. When None, `nimbus` is called with its defaults.
    **xr_kwargs : Any
        Keyword arguments passed to xarray.open_dataset.

    Returns
    -------
    xarray.Dataset
        The dataset.

    See Also
    --------
    xarray.open_dataset : Open and read a dataset from a file or file-like object.
    nimbus : Pooch wrapper for accessing the xclim testing data.

    Notes
    -----
    As of `xclim` v0.57.0, this function no longer supports the `dap_url` parameter. For OPeNDAP datasets, use
    `xarray.open_dataset` directly using the OPeNDAP URL with an appropriate backend installed (netCDF4, pydap, etc.).
    """
    registry_options = {} if nimbus_kwargs is None else nimbus_kwargs
    local_file = nimbus(**registry_options).fetch(name)
    return _open_dataset(local_file, **xr_kwargs)
600

601

602
def populate_testing_data(
    temp_folder: Path | None = None,
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    local_cache: Path = TESTDATA_CACHE_DIR,
) -> None:
    """
    Populate the local cache with the testing data.

    Every file listed in the registry of the requested repository and branch is fetched.
    Files that cannot be retrieved because of an HTTP error are logged and skipped.

    Parameters
    ----------
    temp_folder : Path, optional
        Path to a temporary folder to use as the local cache. If not provided, `local_cache` is used.
    repo : str, optional
        URL of the repository to use when fetching testing datasets.
    branch : str, optional
        Branch of xclim-testdata to use when fetching testing datasets.
    local_cache : Path
        The path to the local cache. Defaults to the location set by the platformdirs library.
        The testing data will be downloaded to this local cache.

    Raises
    ------
    SocketBlockedError
        If socket connections are blocked by `pytest-socket` while fetching a file.
    """
    # Create the Pooch instance
    n = nimbus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache)

    # `SocketBlockedError` is None when `pytest-socket` is not installed. Catching None would itself
    # raise a TypeError and hide the original error, so an empty tuple (matching nothing) is used instead.
    blocked_socket_errors = () if SocketBlockedError is None else (SocketBlockedError,)

    # Download the files listed for the requested repository and branch (not the module defaults)
    errored_files = []
    for file in load_registry(branch=branch, repo=repo):
        try:
            n.fetch(file)
        except HTTPError:
            logger.error("File `%s` not accessible in remote repository.", file)
            errored_files.append(file)
        except blocked_socket_errors as err:
            msg = (
                "Unable to access registry file online. Testing suite is being run with `--disable-socket`. "
                "If you intend to run tests with this option enabled, please download the file beforehand with the "
                "following console command: `$ xclim prefetch_testing_data`."
            )
            raise SocketBlockedError(msg) from err

    if errored_files:
        logger.error(
            "The following files were unable to be downloaded: %s",
            errored_files,
        )
    else:
        # Reported once, and only when every file was retrieved.
        logger.info("Files were downloaded successfully.")
650

651

652
def gather_testing_data(
    worker_cache_dir: str | os.PathLike[str] | Path,
    worker_id: str,
    _cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
) -> None:
    """
    Gather testing data across workers.

    The shared cache is populated (under a file lock for non-"master" workers on non-Windows platforms),
    then the folder of the default testing data version is copied to `worker_cache_dir`.

    Parameters
    ----------
    worker_cache_dir : str or Path
        The directory the testing data is copied to for this worker.
    worker_id : str
        The worker ID. The value "master" populates the cache without file locking.
    _cache_dir : str or Path, optional
        The shared directory the testing data is downloaded to. Default is `TESTDATA_CACHE_DIR`.

    Raises
    ------
    ValueError
        If the cache directory is not set.
    FileNotFoundError
        If the testing data is not found.
    """
    if _cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `_cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )
    cache_dir = Path(_cache_dir)

    if worker_id == "master":
        # Download into the same directory that is copied from below.
        populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
    else:
        if platform.system() == "Windows":
            if not cache_dir.joinpath(default_testdata_version).exists():
                raise FileNotFoundError(
                    "Testing data not found and UNIX-style file-locking is not supported on Windows. "
                    "Consider running `$ xclim prefetch_testing_data` to download testing data beforehand."
                )
        else:
            cache_dir.mkdir(exist_ok=True, parents=True)
            lockfile = cache_dir.joinpath(".lock")
            test_data_being_written = FileLock(lockfile)
            with test_data_being_written:
                # Only one worker at a time populates the shared cache.
                # NOTE(review): the `.data_written` marker is created here but never read in this function.
                populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
                cache_dir.joinpath(".data_written").touch()
            # Re-acquire the lock to remove the lock file once the data has been written.
            with test_data_being_written.acquire():
                if lockfile.exists():
                    lockfile.unlink()
    copytree(cache_dir.joinpath(default_testdata_version), worker_cache_dir)
704

705

706
# Testing Utilities ###
707

708

709
def audit_url(url: str, context: str | None = None) -> str:
6✔
710
    """
711
    Check if the URL is well-formed.
712

713
    Parameters
714
    ----------
715
    url : str
716
        The URL to check.
717
    context : str, optional
718
        Additional context to include in the error message. Default is None.
719

720
    Returns
721
    -------
722
    str
723
        The URL if it is well-formed.
724

725
    Raises
726
    ------
727
    URLError
728
        If the URL is not well-formed.
729
    """
730
    msg = ""
6✔
731
    result = urlparse(url)
6✔
732
    if result.scheme == "http":
6✔
733
        msg = f"{context if context else ''} URL is not using secure HTTP: '{url}'".strip()
×
734
    if not all([result.scheme, result.netloc]):
6✔
735
        msg = f"{context if context else ''} URL is not well-formed: '{url}'".strip()
×
736

737
    if msg:
6✔
738
        logger.error(msg)
×
739
        raise URLError(msg)
×
740
    return url
6✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc