• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / xclim / 13501709820

24 Feb 2025 03:31PM UTC coverage: 89.942% (-0.03%) from 89.97%
13501709820

push

github

web-flow
Bump the python group with 3 updates (#2085)

9613 of 10688 relevant lines covered (89.94%)

6.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.09
/src/xclim/testing/utils.py
1
"""
2
Testing and Tutorial Utilities' Module
3
======================================
4
"""
5

6
from __future__ import annotations
8✔
7

8
import importlib.resources as ilr
8✔
9
import logging
8✔
10
import os
8✔
11
import platform
8✔
12
import re
8✔
13
import sys
8✔
14
import time
8✔
15
import warnings
8✔
16
from collections.abc import Callable, Sequence
8✔
17
from datetime import datetime as dt
8✔
18
from functools import wraps
8✔
19
from importlib import import_module
8✔
20
from io import StringIO
8✔
21
from pathlib import Path
8✔
22
from shutil import copytree
8✔
23
from typing import IO, TextIO
8✔
24
from urllib.error import HTTPError, URLError
8✔
25
from urllib.parse import urljoin, urlparse
8✔
26
from urllib.request import urlretrieve
8✔
27

28
from filelock import FileLock
8✔
29
from packaging.version import Version
8✔
30
from xarray import Dataset
8✔
31
from xarray import open_dataset as _open_dataset
8✔
32

33
import xclim
8✔
34
from xclim import __version__ as __xclim_version__
8✔
35

36
try:
8✔
37
    import pytest
8✔
38
    from pytest_socket import SocketBlockedError
8✔
39
except ImportError:
×
40
    pytest = None
×
41
    SocketBlockedError = None
×
42

43
try:
8✔
44
    import pooch
8✔
45
except ImportError:
×
46
    warnings.warn("The `pooch` library is not installed. The default cache directory for testing data will not be set.")
×
47
    pooch = None
×
48

49

50
logger = logging.getLogger("xclim")
8✔
51

52

53
__all__ = [
8✔
54
    "TESTDATA_BRANCH",
55
    "TESTDATA_CACHE_DIR",
56
    "TESTDATA_REPO_URL",
57
    "audit_url",
58
    "default_testdata_cache",
59
    "default_testdata_repo_url",
60
    "default_testdata_version",
61
    "gather_testing_data",
62
    "list_input_variables",
63
    "nimbus",
64
    "open_dataset",
65
    "populate_testing_data",
66
    "publish_release_notes",
67
    "run_doctests",
68
    "show_versions",
69
    "testing_setup_warnings",
70
]
71

72
default_testdata_version = "v2025.1.8"
8✔
73
"""Default version of the testing data to use when fetching datasets."""
8✔
74

75
default_testdata_repo_url = "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/"
8✔
76
"""Default URL of the testing data repository to use when fetching datasets."""
8✔
77

78
try:
8✔
79
    default_testdata_cache = Path(pooch.os_cache("xclim-testdata"))
8✔
80
    """Default location for the testing data cache."""
8✔
81
except AttributeError:
×
82
    default_testdata_cache = None
×
83

84
TESTDATA_REPO_URL = str(os.getenv("XCLIM_TESTDATA_REPO_URL", default_testdata_repo_url))
8✔
85
"""
8✔
86
Sets the URL of the testing data repository to use when fetching datasets.
87

88
Notes
89
-----
90
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:
91

92
.. code-block:: console
93

94
    $ export XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata"
95

96
or setting the variable at runtime:
97

98
.. code-block:: console
99

100
    $ env XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata" pytest
101
"""
102

103
TESTDATA_BRANCH = str(os.getenv("XCLIM_TESTDATA_BRANCH", default_testdata_version))
8✔
104
"""
8✔
105
Sets the branch of the testing data repository to use when fetching datasets.
106

107
Notes
108
-----
109
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:
110

111
.. code-block:: console
112

113
    $ export XCLIM_TESTDATA_BRANCH="my_testing_branch"
114

115
or setting the variable at runtime:
116

117
.. code-block:: console
118

119
    $ env XCLIM_TESTDATA_BRANCH="my_testing_branch" pytest
120
"""
121

122
TESTDATA_CACHE_DIR = os.getenv("XCLIM_TESTDATA_CACHE_DIR", default_testdata_cache)
8✔
123
"""
8✔
124
Sets the directory to store the testing datasets.
125

126
If not set, the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`).
127

128
Notes
129
-----
130
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:
131

132
.. code-block:: console
133

134
    $ export XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data"
135

136
or setting the variable at runtime:
137

138
.. code-block:: console
139

140
    $ env XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data" pytest
141
"""
142

143

144
def list_input_variables(submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None) -> dict:
    """
    Collect the variable names consumed by xclim's indicators.

    Intended as a development aid. Every entry of the indicator registry is inspected and its
    parameters whose kind is :py:attr:`xclim.core.utils.InputKind.VARIABLE` or `OPTIONAL_VARIABLE`
    are recorded.

    Parameters
    ----------
    submodules : Sequence of str, optional
        Only keep indicators belonging to these submodules.
        If None (or empty), every non-dunder name found in `xclim.indicators` is accepted.
    realms : Sequence of str, optional
        Only keep indicators whose realm is in this list.
        If None (or empty), "atmos", "ocean", "land" and "seaIce" are accepted.

    Returns
    -------
    dict
        A mapping from variable name to the list of registry entries that use it.
        The key is the parameter's default when it is truthy, otherwise the parameter name.
    """
    from collections import defaultdict  # pylint: disable=import-outside-toplevel

    from xclim import indicators  # pylint: disable=import-outside-toplevel
    from xclim.core.indicator import registry  # pylint: disable=import-outside-toplevel
    from xclim.core.utils import InputKind  # pylint: disable=import-outside-toplevel

    if not submodules:
        submodules = [sub for sub in dir(indicators) if not sub.startswith("__")]
    if not realms:
        realms = ["atmos", "ocean", "land", "seaIce"]
    variable_kinds = [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE]

    variables = defaultdict(list)
    for name, ind in registry.items():
        # Registry keys of external indicators are prefixed with "<submodule>.";
        # for the other entries the realm is compared against the submodules instead.
        owner = name.split(".")[0] if "." in name else ind.realm
        if owner not in submodules or ind.realm not in realms:
            continue

        for varname, meta in ind._all_parameters.items():
            if meta.kind in variable_kinds:
                variables[meta.default or varname].append(ind)

    return variables
194

195

196
# Publishing Tools ###
197

198

199
def publish_release_notes(
8✔
200
    style: str = "md",
201
    file: os.PathLike[str] | StringIO | TextIO | None = None,
202
    changes: str | os.PathLike[str] | None = None,
203
) -> str | None:
204
    """
205
    Format release notes in Markdown or ReStructuredText.
206

207
    Parameters
208
    ----------
209
    style : {"rst", "md"}
210
        Use ReStructuredText formatting or Markdown. Default: Markdown.
211
    file : {os.PathLike, StringIO, TextIO}, optional
212
        If provided, prints to the given file-like object. Otherwise, returns a string.
213
    changes : str or os.PathLike[str], optional
214
        If provided, manually points to the file where the changelog can be found.
215
        Assumes a relative path otherwise.
216

217
    Returns
218
    -------
219
    str, optional
220
        If `file` not provided, the formatted release notes.
221

222
    Notes
223
    -----
224
    This function is used solely for development and packaging purposes.
225
    """
226
    if isinstance(changes, str | Path):
8✔
227
        changes_file = Path(changes).absolute()
8✔
228
    else:
229
        changes_file = Path(__file__).absolute().parents[3].joinpath("CHANGELOG.rst")
×
230

231
    if not changes_file.exists():
8✔
232
        raise FileNotFoundError("Changelog file not found in xclim folder tree.")
8✔
233

234
    with open(changes_file, encoding="utf-8") as hf:
8✔
235
        changes = hf.read()
8✔
236

237
    if style == "rst":
8✔
238
        hyperlink_replacements = {
8✔
239
            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xclim/issues/\1>`_",
240
            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xclim/pull/\>`_",
241
            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
242
        }
243
    elif style == "md":
8✔
244
        hyperlink_replacements = {
8✔
245
            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xclim/issues/\1)",
246
            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xclim/pull/\1)",
247
            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
248
        }
249
    else:
250
        msg = f"Formatting style not supported: {style}"
8✔
251
        raise NotImplementedError(msg)
8✔
252

253
    for search, replacement in hyperlink_replacements.items():
8✔
254
        changes = re.sub(search, replacement, changes)
8✔
255

256
    if style == "md":
8✔
257
        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
8✔
258

259
        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
8✔
260
        for title_expression, level in titles.items():
8✔
261
            found = re.findall(title_expression, changes)
8✔
262
            for grouping in found:
8✔
263
                fixed_grouping = str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
8✔
264
                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
8✔
265
                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
8✔
266
                changes = re.sub(search, replacement, changes)
8✔
267

268
        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
8✔
269
        found = re.findall(link_expressions, changes)
8✔
270
        for grouping in found:
8✔
271
            search = rf"`{grouping[0]} <.+>`\_"
8✔
272
            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
8✔
273
            changes = re.sub(search, replacement, changes)
8✔
274

275
    if not file:
8✔
276
        return changes
8✔
277
    if isinstance(file, Path | os.PathLike):
8✔
278
        with open(file, "w", encoding="utf-8") as f:
8✔
279
            print(changes, file=f)
8✔
280
    else:
281
        print(changes, file=file)
×
282
    return None
8✔
283

284

285
_xclim_deps = [
8✔
286
    "xclim",
287
    "xarray",
288
    "statsmodels",
289
    "sklearn",
290
    "scipy",
291
    "pint",
292
    "pandas",
293
    "numpy",
294
    "numba",
295
    "lmoments3",
296
    "jsonpickle",
297
    "flox",
298
    "dask",
299
    "cf_xarray",
300
    "cftime",
301
    "clisops",
302
    "click",
303
    "bottleneck",
304
    "boltons",
305
]
306

307

308
def show_versions(
8✔
309
    file: os.PathLike | StringIO | TextIO | None = None,
310
    deps: list[str] | None = None,
311
) -> str | None:
312
    """
313
    Print the versions of xclim and its dependencies.
314

315
    Parameters
316
    ----------
317
    file : {os.PathLike, StringIO, TextIO}, optional
318
        If provided, prints to the given file-like object. Otherwise, returns a string.
319
    deps : list of str, optional
320
        A list of dependencies to gather and print version information from.
321
        Otherwise, prints `xclim` dependencies.
322

323
    Returns
324
    -------
325
    str or None
326
        If `file` not provided, the versions of xclim and its dependencies.
327
    """
328
    dependencies: list[str]
329
    if deps is None:
8✔
330
        dependencies = _xclim_deps
8✔
331
    else:
332
        dependencies = deps
×
333

334
    dependency_versions = [(d, lambda mod: mod.__version__) for d in dependencies]
8✔
335

336
    deps_blob: list[tuple[str, str | None]] = []
8✔
337
    for modname, ver_f in dependency_versions:
8✔
338
        try:
8✔
339
            if modname in sys.modules:
8✔
340
                mod = sys.modules[modname]
8✔
341
            else:
342
                mod = import_module(modname)
8✔
343
        except (KeyError, ModuleNotFoundError):
8✔
344
            deps_blob.append((modname, None))
8✔
345
        else:
346
            try:
8✔
347
                ver = ver_f(mod)
8✔
348
                deps_blob.append((modname, ver))
8✔
349
            except AttributeError:
8✔
350
                deps_blob.append((modname, "installed"))
8✔
351

352
    modules_versions = "\n".join([f"{k}: {stat}" for k, stat in sorted(deps_blob)])
8✔
353

354
    installed_versions = [
8✔
355
        "INSTALLED VERSIONS",
356
        "------------------",
357
        f"python: {platform.python_version()}",
358
        f"{modules_versions}",
359
        f"Anaconda-based environment: {'yes' if Path(sys.base_prefix).joinpath('conda-meta').exists() else 'no'}",
360
    ]
361

362
    message = "\n".join(installed_versions)
8✔
363

364
    if not file:
8✔
365
        return message
8✔
366
    if isinstance(file, Path | os.PathLike):
8✔
367
        with open(file, "w", encoding="utf-8") as f:
8✔
368
            print(message, file=f)
8✔
369
    else:
370
        print(message, file=file)
×
371
    return None
8✔
372

373

374
# Test Data Utilities ###
375

376

377
def run_doctests():
    """
    Run the doctests of the package through pytest, then exit the interpreter.

    The process exits with the status returned by ``pytest.main``.

    Raises
    ------
    ImportError
        If `pytest` could not be imported.
    """
    if pytest is None:
        raise ImportError(
            "The `pytest` package is required to run the doctests. "
            "You can install it with `pip install pytest` or `pip install xclim[dev]`."
        )

    here = Path(__file__).absolute()
    pytest_args = [
        f"--rootdir={here.parent}",
        "--numprocesses=0",
        "--xdoctest",
        # Collect from the folder two levels above this file.
        str(here.parents[1]),
    ]

    sys.exit(pytest.main(pytest_args))
393

394

395
def testing_setup_warnings():
    """
    Warn users about potential incompatibilities between xclim and xclim-testdata versions.

    Two checks are performed:

    - If the xclim version looks like a stable release (``X.Y.Z``) and ``TESTDATA_BRANCH`` differs
      from ``default_testdata_version``, a warning is emitted, unless the ``CI`` or ``READTHEDOCS``
      environment variable is set.
    - If ``TESTDATA_BRANCH`` starts like a version tag (``vX.Y.Z``), it is compared with a calendar
      version built from the modification time of ``xclim.__file__``; a warning is emitted when the
      testing data is newer.
    """
    if re.match(r"^\d+\.\d+\.\d+$", __xclim_version__) and TESTDATA_BRANCH != default_testdata_version:
        # This does not need to be emitted on GitHub Workflows and ReadTheDocs
        if not os.getenv("CI") and not os.getenv("READTHEDOCS"):
            warnings.warn(
                f"`xclim` stable ({__xclim_version__}) is running tests against a non-default "
                f"branch of the testing data. It is possible that changes to the testing data may "
                f"be incompatible with some assertions in this version. "
                f"Please be sure to check {TESTDATA_REPO_URL} for more information.",
            )

    if re.match(r"^v\d+\.\d+\.\d+", TESTDATA_BRANCH):
        # Find the date of last modification of xclim source files to generate a calendar version.
        # The timestamp is converted directly to local time: formatting it with `time.ctime` and
        # parsing it back with `strptime` relies on "%a"/"%b", which are locale-dependent.
        install_date = dt.fromtimestamp(os.path.getmtime(xclim.__file__)).replace(microsecond=0)
        install_calendar_version = f"{install_date.year}.{install_date.month}.{install_date.day}"

        if Version(TESTDATA_BRANCH) > Version(install_calendar_version):
            warnings.warn(
                f"The installation date of `xclim` ({install_date.ctime()}) "
                f"predates the last release of testing data ({TESTDATA_BRANCH}). "
                "It is very likely that the testing data is incompatible with this build of `xclim`.",
            )
421

422

423
def load_registry(branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL) -> dict[str, str]:
    """
    Load the registry file for the test data.

    When `repo` or `branch` differ from the defaults, the remote ``data/registry.txt`` is first
    downloaded into the installed `xclim` package folder; otherwise the registry file bundled with
    `xclim` is read.

    Parameters
    ----------
    branch : str
        Branch of the repository to use when fetching testing datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.

    Returns
    -------
    dict
        Dictionary of filenames and hashes.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist locally.
    """
    if not repo.endswith("/"):
        repo = f"{repo}/"
    remote_registry = audit_url(
        urljoin(
            urljoin(repo, branch if branch.endswith("/") else f"{branch}/"),
            "data/registry.txt",
        )
    )

    if repo != default_testdata_repo_url:
        # `repo` ends with "/", so the second-to-last path segment is the repository name.
        external_repo_name = urlparse(repo).path.split("/")[-2]
        external_branch_name = branch.split("/")[-1]
        registry_file = Path(
            str(ilr.files("xclim").joinpath(f"testing/registry.{external_repo_name}.{external_branch_name}.txt"))
        )
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    elif branch != default_testdata_version:
        custom_registry_folder = Path(str(ilr.files("xclim").joinpath(f"testing/{branch}")))
        custom_registry_folder.mkdir(parents=True, exist_ok=True)
        registry_file = custom_registry_folder.joinpath("registry.txt")
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    else:
        registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))

    if not registry_file.exists():
        raise FileNotFoundError(f"Registry file not found: {registry_file}")

    # Load the registry file: the first two whitespace-separated fields of each line are used.
    registry = {}
    with registry_file.open(encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            registry[fields[0]] = fields[1]
    return registry
472

473

474
def nimbus(  # noqa: PR01
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    cache_dir: str | Path = TESTDATA_CACHE_DIR,
    data_updates: bool = True,
):
    """
    Build the Pooch instance used to access the xclim testing data.

    Parameters
    ----------
    repo : str
        URL of the repository to use when fetching testing datasets.
    branch : str
        Branch of repository to use when fetching testing datasets.
    cache_dir : str or Path
        The path to the directory where the data files are stored.
    data_updates : bool
        If True, allow updates to the data files. Default is True.

    Returns
    -------
    pooch.Pooch
        The Pooch instance for accessing the xclim testing data.

    Raises
    ------
    ImportError
        If `pooch` could not be imported.

    Notes
    -----
    The defaults of this function come from three environment variables read at import time:
        - ``XCLIM_TESTDATA_CACHE_DIR``: the base directory in which the data files are stored.
          If unset, the default location is used (see :py:func:`pooch.os_cache`).
        - ``XCLIM_TESTDATA_REPO_URL``: the URL of the repository from which datasets are fetched.
          If unset, the default repository is used.
        - ``XCLIM_TESTDATA_BRANCH``: the branch of the repository from which datasets are fetched.
          If unset, the default testing data version is used.

    The returned instance has its ``fetch`` method replaced by a wrapper that, unless a
    ``downloader`` is passed explicitly, downloads with an xclim ``User-Agent`` header.
    The original method stays available as ``fetch_diversion``.

    Examples
    --------
    Using the registry to download a file:

    .. code-block:: python

        import xarray as xr
        from xclim.testing.utils import nimbus

        example_file = nimbus().fetch("example.nc")
        data = xr.open_dataset(example_file)
    """
    if pooch is None:
        raise ImportError(
            "The `pooch` package is required to fetch the xclim testing data. "
            "You can install it with `pip install pooch` or `pip install xclim[dev]`."
        )

    repo_root = repo if repo.endswith("/") else f"{repo}/"
    branch_part = branch if branch.endswith("/") else f"{branch}/"
    data_url = audit_url(urljoin(urljoin(repo_root, branch_part), "data"))

    data_pooch = pooch.create(
        path=cache_dir,
        base_url=data_url,
        version=default_testdata_version,
        version_dev=branch,
        allow_updates=data_updates,
        registry=load_registry(branch=branch, repo=repo_root),
    )

    # Keep a handle on the original fetch method before it gets replaced below.
    # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763
    # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202)
    data_pooch.fetch_diversion = data_pooch.fetch

    def _user_agent_downloader(
        url: str,
        output_file: str | IO,
        poocher: pooch.Pooch,
        check_only: bool | None = False,
    ) -> None:
        """Download `url` to `output_file` over HTTP, identifying as xclim."""
        agent = pooch.HTTPDownloader(headers={"User-Agent": f"xclim ({__xclim_version__})"})
        return agent(url, output_file, poocher, check_only=check_only)

    @wraps(data_pooch.fetch_diversion)
    def _fetch_with_user_agent(*args: str, **kwargs: bool | Callable) -> str:  # numpydoc ignore=GL08
        # An explicitly provided downloader always wins over the user-agent one.
        if "downloader" not in kwargs:
            kwargs["downloader"] = _user_agent_downloader
        return data_pooch.fetch_diversion(*args, **kwargs)

    data_pooch.fetch = _fetch_with_user_agent

    return data_pooch
568

569

570
# FIXME: This function is soon to be deprecated.
571
# idea copied from raven that it borrowed from xclim that borrowed it from xarray that was borrowed from Seaborn
572
def open_dataset(
    name: str | os.PathLike[str],
    dap_url: str | None = None,
    branch: str = TESTDATA_BRANCH,
    repo: str = TESTDATA_REPO_URL,
    cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
    **kwargs,
) -> Dataset:
    r"""
    Open a dataset from the online GitHub-like repository.

    If `name` already exists under `cache_dir`, that local copy is opened without fetching.

    Parameters
    ----------
    name : str or os.PathLike
        Name of the file containing the dataset.
    dap_url : str, optional
        URL to OPeNDAP folder where the data is stored.
        If supplied, the dataset is opened from there and `repo`/`branch` are not used.
    branch : str
        Branch of the repository to use when fetching datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.
    cache_dir : str or os.PathLike
        The directory in which to search for and write cached data.
    **kwargs : dict
        Keywords passed to :py:func:`xarray.open_dataset`.

    Returns
    -------
    Dataset
        The dataset.

    Raises
    ------
    ValueError
        If `cache_dir` is None.
    URLError
        If `dap_url` is supplied and the resulting URL does not pass :py:func:`audit_url`.
    OSError
        If the OPeNDAP target cannot be read, or if the file cannot be fetched or opened.

    See Also
    --------
    xarray.open_dataset : Open and read a dataset from a file or file-like object.
    """
    if cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )

    if dap_url:
        dap_target = urljoin(dap_url, str(name))
        try:
            return _open_dataset(audit_url(dap_target, context="OPeNDAP"), **kwargs)
        except URLError:
            # URLError is an OSError subclass: let it through untouched instead of re-wrapping it.
            raise
        except OSError as err:
            msg = f"OPeNDAP file not read. Verify that the service is available: {dap_target}"
            raise OSError(msg) from err

    local_file = Path(cache_dir).joinpath(name)
    if not local_file.exists():
        try:
            local_file = nimbus(branch=branch, repo=repo, cache_dir=cache_dir).fetch(name)
        except OSError as err:
            msg = f"File not found locally. Verify that the testing data is available in remote: {local_file}"
            raise OSError(msg) from err

    return _open_dataset(local_file, **kwargs)
642

643

644
def populate_testing_data(
    temp_folder: Path | None = None,
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    local_cache: Path = TESTDATA_CACHE_DIR,
) -> None:
    """
    Populate the local cache with the testing data.

    Every file listed in the registry of the requested `repo` and `branch` is fetched.
    Files that fail with an HTTP error are logged and skipped.

    Parameters
    ----------
    temp_folder : Path, optional
        Path to a temporary folder to use as the local cache. If not provided, `local_cache` is used.
    repo : str, optional
        URL of the repository to use when fetching testing datasets.
    branch : str, optional
        Branch of xclim-testdata to use when fetching testing datasets.
    local_cache : Path
        The path to the local cache. Defaults to ``TESTDATA_CACHE_DIR``.
        The testing data will be downloaded to this local cache.

    Raises
    ------
    pytest_socket.SocketBlockedError
        If a download is attempted while sockets are disabled by `pytest-socket`.
    """
    # Create the Pooch instance
    n = nimbus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache)

    # Download the files listed for the same repository and branch as the Pooch instance.
    errored_files = []
    for file in load_registry(branch=branch, repo=repo):
        try:
            n.fetch(file)
        except HTTPError:
            msg = f"File `{file}` not accessible in remote repository."
            logging.error(msg)
            errored_files.append(file)
        except Exception as err:
            # `SocketBlockedError` is None when `pytest-socket` is not installed, in which case
            # it cannot be used in an `except` clause; check for it explicitly instead.
            if SocketBlockedError is None or not isinstance(err, SocketBlockedError):
                raise
            msg = (
                "Unable to access registry file online. Testing suite is being run with `--disable-socket`. "
                "If you intend to run tests with this option enabled, please download the file beforehand with the "
                "following console command: `$ xclim prefetch_testing_data`."
            )
            raise SocketBlockedError(msg) from err
        else:
            logging.info("Files were downloaded successfully.")

    if errored_files:
        logging.error(
            "The following files were unable to be downloaded: %s",
            errored_files,
        )
692

693

694
def gather_testing_data(
    worker_cache_dir: str | os.PathLike[str] | Path,
    worker_id: str,
    _cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
) -> None:
    """
    Gather testing data across workers.

    For the "master" worker, the cache is populated and nothing is copied.
    For any other worker, the cache is populated under a file lock (not attempted on Windows,
    where the data must already be present) and the versioned data folder is then copied
    to `worker_cache_dir`.

    Parameters
    ----------
    worker_cache_dir : str or Path
        The directory to which the testing data is copied for this worker.
    worker_id : str
        The worker ID.
    _cache_dir : str or Path, optional
        The directory in which the testing data is downloaded and from which it is copied.
        Defaults to ``TESTDATA_CACHE_DIR``.

    Raises
    ------
    ValueError
        If the cache directory is not set.
    FileNotFoundError
        If the testing data is not found (Windows only).
    """
    if _cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )
    cache_dir = Path(_cache_dir)

    if worker_id == "master":
        # Download into the same directory that was requested by the caller.
        populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
    else:
        if platform.system() == "Windows":
            if not cache_dir.joinpath(default_testdata_version).exists():
                raise FileNotFoundError(
                    "Testing data not found and UNIX-style file-locking is not supported on Windows. "
                    "Consider running `$ xclim prefetch_testing_data` to download testing data beforehand."
                )
        else:
            cache_dir.mkdir(exist_ok=True, parents=True)
            lockfile = cache_dir.joinpath(".lock")
            test_data_being_written = FileLock(lockfile)
            with test_data_being_written:
                # The data must land in `cache_dir`, since that is where it is copied from below.
                populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
                # Marker file written once the download step has completed.
                cache_dir.joinpath(".data_written").touch()
            with test_data_being_written.acquire():
                if lockfile.exists():
                    lockfile.unlink()
        copytree(cache_dir.joinpath(default_testdata_version), worker_cache_dir)
746

747

748
# Testing Utilities ###
749

750

751
def audit_url(url: str, context: str | None = None) -> str:
    """
    Validate a URL and hand it back unchanged.

    A URL is rejected when its scheme is plain ``http`` or when it lacks a scheme or a network location.

    Parameters
    ----------
    url : str
        The URL to check.
    context : str, optional
        Text prepended to the error message. Default is None.

    Returns
    -------
    str
        The URL, if it passed the checks.

    Raises
    ------
    URLError
        If the URL uses plain HTTP or is not well-formed. The message is also logged as an error.
    """
    parsed = urlparse(url)
    prefix = context if context else ""

    problem = ""
    if parsed.scheme == "http":
        problem = f"{prefix} URL is not using secure HTTP: '{url}'".strip()
    # Checked last so that a malformed URL takes precedence over the plain-HTTP message.
    if not (parsed.scheme and parsed.netloc):
        problem = f"{prefix} URL is not well-formed: '{url}'".strip()

    if not problem:
        return url

    logger.error(problem)
    raise URLError(problem)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc