• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / xclim / 12872165263

20 Jan 2025 04:24PM UTC coverage: 89.934% (-0.03%) from 89.962%
12872165263

push

github

web-flow
Bump tox-gh from 1.4.4 to 1.5.0 in the python group (#2052)

9524 of 10590 relevant lines covered (89.93%)

7.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.09
/src/xclim/testing/utils.py
1
"""
2
Testing and Tutorial Utilities' Module
3
======================================
4
"""
5

6
from __future__ import annotations
9✔
7

8
import importlib.resources as ilr
9✔
9
import logging
9✔
10
import os
9✔
11
import platform
9✔
12
import re
9✔
13
import sys
9✔
14
import time
9✔
15
import warnings
9✔
16
from collections.abc import Callable, Sequence
9✔
17
from datetime import datetime as dt
9✔
18
from functools import wraps
9✔
19
from importlib import import_module
9✔
20
from io import StringIO
9✔
21
from pathlib import Path
9✔
22
from shutil import copytree
9✔
23
from typing import IO, TextIO
9✔
24
from urllib.error import HTTPError, URLError
9✔
25
from urllib.parse import urljoin, urlparse
9✔
26
from urllib.request import urlretrieve
9✔
27

28
from filelock import FileLock
9✔
29
from packaging.version import Version
9✔
30
from xarray import Dataset
9✔
31
from xarray import open_dataset as _open_dataset
9✔
32

33
import xclim
9✔
34
from xclim import __version__ as __xclim_version__
9✔
35

36
try:
9✔
37
    import pytest
9✔
38
    from pytest_socket import SocketBlockedError
9✔
39
except ImportError:
×
40
    pytest = None
×
41
    SocketBlockedError = None
×
42

43
try:
9✔
44
    import pooch
9✔
45
except ImportError:
×
46
    warnings.warn(
×
47
        "The `pooch` library is not installed. "
48
        "The default cache directory for testing data will not be set."
49
    )
50
    pooch = None
×
51

52

53
# Package-level logger used by the helpers of this module.
logger = logging.getLogger("xclim")


__all__ = [
    "TESTDATA_BRANCH",
    "TESTDATA_CACHE_DIR",
    "TESTDATA_REPO_URL",
    "audit_url",
    "default_testdata_cache",
    "default_testdata_repo_url",
    "default_testdata_version",
    "gather_testing_data",
    "list_input_variables",
    "nimbus",
    "open_dataset",
    "populate_testing_data",
    "publish_release_notes",
    "run_doctests",
    "show_versions",
    "testing_setup_warnings",
]

default_testdata_version = "v2025.1.8"
"""Default version of the testing data to use when fetching datasets."""

default_testdata_repo_url = (
    "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/"
)
"""Default URL of the testing data repository to use when fetching datasets."""

# `pooch` is bound to None when its import fails (see the guarded import above),
# so the attribute access below raises AttributeError and no default cache is set.
try:
    default_testdata_cache = Path(pooch.os_cache("xclim-testdata"))
    """Default location for the testing data cache."""
except AttributeError:
    default_testdata_cache = None

# The three settings below are read from the environment once, at import time.
TESTDATA_REPO_URL = str(os.getenv("XCLIM_TESTDATA_REPO_URL", default_testdata_repo_url))
"""
Sets the URL of the testing data repository to use when fetching datasets.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata" pytest
"""

TESTDATA_BRANCH = str(os.getenv("XCLIM_TESTDATA_BRANCH", default_testdata_version))
"""
Sets the branch of the testing data repository to use when fetching datasets.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_BRANCH="my_testing_branch"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_BRANCH="my_testing_branch" pytest
"""

# Unlike the two settings above, this value is not coerced to `str`: it is either
# the environment string, the default `Path`, or None when no default exists.
TESTDATA_CACHE_DIR = os.getenv("XCLIM_TESTDATA_CACHE_DIR", default_testdata_cache)
"""
Sets the directory to store the testing datasets.

If not set, the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`).

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data" pytest
"""
147

148

149
def list_input_variables(
    submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None
) -> dict:
    """
    List all possible variables names used in xclim's indicators.

    Made for development purposes. Every registered indicator is scanned and the
    parameters whose kind is :py:attr:`xclim.core.utils.InputKind.VARIABLE` or
    `OPTIONAL_VARIABLE` are collected.

    Parameters
    ----------
    submodules : Sequence of str, optional
        Restrict the output to indicators of a list of submodules only.
        Default None, which uses every non-dunder attribute of `xclim.indicators`.
    realms : Sequence of str, optional
        Restrict the output to indicators of a list of realms only.
        Default None, which uses "atmos", "ocean", "land" and "seaIce".

    Returns
    -------
    dict
        A mapping from variable name to the list of indicators using that variable.
        The variable name is the parameter's default when it has one, else the parameter name.
    """
    from collections import defaultdict  # pylint: disable=import-outside-toplevel

    from xclim import indicators  # pylint: disable=import-outside-toplevel
    from xclim.core.indicator import registry  # pylint: disable=import-outside-toplevel
    from xclim.core.utils import InputKind  # pylint: disable=import-outside-toplevel

    if not submodules:
        submodules = [attr for attr in dir(indicators) if not attr.startswith("__")]
    if not realms:
        realms = ["atmos", "ocean", "land", "seaIce"]
    variable_kinds = [InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE]

    variables = defaultdict(list)
    for key, indicator in registry.items():
        # Keys containing a dot come from external submodules and are prefixed
        # with the submodule name; otherwise the realm doubles as the submodule.
        owner = key.split(".")[0] if "." in key else indicator.realm
        if owner not in submodules or indicator.realm not in realms:
            continue

        for param_name, param in indicator._all_parameters.items():
            if param.kind in variable_kinds:
                variables[param.default or param_name].append(indicator)

    return variables
203

204

205
# Publishing Tools ###
206

207

208
def publish_release_notes(
    style: str = "md",
    file: os.PathLike[str] | StringIO | TextIO | None = None,
    changes: str | os.PathLike[str] | None = None,
) -> str | None:
    """
    Format release notes in Markdown or ReStructuredText.

    The changelog is read from disk, its ``:issue:``, ``:pull:`` and ``:user:`` roles are
    expanded to hyperlinks and, for Markdown, the RST titles and RST hyperlinks are converted.

    Parameters
    ----------
    style : {"rst", "md"}
        Use ReStructuredText formatting or Markdown. Default: Markdown.
    file : {os.PathLike, StringIO, TextIO}, optional
        If provided, prints to the given path or file-like object. Otherwise, returns a string.
    changes : str or os.PathLike[str], optional
        If provided, manually points to the file where the changelog can be found.
        Otherwise, `CHANGELOG.rst` is looked up three folders above this module.

    Returns
    -------
    str, optional
        If `file` not provided, the formatted release notes.

    Raises
    ------
    FileNotFoundError
        If the changelog file does not exist.
    NotImplementedError
        If `style` is neither "rst" nor "md".

    Notes
    -----
    This function is used solely for development and packaging purposes.
    """
    if isinstance(changes, (str, os.PathLike)):
        changes_file = Path(changes).absolute()
    else:
        changes_file = Path(__file__).absolute().parents[3].joinpath("CHANGELOG.rst")

    if not changes_file.exists():
        raise FileNotFoundError("Changelog file not found in xclim folder tree.")

    with open(changes_file, encoding="utf-8") as hf:
        changes = hf.read()

    if style == "rst":
        hyperlink_replacements = {
            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xclim/issues/\1>`_",
            # The URL must end with the captured number (`\1`), not a bare backslash.
            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xclim/pull/\1>`_",
            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
        }
    elif style == "md":
        hyperlink_replacements = {
            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xclim/issues/\1)",
            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xclim/pull/\1)",
            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
        }
    else:
        msg = f"Formatting style not supported: {style}"
        raise NotImplementedError(msg)

    for search, replacement in hyperlink_replacements.items():
        changes = re.sub(search, replacement, changes)

    if style == "md":
        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")

        # RST titles underlined with "-" become level-2 headings, those with "^" level-3.
        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
        for title_expression, level in titles.items():
            heading = "##" if level == "-" else "###"
            for title, underline in re.findall(title_expression, changes):
                # Escape the whole title so that any regex metacharacter is matched literally.
                search = rf"({re.escape(title)})\n([\{level}]{{{len(underline)}}})"
                replacement = f"{heading} {title}"
                # A callable replacement keeps backslashes of the title verbatim.
                changes = re.sub(
                    search, lambda _match, text=replacement: text, changes
                )

        # Convert the remaining RST hyperlinks (`text <url>`_) to Markdown.
        # The URL group is non-greedy so that two links on one line stay separate.
        changes = re.sub(
            r"`([\w\s]+)\s<(.+?)>`_",
            lambda match: f"[{match.group(1).strip()}]({match.group(2)})",
            changes,
        )

    if not file:
        return changes
    if isinstance(file, os.PathLike):
        with open(file, "w", encoding="utf-8") as f:
            print(changes, file=f)
    else:
        print(changes, file=file)
    return None
294

295

296
_xclim_deps = [
9✔
297
    "xclim",
298
    "xarray",
299
    "statsmodels",
300
    "sklearn",
301
    "scipy",
302
    "pint",
303
    "pandas",
304
    "numpy",
305
    "numba",
306
    "lmoments3",
307
    "jsonpickle",
308
    "flox",
309
    "dask",
310
    "cf_xarray",
311
    "cftime",
312
    "clisops",
313
    "click",
314
    "bottleneck",
315
    "boltons",
316
]
317

318

319
def show_versions(
9✔
320
    file: os.PathLike | StringIO | TextIO | None = None,
321
    deps: list[str] | None = None,
322
) -> str | None:
323
    """
324
    Print the versions of xclim and its dependencies.
325

326
    Parameters
327
    ----------
328
    file : {os.PathLike, StringIO, TextIO}, optional
329
        If provided, prints to the given file-like object. Otherwise, returns a string.
330
    deps : list of str, optional
331
        A list of dependencies to gather and print version information from. Otherwise, prints `xclim` dependencies.
332

333
    Returns
334
    -------
335
    str or None
336
        If `file` not provided, the versions of xclim and its dependencies.
337
    """
338
    dependencies: list[str]
339
    if deps is None:
9✔
340
        dependencies = _xclim_deps
9✔
341
    else:
342
        dependencies = deps
×
343

344
    dependency_versions = [(d, lambda mod: mod.__version__) for d in dependencies]
9✔
345

346
    deps_blob: list[tuple[str, str | None]] = []
9✔
347
    for modname, ver_f in dependency_versions:
9✔
348
        try:
9✔
349
            if modname in sys.modules:
9✔
350
                mod = sys.modules[modname]
9✔
351
            else:
352
                mod = import_module(modname)
9✔
353
        except (KeyError, ModuleNotFoundError):
9✔
354
            deps_blob.append((modname, None))
9✔
355
        else:
356
            try:
9✔
357
                ver = ver_f(mod)
9✔
358
                deps_blob.append((modname, ver))
9✔
359
            except AttributeError:
9✔
360
                deps_blob.append((modname, "installed"))
9✔
361

362
    modules_versions = "\n".join([f"{k}: {stat}" for k, stat in sorted(deps_blob)])
9✔
363

364
    installed_versions = [
9✔
365
        "INSTALLED VERSIONS",
366
        "------------------",
367
        f"python: {platform.python_version()}",
368
        f"{modules_versions}",
369
        f"Anaconda-based environment: {'yes' if Path(sys.base_prefix).joinpath('conda-meta').exists() else 'no'}",
370
    ]
371

372
    message = "\n".join(installed_versions)
9✔
373

374
    if not file:
9✔
375
        return message
9✔
376
    if isinstance(file, Path | os.PathLike):
9✔
377
        with open(file, "w", encoding="utf-8") as f:
9✔
378
            print(message, file=f)
9✔
379
    else:
380
        print(message, file=file)
×
381
    return None
9✔
382

383

384
# Test Data Utilities ###
385

386

387
def run_doctests():
    """
    Run the doctests for the module.

    `pytest` is invoked with xdoctest enabled on the package folder (the parent of this
    module's folder) and the interpreter exits with pytest's return code.

    Raises
    ------
    ImportError
        If `pytest` is not installed.
    """
    if pytest is None:
        raise ImportError(
            "The `pytest` package is required to run the doctests. "
            "You can install it with `pip install pytest` or `pip install xclim[dev]`."
        )

    here = Path(__file__).absolute()
    pytest_args = [
        f"--rootdir={here.parent}",
        "--numprocesses=0",
        "--xdoctest",
        f"{here.parents[1]}",
    ]
    exit_code = pytest.main(pytest_args)
    sys.exit(exit_code)
403

404

405
def testing_setup_warnings():
    """
    Warn users about potential incompatibilities between xclim and xclim-testdata versions.

    Two independent checks are performed:

    - a stable xclim version (``X.Y.Z``) used with a non-default testing data branch,
      unless the ``CI`` or ``READTHEDOCS`` environment variables are set;
    - a versioned testing data branch (``vX.Y.Z``) that is newer than the calendar
      version derived from the modification date of the installed xclim sources.
    """
    is_stable_release = re.match(r"^\d+\.\d+\.\d+$", __xclim_version__)
    if is_stable_release and TESTDATA_BRANCH != default_testdata_version:
        # This does not need to be emitted on GitHub Workflows and ReadTheDocs
        if not os.getenv("CI") and not os.getenv("READTHEDOCS"):
            warnings.warn(
                f"`xclim` stable ({__xclim_version__}) is running tests against a non-default branch of the testing data. "
                "It is possible that changes to the testing data may be incompatible with some assertions in this version. "
                f"Please be sure to check {TESTDATA_REPO_URL} for more information.",
            )

    if re.match(r"^v\d+\.\d+\.\d+", TESTDATA_BRANCH):
        # Find the date of last modification of xclim source files to generate a calendar version.
        # The timestamp is converted directly: formatting it with `time.ctime` and parsing it back
        # with `strptime` relies on `%a`/`%b`, which depend on the active locale.
        install_date = dt.fromtimestamp(os.path.getmtime(xclim.__file__))
        install_calendar_version = (
            f"{install_date.year}.{install_date.month}.{install_date.day}"
        )

        if Version(TESTDATA_BRANCH) > Version(install_calendar_version):
            warnings.warn(
                f"The installation date of `xclim` ({install_date.ctime()}) "
                f"predates the last release of testing data ({TESTDATA_BRANCH}). "
                "It is very likely that the testing data is incompatible with this build of `xclim`.",
            )
435

436

437
def load_registry(
    branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL
) -> dict[str, str]:
    """
    Load the registry file for the test data.

    The registry shipped with the package is used for the default repository and version.
    For any other repository or branch, the remote ``data/registry.txt`` is first downloaded
    next to the packaged one.

    Parameters
    ----------
    branch : str
        Branch of the repository to use when fetching testing datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.

    Returns
    -------
    dict
        Dictionary of filenames and hashes.

    Raises
    ------
    URLError
        If the URL of the remote registry is rejected by `audit_url`.
    FileNotFoundError
        If the registry file does not exist locally.
    """
    if not repo.endswith("/"):
        repo = f"{repo}/"
    remote_registry = audit_url(
        urljoin(
            urljoin(repo, branch if branch.endswith("/") else f"{branch}/"),
            "data/registry.txt",
        )
    )

    if repo != default_testdata_repo_url:
        # `repo` ends with "/", hence the repository name is the second-to-last path element.
        external_repo_name = urlparse(repo).path.split("/")[-2]
        external_branch_name = branch.split("/")[-1]
        registry_file = Path(
            str(
                ilr.files("xclim").joinpath(
                    f"testing/registry.{external_repo_name}.{external_branch_name}.txt"
                )
            )
        )
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    elif branch != default_testdata_version:
        custom_registry_folder = Path(
            str(ilr.files("xclim").joinpath(f"testing/{branch}"))
        )
        custom_registry_folder.mkdir(parents=True, exist_ok=True)
        registry_file = custom_registry_folder.joinpath("registry.txt")
        urlretrieve(remote_registry, registry_file)  # noqa: S310

    else:
        registry_file = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))

    if not registry_file.exists():
        raise FileNotFoundError(f"Registry file not found: {registry_file}")

    # Load the registry file: one "<filename> <hash>" entry per line.
    registry = {}
    with registry_file.open(encoding="utf-8") as f:
        for line in f:
            entry = line.split()
            # Blank lines and comment lines carry no entry.
            if not entry or entry[0].startswith("#"):
                continue
            registry[entry[0]] = entry[1]
    return registry
494

495

496
def nimbus(  # noqa: PR01
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    cache_dir: str | Path = TESTDATA_CACHE_DIR,
    data_updates: bool = True,
):
    """
    Pooch registry instance for xclim test data.

    The returned instance has its ``fetch`` method replaced by a wrapper that, unless a
    ``downloader`` is given, downloads through HTTP with an xclim ``User-Agent`` header.
    The original method stays available as ``fetch_diversion``.

    Parameters
    ----------
    repo : str
        URL of the repository to use when fetching testing datasets.
    branch : str
        Branch of repository to use when fetching testing datasets.
    cache_dir : str or Path
        The path to the directory where the data files are stored.
    data_updates : bool
        If True, allow updates to the data files. Default is True.

    Returns
    -------
    pooch.Pooch
        The Pooch instance for accessing the xclim testing data.

    Raises
    ------
    ImportError
        If `pooch` is not installed.

    Notes
    -----
    There are three environment variables that can be used to control the behaviour of this registry:
        - ``XCLIM_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the base directory to
          store the data files. The directory should be an absolute path (i.e., it should start with ``/``).
          Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`).
        - ``XCLIM_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of the repository
          to use when fetching datasets. Otherwise, the default repository will be used.
        - ``XCLIM_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of the repository
          to use when fetching datasets. Otherwise, the default branch will be used.

    Examples
    --------
    Using the registry to download a file:

    .. code-block:: python

        import xarray as xr
        from xclim.testing.utils import nimbus

        example_file = nimbus().fetch("example.nc")
        data = xr.open_dataset(example_file)
    """
    if pooch is None:
        raise ImportError(
            "The `pooch` package is required to fetch the xclim testing data. "
            "You can install it with `pip install pooch` or `pip install xclim[dev]`."
        )

    # Both URL parts need a trailing slash, otherwise `urljoin` replaces the last path element.
    if not repo.endswith("/"):
        repo = f"{repo}/"
    branch_segment = branch if branch.endswith("/") else f"{branch}/"
    remote = audit_url(urljoin(urljoin(repo, branch_segment), "data"))

    pup = pooch.create(
        path=cache_dir,
        base_url=remote,
        version=default_testdata_version,
        version_dev=branch,
        allow_updates=data_updates,
        registry=load_registry(branch=branch, repo=repo),
    )

    def _downloader(
        url: str,
        output_file: str | IO,
        poocher: pooch.Pooch,
        check_only: bool | None = False,
    ) -> None:
        """Download the file from the URL and save it to the save_path."""
        user_agent = {"User-Agent": f"xclim ({__xclim_version__})"}
        http_downloader = pooch.HTTPDownloader(headers=user_agent)
        return http_downloader(url, output_file, poocher, check_only=check_only)

    # Keep the original fetch method reachable under another name before overloading it.
    # Needed to address: https://github.com/readthedocs/readthedocs.org/issues/11763
    # Fix inspired by @bjlittle (https://github.com/bjlittle/geovista/pull/1202)
    pup.fetch_diversion = pup.fetch

    @wraps(pup.fetch_diversion)
    def _fetch(*args: str, **kwargs: bool | Callable) -> str:  # numpydoc ignore=GL08
        # default to our http/s downloader with user-agent headers
        kwargs.setdefault("downloader", _downloader)
        return pup.fetch_diversion(*args, **kwargs)

    pup.fetch = _fetch
    return pup
592

593

594
# FIXME: This function is soon to be deprecated.
595
# idea copied from raven that it borrowed from xclim that borrowed it from xarray that was borrowed from Seaborn
596
def open_dataset(
    name: str | os.PathLike[str],
    dap_url: str | None = None,
    branch: str = TESTDATA_BRANCH,
    repo: str = TESTDATA_REPO_URL,
    cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
    **kwargs,
) -> Dataset:
    r"""
    Open a dataset from the online GitHub-like repository.

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
    dap_url : str, optional
        URL to OPeNDAP folder where the data is stored. If supplied, the dataset is opened
        from that service and neither the cache nor the repository is used.
    branch : str
        Branch of the repository to use when fetching datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.
    cache_dir : Path
        The directory in which to search for and write cached data.
    **kwargs : dict
        For NetCDF files, keywords passed to :py:func:`xarray.open_dataset`.

    Returns
    -------
    Dataset
        The dataset.

    Raises
    ------
    ValueError
        If `cache_dir` is None.
    URLError
        If the OPeNDAP URL is rejected by `audit_url`.
    OSError
        If the file is not found in the cache directory or cannot be read.

    See Also
    --------
    xarray.open_dataset : Open and read a dataset from a file or file-like object.
    """
    if cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )

    if dap_url:
        dap_target = urljoin(dap_url, str(name))
        try:
            return _open_dataset(audit_url(dap_target, context="OPeNDAP"), **kwargs)
        except URLError:
            # URLError subclasses OSError: re-raise it untouched so that it is not
            # rewrapped by the generic handler below.
            raise
        except OSError as err:
            msg = f"OPeNDAP file not read. Verify that the service is available: {dap_target}"
            raise OSError(msg) from err

    local_file = Path(cache_dir).joinpath(name)
    if not local_file.exists():
        try:
            local_file = nimbus(branch=branch, repo=repo, cache_dir=cache_dir).fetch(
                name
            )
        except OSError as err:
            msg = f"File not found locally. Verify that the testing data is available in remote: {local_file}"
            raise OSError(msg) from err

    return _open_dataset(local_file, **kwargs)
668

669

670
def populate_testing_data(
    temp_folder: Path | None = None,
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    local_cache: Path = TESTDATA_CACHE_DIR,
) -> None:
    """
    Populate the local cache with the testing data.

    Every file listed in the registry of the requested repository and branch is fetched.
    Files that cannot be downloaded are logged and do not interrupt the other downloads.

    Parameters
    ----------
    temp_folder : Path, optional
        Path to a temporary folder to use as the local cache. If not provided, `local_cache` will be used.
    repo : str, optional
        URL of the repository to use when fetching testing datasets.
    branch : str, optional
        Branch of xclim-testdata to use when fetching testing datasets.
    local_cache : Path
        The path to the local cache. Defaults to `TESTDATA_CACHE_DIR`.
        The testing data will be downloaded to this local cache.

    Raises
    ------
    SocketBlockedError
        If `pytest-socket` is installed and blocks the network access.
    """
    # Create the Pooch instance
    n = nimbus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache)

    # `SocketBlockedError` is None when `pytest-socket` is not installed and
    # None cannot be used in an `except` clause; an empty tuple matches nothing.
    socket_errors = (SocketBlockedError,) if SocketBlockedError is not None else ()

    # Download the files listed in the registry that was loaded for this repo and branch
    errored_files = []
    for file in n.registry:
        try:
            n.fetch(file)
        except HTTPError:
            logger.error("File `%s` not accessible in remote repository.", file)
            errored_files.append(file)
        except socket_errors as err:
            msg = (
                "Unable to access registry file online. Testing suite is being run with `--disable-socket`. "
                "If you intend to run tests with this option enabled, please download the file beforehand with the "
                "following console command: `$ xclim prefetch_testing_data`."
            )
            raise SocketBlockedError(msg) from err

    if errored_files:
        logger.error(
            "The following files were unable to be downloaded: %s",
            errored_files,
        )
    else:
        logger.info("Files were downloaded successfully.")
718

719

720
def gather_testing_data(
    worker_cache_dir: str | os.PathLike[str] | Path,
    worker_id: str,
    _cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
) -> None:
    """
    Gather testing data across workers.

    The testing data is downloaded to the shared cache and the folder of the default
    testing data version is then copied to the worker's own directory.

    Parameters
    ----------
    worker_cache_dir : str or Path
        The directory to store the testing data of this worker.
    worker_id : str
        The worker ID. The value "master" downloads without file-locking.
    _cache_dir : str or Path, optional
        The shared directory where the testing data is downloaded. Default is `TESTDATA_CACHE_DIR`.

    Raises
    ------
    ValueError
        If the cache directory is not set.
    FileNotFoundError
        If the testing data is not found.
    """
    if _cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `_cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )
    cache_dir = Path(_cache_dir)

    if worker_id == "master":
        # Download to the same folder the data is copied from below.
        populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
    else:
        if platform.system() == "Windows":
            if not cache_dir.joinpath(default_testdata_version).exists():
                raise FileNotFoundError(
                    "Testing data not found and UNIX-style file-locking is not supported on Windows. "
                    "Consider running `$ xclim prefetch_testing_data` to download testing data beforehand."
                )
        else:
            cache_dir.mkdir(exist_ok=True, parents=True)
            lockfile = cache_dir.joinpath(".lock")
            test_data_being_written = FileLock(lockfile)
            with test_data_being_written:
                # Only one worker at a time populates the shared cache.
                populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
                # Marker file left in the cache once the data was written.
                cache_dir.joinpath(".data_written").touch()
            with test_data_being_written.acquire():
                # Another worker may already have removed the lock file.
                lockfile.unlink(missing_ok=True)
        copytree(cache_dir.joinpath(default_testdata_version), worker_cache_dir)
772

773

774
# Testing Utilities ###
775

776

777
def audit_url(url: str, context: str | None = None) -> str:
    """
    Check if the URL is well-formed.

    A URL is rejected when its scheme is plain ``http`` or when it lacks
    either a scheme or a network location.

    Parameters
    ----------
    url : str
        The URL to check.
    context : str, optional
        Additional context prepended to the error message. Default is None.

    Returns
    -------
    str
        The URL, unchanged, if it is well-formed.

    Raises
    ------
    URLError
        If the URL is not well-formed. The message is also logged as an error.
    """
    parsed = urlparse(url)
    prefix = context if context else ""

    problem = ""
    if parsed.scheme == "http":
        problem = f"{prefix} URL is not using secure HTTP: '{url}'".strip()
    # Checked last so that a malformed URL takes precedence over the insecure-scheme message.
    if not (parsed.scheme and parsed.netloc):
        problem = f"{prefix} URL is not well-formed: '{url}'".strip()

    if not problem:
        return url

    logger.error(problem)
    raise URLError(problem)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc