• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / xclim / 11784708548

11 Nov 2024 07:09PM UTC coverage: 89.75% (+0.4%) from 89.398%
11784708548

Pull #1971

github

web-flow
Merge ebf45886b into d5aefa4a9
Pull Request #1971: Employ a `src`-based layout for code base

4 of 6 new or added lines in 2 files covered. (66.67%)

9378 of 10449 relevant lines covered (89.75%)

7.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.11
/src/xclim/testing/utils.py
1
"""
2
Testing and Tutorial Utilities' Module
3
======================================
4
"""
5

6
from __future__ import annotations
9✔
7

8
import importlib.resources as ilr
9✔
9
import logging
9✔
10
import os
9✔
11
import platform
9✔
12
import re
9✔
13
import sys
9✔
14
import time
9✔
15
import warnings
9✔
16
from collections.abc import Sequence
9✔
17
from datetime import datetime as dt
9✔
18
from importlib import import_module
9✔
19
from io import StringIO
9✔
20
from pathlib import Path
9✔
21
from shutil import copytree
9✔
22
from typing import TextIO
9✔
23
from urllib.error import HTTPError, URLError
9✔
24
from urllib.parse import urljoin, urlparse
9✔
25
from urllib.request import urlretrieve
9✔
26

27
from filelock import FileLock
9✔
28
from packaging.version import Version
9✔
29
from xarray import Dataset
9✔
30
from xarray import open_dataset as _open_dataset
9✔
31

32
import xclim
9✔
33
from xclim import __version__ as __xclim_version__
9✔
34

35
try:
9✔
36
    import pytest
9✔
37
    from pytest_socket import SocketBlockedError
9✔
38
except ImportError:
×
39
    pytest = None
×
40
    SocketBlockedError = None
×
41

42
try:
9✔
43
    import pooch
9✔
44
except ImportError:
×
45
    warnings.warn(
×
46
        "The `pooch` library is not installed. "
47
        "The default cache directory for testing data will not be set."
48
    )
49
    pooch = None
×
50

51

52
logger = logging.getLogger("xclim")
9✔
53

54

55
__all__ = [
9✔
56
    "TESTDATA_BRANCH",
57
    "TESTDATA_CACHE_DIR",
58
    "TESTDATA_REPO_URL",
59
    "audit_url",
60
    "default_testdata_cache",
61
    "default_testdata_repo_url",
62
    "default_testdata_version",
63
    "gather_testing_data",
64
    "list_input_variables",
65
    "nimbus",
66
    "open_dataset",
67
    "populate_testing_data",
68
    "publish_release_notes",
69
    "run_doctests",
70
    "show_versions",
71
    "testing_setup_warnings",
72
]
73

74
default_testdata_version = "v2024.8.23"
"""Default version of the testing data to use when fetching datasets."""

default_testdata_repo_url = "https://raw.githubusercontent.com/Ouranosinc/xclim-testdata/"
"""Default URL of the testing data repository to use when fetching datasets."""

try:
    default_testdata_cache = Path(pooch.os_cache("xclim-testdata"))
    """Default location for the testing data cache."""
except AttributeError:
    # `pooch` is set to None when the library could not be imported,
    # in which case no default cache location can be derived.
    default_testdata_cache = None

TESTDATA_REPO_URL = str(
    os.environ.get("XCLIM_TESTDATA_REPO_URL", default_testdata_repo_url)
)
"""Sets the URL of the testing data repository to use when fetching datasets.

Read from the ``XCLIM_TESTDATA_REPO_URL`` environment variable; falls back to
:py:data:`default_testdata_repo_url` when the variable is not set.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_REPO_URL="https://github.com/my_username/xclim-testdata" pytest
"""

TESTDATA_BRANCH = str(
    os.environ.get("XCLIM_TESTDATA_BRANCH", default_testdata_version)
)
"""Sets the branch of the testing data repository to use when fetching datasets.

Read from the ``XCLIM_TESTDATA_BRANCH`` environment variable; falls back to
:py:data:`default_testdata_version` when the variable is not set.

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_BRANCH="my_testing_branch"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_BRANCH="my_testing_branch" pytest
"""

TESTDATA_CACHE_DIR = os.environ.get("XCLIM_TESTDATA_CACHE_DIR", default_testdata_cache)
"""Sets the directory to store the testing datasets.

If not set, the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`).
The value is a string when taken from the environment, and otherwise whatever
:py:data:`default_testdata_cache` holds (a ``Path``, or None when `pooch` is unavailable).

Notes
-----
When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable:

.. code-block:: console

    $ export XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data"

or setting the variable at runtime:

.. code-block:: console

    $ env XCLIM_TESTDATA_CACHE_DIR="/path/to/my/data" pytest
"""
143

144

145
def list_input_variables(
    submodules: Sequence[str] | None = None, realms: Sequence[str] | None = None
) -> dict:
    """List all possible variables names used in xclim's indicators.

    Made for development purposes. Walks the indicator registry and collects every
    parameter whose kind is :py:attr:`xclim.core.utils.InputKind.VARIABLE` or
    `OPTIONAL_VARIABLE`.

    Parameters
    ----------
    submodules : Sequence of str, optional
        Restrict the output to indicators of a list of submodules only.
        Default None, which parses all indicators.
    realms : Sequence of str, optional
        Restrict the output to indicators of a list of realms only.
        Default None, which parses all indicators.

    Returns
    -------
    dict
        A mapping from variable name to the list of registry entries (indicators) using it.
        The variable name is the parameter's default when it has a truthy one,
        otherwise the parameter name itself.
    """
    from collections import defaultdict  # pylint: disable=import-outside-toplevel

    from xclim import indicators  # pylint: disable=import-outside-toplevel
    from xclim.core.indicator import registry  # pylint: disable=import-outside-toplevel
    from xclim.core.utils import InputKind  # pylint: disable=import-outside-toplevel

    if not submodules:
        submodules = [attr for attr in dir(indicators) if not attr.startswith("__")]
    if not realms:
        realms = ["atmos", "ocean", "land", "seaIce"]

    variable_kinds = (InputKind.VARIABLE, InputKind.OPTIONAL_VARIABLE)

    variables = defaultdict(list)
    for key, indicator in registry.items():
        # External submodules prepend their name to the registry key ("<submodule>.<name>");
        # for official indicators the realm doubles as the submodule name.
        owner = key.split(".")[0] if "." in key else indicator.realm
        if owner not in submodules or indicator.realm not in realms:
            continue

        for param_name, param in indicator._all_parameters.items():
            if param.kind in variable_kinds:
                variables[param.default or param_name].append(indicator)

    return variables
9✔
198

199

200
# Publishing Tools ###
201

202

203
def publish_release_notes(
9✔
204
    style: str = "md",
205
    file: os.PathLike[str] | StringIO | TextIO | None = None,
206
    changes: str | os.PathLike[str] | None = None,
207
) -> str | None:
208
    """Format release notes in Markdown or ReStructuredText.
209

210
    Parameters
211
    ----------
212
    style : {"rst", "md"}
213
        Use ReStructuredText formatting or Markdown. Default: Markdown.
214
    file : {os.PathLike, StringIO, TextIO}, optional
215
        If provided, prints to the given file-like object. Otherwise, returns a string.
216
    changes : str or os.PathLike[str], optional
217
        If provided, manually points to the file where the changelog can be found.
218
        Assumes a relative path otherwise.
219

220
    Returns
221
    -------
222
    str, optional
223

224
    Notes
225
    -----
226
    This function is used solely for development and packaging purposes.
227
    """
228
    if isinstance(changes, str | Path):
9✔
229
        changes_file = Path(changes).absolute()
9✔
230
    else:
NEW
231
        changes_file = Path(__file__).absolute().parents[3].joinpath("CHANGELOG.rst")
×
232

233
    if not changes_file.exists():
9✔
234
        raise FileNotFoundError("Changelog file not found in xclim folder tree.")
9✔
235

236
    with open(changes_file, encoding="utf-8") as hf:
9✔
237
        changes = hf.read()
9✔
238

239
    if style == "rst":
9✔
240
        hyperlink_replacements = {
9✔
241
            r":issue:`([0-9]+)`": r"`GH/\1 <https://github.com/Ouranosinc/xclim/issues/\1>`_",
242
            r":pull:`([0-9]+)`": r"`PR/\1 <https://github.com/Ouranosinc/xclim/pull/\>`_",
243
            r":user:`([a-zA-Z0-9_.-]+)`": r"`@\1 <https://github.com/\1>`_",
244
        }
245
    elif style == "md":
9✔
246
        hyperlink_replacements = {
9✔
247
            r":issue:`([0-9]+)`": r"[GH/\1](https://github.com/Ouranosinc/xclim/issues/\1)",
248
            r":pull:`([0-9]+)`": r"[PR/\1](https://github.com/Ouranosinc/xclim/pull/\1)",
249
            r":user:`([a-zA-Z0-9_.-]+)`": r"[@\1](https://github.com/\1)",
250
        }
251
    else:
252
        msg = f"Formatting style not supported: {style}"
9✔
253
        raise NotImplementedError(msg)
9✔
254

255
    for search, replacement in hyperlink_replacements.items():
9✔
256
        changes = re.sub(search, replacement, changes)
9✔
257

258
    if style == "md":
9✔
259
        changes = changes.replace("=========\nChangelog\n=========", "# Changelog")
9✔
260

261
        titles = {r"\n(.*?)\n([\-]{1,})": "-", r"\n(.*?)\n([\^]{1,})": "^"}
9✔
262
        for title_expression, level in titles.items():
9✔
263
            found = re.findall(title_expression, changes)
9✔
264
            for grouping in found:
9✔
265
                fixed_grouping = (
9✔
266
                    str(grouping[0]).replace("(", r"\(").replace(")", r"\)")
267
                )
268
                search = rf"({fixed_grouping})\n([\{level}]{'{' + str(len(grouping[1])) + '}'})"
9✔
269
                replacement = f"{'##' if level == '-' else '###'} {grouping[0]}"
9✔
270
                changes = re.sub(search, replacement, changes)
9✔
271

272
        link_expressions = r"[\`]{1}([\w\s]+)\s<(.+)>`\_"
9✔
273
        found = re.findall(link_expressions, changes)
9✔
274
        for grouping in found:
9✔
275
            search = rf"`{grouping[0]} <.+>`\_"
9✔
276
            replacement = f"[{str(grouping[0]).strip()}]({grouping[1]})"
9✔
277
            changes = re.sub(search, replacement, changes)
9✔
278

279
    if not file:
9✔
280
        return changes
9✔
281
    if isinstance(file, Path | os.PathLike):
9✔
282
        with open(file, "w", encoding="utf-8") as f:
9✔
283
            print(changes, file=f)
9✔
284
    else:
285
        print(changes, file=file)
×
286
    return None
9✔
287

288

289
_xclim_deps = [
9✔
290
    "xclim",
291
    "xarray",
292
    "statsmodels",
293
    "sklearn",
294
    "scipy",
295
    "pint",
296
    "pandas",
297
    "numpy",
298
    "numba",
299
    "lmoments3",
300
    "jsonpickle",
301
    "flox",
302
    "dask",
303
    "cf_xarray",
304
    "cftime",
305
    "clisops",
306
    "click",
307
    "bottleneck",
308
    "boltons",
309
]
310

311

312
def show_versions(
9✔
313
    file: os.PathLike | StringIO | TextIO | None = None,
314
    deps: list[str] | None = None,
315
) -> str | None:
316
    """Print the versions of xclim and its dependencies.
317

318
    Parameters
319
    ----------
320
    file : {os.PathLike, StringIO, TextIO}, optional
321
        If provided, prints to the given file-like object. Otherwise, returns a string.
322
    deps : list of str, optional
323
        A list of dependencies to gather and print version information from. Otherwise, prints `xclim` dependencies.
324

325
    Returns
326
    -------
327
    str or None
328
    """
329
    dependencies: list[str]
330
    if deps is None:
9✔
331
        dependencies = _xclim_deps
9✔
332
    else:
333
        dependencies = deps
×
334

335
    dependency_versions = [(d, lambda mod: mod.__version__) for d in dependencies]
9✔
336

337
    deps_blob: list[tuple[str, str | None]] = []
9✔
338
    for modname, ver_f in dependency_versions:
9✔
339
        try:
9✔
340
            if modname in sys.modules:
9✔
341
                mod = sys.modules[modname]
9✔
342
            else:
343
                mod = import_module(modname)
9✔
344
        except (KeyError, ModuleNotFoundError):
9✔
345
            deps_blob.append((modname, None))
9✔
346
        else:
347
            try:
9✔
348
                ver = ver_f(mod)
9✔
349
                deps_blob.append((modname, ver))
9✔
350
            except AttributeError:
9✔
351
                deps_blob.append((modname, "installed"))
9✔
352

353
    modules_versions = "\n".join([f"{k}: {stat}" for k, stat in sorted(deps_blob)])
9✔
354

355
    installed_versions = [
9✔
356
        "INSTALLED VERSIONS",
357
        "------------------",
358
        f"python: {platform.python_version()}",
359
        f"{modules_versions}",
360
        f"Anaconda-based environment: {'yes' if Path(sys.base_prefix).joinpath('conda-meta').exists() else 'no'}",
361
    ]
362

363
    message = "\n".join(installed_versions)
9✔
364

365
    if not file:
9✔
366
        return message
9✔
367
    if isinstance(file, Path | os.PathLike):
9✔
368
        with open(file, "w", encoding="utf-8") as f:
9✔
369
            print(message, file=f)
9✔
370
    else:
371
        print(message, file=file)
×
372
    return None
9✔
373

374

375
# Test Data Utilities ###
376

377

378
def run_doctests():
    """Run the doctests for the module.

    Invokes `pytest` on the directory two levels above this file, using this file's
    directory as the pytest root directory, then exits the interpreter with the
    exit code returned by `pytest`.

    Raises
    ------
    ImportError
        If `pytest` could not be imported.
    """
    if pytest is None:
        raise ImportError(
            "The `pytest` package is required to run the doctests. "
            "You can install it with `pip install pytest` or `pip install xclim[dev]`."
        )

    this_file = Path(__file__).absolute()
    pytest_args = [
        f"--rootdir={this_file.parent}",
        "--numprocesses=0",
        "--xdoctest",
        f"{this_file.parents[1]}",
    ]

    exit_code = pytest.main(pytest_args)
    sys.exit(exit_code)
×
394

395

396
def testing_setup_warnings():
    """Warn users about potential incompatibilities between xclim and xclim-testdata versions.

    Two independent checks are performed:

    - A stable `xclim` version (strictly ``X.Y.Z``) running against a testing data branch
      other than the default one triggers a warning, unless the ``CI`` or ``READTHEDOCS``
      environment variables are set.
    - When the testing data branch looks like a version tag (``vX.Y.Z...``), the last
      modification date of the `xclim` package file is turned into a calendar version;
      a warning is emitted if the testing data tag is more recent than that date.
    """
    if (
        re.match(r"^\d+\.\d+\.\d+$", __xclim_version__)
        and TESTDATA_BRANCH != default_testdata_version
    ):
        # This does not need to be emitted on GitHub Workflows and ReadTheDocs
        if not os.getenv("CI") and not os.getenv("READTHEDOCS"):
            warnings.warn(
                f"`xclim` stable ({__xclim_version__}) is running tests against a non-default branch of the testing data. "
                "It is possible that changes to the testing data may be incompatible with some assertions in this version. "
                f"Please be sure to check {TESTDATA_REPO_URL} for more information.",
            )

    if re.match(r"^v\d+\.\d+\.\d+", TESTDATA_BRANCH):
        # Find the date of last modification of xclim source files to generate a calendar version.
        # The timestamp is converted directly: formatting it with `time.ctime` and parsing it
        # back with `strptime("%a %b ...")` depends on the active locale's day/month names.
        install_date = dt.fromtimestamp(int(os.path.getmtime(xclim.__file__)))
        install_calendar_version = (
            f"{install_date.year}.{install_date.month}.{install_date.day}"
        )

        if Version(TESTDATA_BRANCH) > Version(install_calendar_version):
            warnings.warn(
                f"The installation date of `xclim` ({install_date.ctime()}) "
                f"predates the last release of testing data ({TESTDATA_BRANCH}). "
                "It is very likely that the testing data is incompatible with this build of `xclim`.",
            )
426

427

428
def load_registry(
    branch: str = TESTDATA_BRANCH, repo: str = TESTDATA_REPO_URL
) -> dict[str, str]:
    """Load the registry file for the test data.

    With the default branch and repository, the registry shipped within the `xclim`
    package (``testing/registry.txt``) is read. Otherwise the registry is first
    downloaded from the remote: into ``testing/<branch>/registry.txt`` for a non-default
    branch, or over ``testing/registry.txt`` for a non-default repository.

    Parameters
    ----------
    branch : str
        Branch of the repository to use when fetching the registry.
    repo : str
        URL of the repository to use when fetching the registry.

    Returns
    -------
    dict
        Dictionary of filenames and hashes.

    Raises
    ------
    FileNotFoundError
        If the registry file does not exist.
    """
    remote_registry = audit_url(f"{repo}/{branch}/data/registry.txt")
    packaged_registry = Path(str(ilr.files("xclim").joinpath("testing/registry.txt")))

    if branch != default_testdata_version:
        custom_registry_folder = Path(
            str(ilr.files("xclim").joinpath(f"testing/{branch}"))
        )
        custom_registry_folder.mkdir(parents=True, exist_ok=True)
        # Keep pointing at the branch-specific file so that it is the one parsed below.
        registry_file = custom_registry_folder.joinpath("registry.txt")
        urlretrieve(remote_registry, registry_file)  # noqa: S310
    elif repo != default_testdata_repo_url:
        registry_file = packaged_registry
        urlretrieve(remote_registry, registry_file)  # noqa: S310
    else:
        registry_file = packaged_registry

    if not registry_file.exists():
        raise FileNotFoundError(f"Registry file not found: {registry_file}")

    # Load the registry file: one "<filename> <hash>" entry per line.
    registry: dict[str, str] = {}
    with registry_file.open(encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            # Blank lines and comment lines carry no entry.
            if not fields or fields[0].startswith("#"):
                continue
            registry[fields[0]] = fields[1]
    return registry
9✔
460

461

462
def nimbus(  # noqa: PR01
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    cache_dir: str | Path = TESTDATA_CACHE_DIR,
    data_updates: bool = True,
):
    """Pooch registry instance for xclim test data.

    Parameters
    ----------
    repo : str
        URL of the repository to use when fetching testing datasets.
    branch : str
        Branch of repository to use when fetching testing datasets.
    cache_dir : str or Path
        The path to the directory where the data files are stored.
    data_updates : bool
        If True, allow updates to the data files. Default is True.

    Returns
    -------
    pooch.Pooch
        The Pooch instance for accessing the xclim testing data.

    Raises
    ------
    ImportError
        If the `pooch` package is not installed.

    Notes
    -----
    There are three environment variables that can be used to control the behaviour of this registry:
        - ``XCLIM_TESTDATA_CACHE_DIR``: If this environment variable is set, it will be used as the base directory to
          store the data files. The directory should be an absolute path (i.e., it should start with ``/``).
          Otherwise, the default location will be used (based on ``platformdirs``, see :py:func:`pooch.os_cache`).
        - ``XCLIM_TESTDATA_REPO_URL``: If this environment variable is set, it will be used as the URL of the repository
          to use when fetching datasets. Otherwise, the default repository will be used.
        - ``XCLIM_TESTDATA_BRANCH``: If this environment variable is set, it will be used as the branch of the repository
          to use when fetching datasets. Otherwise, the default branch will be used.

    Examples
    --------
    Using the registry to download a file:

    .. code-block:: python

        import xarray as xr
        from xclim.testing.utils import nimbus

        example_file = nimbus().fetch("example.nc")
        data = xr.open_dataset(example_file)
    """
    if pooch is None:
        raise ImportError(
            "The `pooch` package is required to fetch the xclim testing data. "
            "You can install it with `pip install pooch` or `pip install xclim[dev]`."
        )

    # Validate the remote location before loading the file registry.
    remote = audit_url(f"{repo}/{branch}/data")
    file_registry = load_registry(branch=branch, repo=repo)

    return pooch.create(
        path=cache_dir,
        base_url=remote,
        version=default_testdata_version,
        version_dev=branch,
        allow_updates=data_updates,
        registry=file_registry,
    )
524

525

526
# Idea copied from raven, which borrowed it from xclim, which borrowed it from xarray, which borrowed it from Seaborn.
527
def open_dataset(
    name: str | os.PathLike[str],
    dap_url: str | None = None,
    branch: str = TESTDATA_BRANCH,
    repo: str = TESTDATA_REPO_URL,
    cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
    **kwargs,
) -> Dataset:
    r"""Open a dataset from the online GitHub-like repository.

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    name : str or os.PathLike
        Name of the file containing the dataset.
    dap_url : str, optional
        URL to OPeNDAP folder where the data is stored. If supplied, the dataset is opened
        from there and neither the local cache nor the repository is used.
    branch : str
        Branch of the repository to use when fetching datasets.
    repo : str
        URL of the repository to use when fetching testing datasets.
    cache_dir : str or os.PathLike
        The directory in which to search for and write cached data.
    \*\*kwargs
        For NetCDF files, keywords passed to :py:func:`xarray.open_dataset`.

    Returns
    -------
    Dataset
        The opened dataset.

    Raises
    ------
    ValueError
        If `cache_dir` is None.
    URLError
        If the OPeNDAP URL is rejected by :py:func:`audit_url`.
    OSError
        If the file is not found in the cache directory or cannot be read.

    See Also
    --------
    xarray.open_dataset
    """
    if cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )

    if dap_url:
        dap_target = urljoin(dap_url, str(name))
        try:
            return _open_dataset(audit_url(dap_target, context="OPeNDAP"), **kwargs)
        except URLError:
            # URLError subclasses OSError: re-raise it untouched rather than wrapping it below.
            raise
        except OSError as err:
            msg = f"OPeNDAP file not read. Verify that the service is available: {dap_target}"
            raise OSError(msg) from err

    local_file = Path(cache_dir).joinpath(name)
    if not local_file.exists():
        try:
            local_file = nimbus(branch=branch, repo=repo, cache_dir=cache_dir).fetch(
                name
            )
        except OSError as err:
            msg = f"File not found locally. Verify that the testing data is available in remote: {local_file}"
            raise OSError(msg) from err

    # Errors raised while reading the file propagate to the caller unchanged.
    return _open_dataset(local_file, **kwargs)
×
597

598

599
def populate_testing_data(
    temp_folder: Path | None = None,
    repo: str = TESTDATA_REPO_URL,
    branch: str = TESTDATA_BRANCH,
    local_cache: Path = TESTDATA_CACHE_DIR,
) -> None:
    """Populate the local cache with the testing data.

    Every file listed in the registry of the requested repository and branch is fetched.
    Files that cannot be downloaded are logged and skipped.

    Parameters
    ----------
    temp_folder : Path, optional
        Path to a temporary folder to use as the local cache. If not provided, the default location will be used.
    repo : str, optional
        URL of the repository to use when fetching testing datasets.
    branch : str, optional
        Branch of xclim-testdata to use when fetching testing datasets.
    local_cache : Path
        The path to the local cache. Defaults to the location set by the platformdirs library.
        The testing data will be downloaded to this local cache.

    Returns
    -------
    None

    Raises
    ------
    SocketBlockedError
        If `pytest-socket` is installed and network access is blocked while fetching a file.
    """
    # Create the Pooch instance
    n = nimbus(repo=repo, branch=branch, cache_dir=temp_folder or local_cache)

    # `SocketBlockedError` is None when `pytest-socket` is not installed. An empty tuple makes
    # the `except` clause match nothing, instead of raising a TypeError that hides the real error.
    socket_errors = (SocketBlockedError,) if SocketBlockedError is not None else ()

    # Download the files listed for the requested repository and branch
    errored_files = []
    for file in load_registry(branch=branch, repo=repo):
        try:
            n.fetch(file)
        except HTTPError:
            logger.error("File `%s` not accessible in remote repository.", file)
            errored_files.append(file)
        except socket_errors as err:  # noqa
            msg = (
                "Unable to access registry file online. Testing suite is being run with `--disable-socket`. "
                "If you intend to run tests with this option enabled, please download the file beforehand with the "
                "following console command: `$ xclim prefetch_testing_data`."
            )
            raise SocketBlockedError(msg) from err

    if errored_files:
        logger.error(
            "The following files were unable to be downloaded: %s",
            errored_files,
        )
    else:
        logger.info("Files were downloaded successfully.")
650

651

652
def gather_testing_data(
    worker_cache_dir: str | os.PathLike[str] | Path,
    worker_id: str,
    _cache_dir: str | os.PathLike[str] | None = TESTDATA_CACHE_DIR,
):
    """Gather testing data across workers.

    The "master" worker only populates the cache. Any other worker makes sure the cache is
    populated (under a file lock, except on Windows where the data must already be present)
    and then copies the default testing data version into its own directory.

    Parameters
    ----------
    worker_cache_dir : str or os.PathLike
        Destination directory receiving a copy of the testing data for this worker.
    worker_id : str
        Identifier of the worker; "master" designates the main process.
    _cache_dir : str or os.PathLike, optional
        Directory holding the shared testing data cache.

    Raises
    ------
    ValueError
        If `_cache_dir` is None.
    FileNotFoundError
        On Windows, if the testing data is not already present in the cache.
    """
    if _cache_dir is None:
        raise ValueError(
            "The cache directory must be set. "
            "Please set the `_cache_dir` parameter or the `XCLIM_TESTDATA_CACHE_DIR` environment variable."
        )
    cache_dir = Path(_cache_dir)

    if worker_id == "master":
        # Populate the very cache this function was asked to use.
        populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
        return

    if platform.system() == "Windows":
        if not cache_dir.joinpath(default_testdata_version).exists():
            raise FileNotFoundError(
                "Testing data not found and UNIX-style file-locking is not supported on Windows. "
                "Consider running `$ xclim prefetch_testing_data` to download testing data beforehand."
            )
    else:
        cache_dir.mkdir(exist_ok=True, parents=True)
        lockfile = cache_dir.joinpath(".lock")
        test_data_being_written = FileLock(lockfile)
        with test_data_being_written:
            # Only one worker at a time may write to the shared cache.
            populate_testing_data(branch=TESTDATA_BRANCH, local_cache=cache_dir)
            # NOTE(review): this marker is created here but never read in this function;
            # presumably it is consumed elsewhere in the test setup — confirm.
            cache_dir.joinpath(".data_written").touch()
        with test_data_being_written.acquire():
            if lockfile.exists():
                lockfile.unlink()

    copytree(cache_dir.joinpath(default_testdata_version), worker_cache_dir)
9✔
686

687

688
# Testing Utilities ###
689

690

691
def audit_url(url: str, context: str | None = None) -> str:
    """Check if the URL is well-formed.

    Parameters
    ----------
    url : str
        The URL to check.
    context : str, optional
        Label prepended to the error message to identify the kind of URL being checked.

    Returns
    -------
    str
        The given URL, unchanged, when it passes the checks.

    Raises
    ------
    URLError
        If the URL uses plain ``http`` or is missing a scheme or a network location.
    """
    parsed = urlparse(url)
    prefix = context if context else ""

    problem = ""
    if parsed.scheme == "http":
        problem = f"{prefix} URL is not using secure HTTP: '{url}'".strip()
    # A malformed URL takes precedence over the insecure-scheme message.
    if not (parsed.scheme and parsed.netloc):
        problem = f"{prefix} URL is not well-formed: '{url}'".strip()

    if not problem:
        return url

    logger.error(problem)
    raise URLError(problem)
9✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc