19147573697

Committed 06 Nov 2025 07:34PM UTC coverage: 20.335% (-0.2%) from 20.507%

Build # 19147573697

Build Type

Pull #258

github

Committed by

web-flow

Commit Message

Merge cbf57e240 into f3ea17ca6

Pull Request Pull Request #258: Bump the actions group in /.github/workflows with 6 updates + cruft update

Run Details

0 of 16 new or added lines in 7 files covered. (0.0%)

7 existing lines in 6 files now uncovered.

97 of 477 relevant lines covered (20.34%)

1.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

28.85

/src/xdatasets/utils.py

import getpass
import os
import sys
import tempfile
import urllib.request
import warnings
from functools import reduce
from pathlib import Path

import intake


# URL of the main intake catalog (a YAML file) hosted in the hydrocloudservices/catalogs GitHub repository.
# NOTE(review): presumably the default catalog URL passed to ``cache_catalog``; it is not referenced
# anywhere else in this module, so confirm against the callers.
catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml"


def open_dataset(
    name: str,
    catalog,
    **kws,  # noqa: F841
):
    r"""
    Open a dataset from the online public repository (requires internet).

    The catalog is searched two levels deep (category, then dataset) for an entry
    called ``name``. The system proxy settings are merged into a copy of the entry's
    storage options before the entry is loaded with the reader matching its driver.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'era5_reanalysis_single_levels'.
    catalog : intake catalog
        Opened intake catalog which provides access to the datasets. It must expose
        its categories through ``_entries`` and support item access, both by category
        name and by a ``(category, dataset)`` tuple.
    \*\*kws : dict, optional
        Currently not used.

    Returns
    -------
    object
        The result of ``.read()`` for ``geopandasfile`` entries, or of ``.to_dask()``
        for ``zarr`` entries.

    Raises
    ------
    NotImplementedError
        If no entry called ``name`` exists in the catalog, or if the entry uses a
        driver other than ``geopandasfile`` or ``zarr``.

    See Also
    --------
    xarray.open_dataset

    Notes
    -----
    Available datasets:
        `"era5_reanalysis_single_levels"`: ERA5 reanalysis subset (t2m and tp).
        `"cehq"`: CEHQ flow and water levels observations.
    """
    unavailable_msg = f"Dataset {name} is not available. Please request further datasets to our github issues pages"

    # Every warning emitted while resolving and loading the entry is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # Locate the category that holds the requested dataset.
        matches = [
            (category, dataset_name)
            for category in catalog._entries.keys()
            for dataset_name in catalog[category]._entries.keys()
            if dataset_name == name
        ]
        if not matches:
            # Fail early with an explicit message instead of an unrelated error
            # raised further down on the catalog object itself.
            raise NotImplementedError(unavailable_msg)

        # When several categories hold the same name, the first match is used.
        entry = catalog[matches[0]]

        # Add proxy information to a copy of the storage options, so that the
        # catalog entry itself is left untouched; ``config_kwargs`` is created
        # when the entry does not define it.
        proxies = urllib.request.getproxies()
        storage_options = dict(entry.storage_options or {})
        storage_options["config_kwargs"] = {
            **(storage_options.get("config_kwargs") or {}),
            "proxies": proxies,
        }

        driver = entry.describe()["driver"][0]
        if driver == "geopandasfile":
            data = entry(storage_options=storage_options).read()
        elif driver == "zarr":
            data = entry(storage_options=storage_options).to_dask()
        else:
            raise NotImplementedError(unavailable_msg)
    return data


class HiddenPrints:
    """Context manager that sends everything written to ``sys.stdout`` to ``os.devnull``."""

    def __enter__(self):
        """Remember the current ``sys.stdout`` and replace it with a writable handle on ``os.devnull``."""
        self._original_stdout = sys.stdout
        devnull_path = Path(os.devnull)
        sys.stdout = devnull_path.open(mode="w")

    def __exit__(self, exc_type, exc_val, exc_tb):  # noqa: F841
        """Close the ``os.devnull`` handle and put the remembered stream back in place."""
        muted_stream = sys.stdout
        muted_stream.close()
        sys.stdout = self._original_stdout


def cache_catalog(url):
    """
    Cache the catalog in the system's temporary folder for easier access.

    This is especially useful when working behind firewalls or if the remote server containing the yaml files is down.
    Looks for http_proxy/https_proxy environment variable if the request goes through a proxy.

    Parameters
    ----------
    url : str
        URL for the intake catalog which provides access to the datasets.
        While this library provides its own intake catalog, users have the option to provide their own catalog,
        which can be particularly beneficial for private datasets or if different configurations are needed.

    Returns
    -------
    pathlib.Path
        Path of the cached copy of the main catalog file, located in
        ``<system temp dir>/<user name>/catalogs``.

    Raises
    ------
    urllib.error.URLError
        If the main catalog cannot be downloaded. The message explains how to
        configure a proxy through environment variables.
    """
    # Install a process-wide opener so that every urllib request made afterwards
    # (including the downloads below) goes through the system proxies.
    proxies = urllib.request.getproxies()
    proxy = urllib.request.ProxyHandler(proxies)
    opener = urllib.request.build_opener(proxy)
    urllib.request.install_opener(opener)

    # The cache lives in a per-user folder inside the temporary directory.
    tmp_dir = Path(tempfile.gettempdir()).joinpath(getpass.getuser()).joinpath("catalogs")
    tmp_dir.mkdir(parents=True, exist_ok=True)
    main_catalog_path = tmp_dir.joinpath(Path(url).name)

    try:
        urllib.request.urlretrieve(url, main_catalog_path)  # noqa: S310
    except urllib.error.URLError as e:
        raise urllib.error.URLError(
            "Could not reach the catalog, perhaps due to the presence of a proxy. "
            "Try adding proxy information to the environment variables as follows "
            "before running xdatasets:\n"
            "    import os\n"
            "    proxy = 'http://<proxy>:<port>'\n"
            "    os.environ['http_proxy'] = proxy\n"
            "    os.environ['https_proxy'] = proxy",
        ) from e

    # Download every catalog file referenced by the main catalog next to it.
    for value in intake.open_catalog(main_catalog_path)._entries.values():
        # NOTE: the base URL is derived with os.path rather than pathlib.Path because
        # pathlib collapses the "//" that follows the URL scheme ("https://" would
        # become "https:/"), which yields an invalid URL.
        entry_path = f"{os.path.dirname(url)}/{Path(value.describe()['args']['path']).name}"  # noqa: PTH120
        urllib.request.urlretrieve(  # noqa: S310
            entry_path,
            tmp_dir.joinpath(Path(entry_path).name),
        )
    return main_catalog_path

1	import getpass	6✔
2	import os	6✔
3	import sys	6✔
4	import tempfile	6✔
5	import urllib.request	6✔
6	import warnings	6✔
7	from functools import reduce	6✔
8	from pathlib import Path	6✔
9
10	import intake	6✔
11
12
13	catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml"	6✔
14
15
16	def open_dataset(	6✔
17	name: str,
18	catalog,
19	**kws, # noqa: F841
20	):
21	r"""
22	Open a dataset from the online public repository (requires internet).
23
24	Parameters
25	----------
26	name : str
27	Name of the file containing the dataset.
28	e.g. 'era5_reanalysis_single_levels'.
29	catalog : str
30	URL for the intake catalog which provides access to the datasets.
31	\*\*kws : dict, optional
32	Currently not used.
33
34	See Also
35	--------
36	xarray.open_dataset
37
38	Notes
39	-----
40	Available datasets:
41	`"era5_reanalysis_single_levels"`: ERA5 reanalysis subset (t2m and tp).
42	`"cehq"`: CEHQ flow and water levels observations.
43	"""
44	with warnings.catch_warnings():	×
45	warnings.simplefilter("ignore")	×
46
47	try:	×
48	import intake # noqa: F401	×
49	except ImportError as e:	×
50	raise ImportError(	×
51	"tutorial.open_dataset depends on intake and intake-xarray to download and manage datasets."
52	" To proceed please install intake and intake-xarray.",
53	) from e
54
55	cat = catalog	×
56	dataset_info = [	×
57	(category, dataset_name) for category in cat._entries.keys() for dataset_name in cat[category]._entries.keys() if dataset_name == name
58	]
59
60	data = reduce(lambda array, index: array[index], dataset_info, cat)	×
61
62	# add proxy infos
63	proxies = urllib.request.getproxies()	×
64	storage_options = data.storage_options	×
65	storage_options["config_kwargs"]["proxies"] = proxies	×
66
67	if data.describe()["driver"][0] == "geopandasfile":	×
68	data = data(storage_options=storage_options).read()	×
69	elif data.describe()["driver"][0] == "zarr":	×
70	data = data(storage_options=storage_options).to_dask()	×
71	else:
72	raise NotImplementedError(	×
73	f"Dataset {name} is not available. Please request further datasets to our github issues pages",
74	)
75	return data	×
76
77
78	class HiddenPrints:	6✔
79	def __enter__(self):	6✔
80	self._original_stdout = sys.stdout	×
81	sys.stdout = Path(os.devnull).open("w")	×
82
83	def __exit__(self, exc_type, exc_val, exc_tb): # noqa: F841	6✔
84	sys.stdout.close()	×
85	sys.stdout = self._original_stdout	×
86
87
88	def cache_catalog(url):	6✔
89	"""
90	Cache the catalog in the system's temporary folder for easier access.
91
92	This is especially useful when working behind firewalls or if the remote server containing the yaml files is down.
93	Looks for http_proxy/https_proxy environment variable if the request goes through a proxy.
94
95	Parameters
96	----------
97	url : str
98	URL for the intake catalog which provides access to the datasets.
99	While this library provides its own intake catalog, users have the option to provide their own catalog,
100	which can be particularly beneficial for private datasets or if different configurations are needed.
101	"""
102	proxies = urllib.request.getproxies()	×
103	proxy = urllib.request.ProxyHandler(proxies)	×
104	opener = urllib.request.build_opener(proxy)	×
105	urllib.request.install_opener(opener)	×
106
NEW 107	tmp_dir = Path(tempfile.gettempdir()).joinpath(getpass.getuser()).joinpath("catalogs")	×
UNCOV 108	tmp_dir.mkdir(parents=True, exist_ok=True)	×
109	main_catalog_path = tmp_dir.joinpath(Path(url).name)	×
110
111	try:	×
112	urllib.request.urlretrieve(url, main_catalog_path) # noqa: S310	×
113	except urllib.error.URLError as e:	×
114	raise urllib.error.URLError(	×
115	"Could not reach the catalog, perhaps due to the presence of a proxy."
116	"Try adding proxy information to the environment variables as follows before"
117	"running xdatasets :"
118	"import os"
119	"proxy = 'http://<proxy>:<port>'"
120	"os.environ['http_proxy'] = proxy"
121	"os.environ['https_proxy'] = proxy",
122	) from e
123
124	for value in intake.open_catalog(main_catalog_path)._entries.values():	×
125	# FIXME: entry_path only seems to work using os.path, and not pathlib.Path. Why is that?
126	entry_path = f"{os.path.dirname(url)}/{Path(value.describe()['args']['path']).name}" # noqa: PTH120	×
127	urllib.request.urlretrieve( # noqa: S310	×
128	entry_path,
129	tmp_dir.joinpath(Path(entry_path).name),
130	)
131	return main_catalog_path	×

hydrologie / xdatasets / 19147573697

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous