Ouranosinc / miranda / 15141552897

20 May 2025 03:24PM UTC coverage: 16.448% (+1.4%) from 15.049%

Pull Request #241: Testing Data and Distributed Testing
Merge 12ef5216f into 730c6f31e (github / web-flow)

115 of 194 new or added lines in 2 files covered (59.28%).
20 existing lines in 2 files now uncovered.
1029 of 6256 relevant lines covered (16.45%).
1.38 hits per line.

Source file: /src/miranda/convert/corrections.py (25.0% of lines covered)
"""Dataset corrections submodule."""

from __future__ import annotations

import datetime
import pathlib
from collections.abc import Iterator, Sequence
from functools import partial
from typing import Callable

import xarray as xr

from miranda.convert.utils import find_version_hash
from miranda.gis import conservative_regrid, subset_domain, threshold_mask
from miranda.treatments import (
    cf_units_conversion,
    clip_values,
    correct_unit_names,
    dimensions_compliance,
    ensure_correct_time_frequency,
    invert_value_sign,
    metadata_conversion,
    offset_time_dimension,
    preprocessing_corrections,
    transform_values,
    variable_conversion,
)
from miranda.treatments.utils import load_json_data_mappings

CONFIG_FOLDER = pathlib.Path(__file__).parent / "data"
CONFIG_FILES = {
    "EMDNA": "emdna_cf_attrs.json",
    "ESPO-G6-E5L": "espo-g6-e5l_attrs.json",
    "ESPO-G6-R2": "espo-g6-r2_attrs.json",
    "NEX-GDDP-CMIP6": "nex-gddp-cmip6_attrs.json",
    "agcfsr": "agcfsr_agmerra2_cf_attrs.json",
    "agmerra2": "agcfsr_agmerra2_cf_attrs.json",
    "cmip": "cmip5_cmip6_cordex_ouranos_attrs.json",
    "cordex": "cmip5_cmip6_cordex_ouranos_attrs.json",
    "eccc-canswe": "eccc-canswe_cf_attrs.json",
    "eccc-ahccd": "eccc-ahccd_cf_attrs.json",
    "eccc-obs": "eccc-obs_cf_attrs.json",
    "era5-land": "era5_era5-land_cf_attrs.json",
    "era5-land-monthly-means": "era5_era5-land_cf_attrs.json",
    "era5-pressure-levels": "era5_era5-land_cf_attrs.json",
    "era5-pressure-levels-monthly-means": "era5_era5-land_cf_attrs.json",
    "era5-pressure-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
    "era5-pressure-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
    "era5-single-levels": "era5_era5-land_cf_attrs.json",
    "era5-single-levels-monthly-means": "era5_era5-land_cf_attrs.json",
    "era5-single-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
    "era5-single-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
    "ets-grnch": "ets-grnch_cf_attrs.json",
    "melcc": "melcc_cf_attrs.json",
    "rdrs-v21": "eccc-rdrs_cf_attrs.json",
    "wfdei-gem-capa": "wfdei-gem-capa_cf_attrs.json",
}
for k, v in CONFIG_FILES.items():
    CONFIG_FILES[k] = CONFIG_FOLDER / v
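# Note: after the loop above, every CONFIG_FILES entry is a pathlib.Path under
# CONFIG_FOLDER (e.g. the "era5-land" entry resolves to
# data/era5_era5-land_cf_attrs.json next to this module).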


def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset:
    """
    Convert variables to a CF-compliant format.

    Parameters
    ----------
    ds : xr.Dataset
        Data to be converted.
    project : str
        Project name for decoding/handling purposes.

    Returns
    -------
    xr.Dataset
        The corrected dataset.
    """
    metadata_definition = load_json_data_mappings(project, CONFIG_FILES)

    # Unit and value corrections.
    ds = correct_unit_names(ds, project, metadata_definition)
    ds = transform_values(ds, project, metadata_definition)
    ds = invert_value_sign(ds, project, metadata_definition)
    ds = cf_units_conversion(ds, metadata_definition)
    ds = clip_values(ds, project, metadata_definition)

    # Dimension and time-axis corrections.
    ds = dimensions_compliance(ds, project, metadata_definition)
    ds = ensure_correct_time_frequency(ds, project, metadata_definition)
    ds = offset_time_dimension(ds, project, metadata_definition)

    ds = variable_conversion(ds, project, metadata_definition)

    ds = metadata_conversion(ds, project, metadata_definition)

    # Default to an empty string so that "None" is never appended to the history.
    ds.attrs["history"] = (
        f"{datetime.datetime.now()}: "
        f"Variables converted from original files using miranda.convert.{dataset_corrections.__name__}. "
        f"{ds.attrs.get('history', '')}".strip()
    )

    return ds
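

# A minimal usage sketch (illustrative only; the file name below is an
# assumption for demonstration, and the project key comes from CONFIG_FILES):
def _example_corrections() -> None:
    # Open a hypothetical ERA5-Land file and run the correction pipeline on it.
    example = xr.open_dataset("era5_land_example.nc")
    corrected = dataset_corrections(example, project="era5-land")
    # dataset_corrections stamps a provenance message into the "history" attribute.
    print(corrected.attrs["history"])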


def dataset_conversion(
    input_files: (
        str
        | pathlib.Path
        | Sequence[str | pathlib.Path]
        | Iterator[pathlib.Path]
        | xr.Dataset
    ),
    project: str,
    domain: str | None = None,
    mask: str | pathlib.Path | xr.Dataset | xr.DataArray | None = None,
    mask_cutoff: float | bool = False,
    regrid: bool = False,
    add_version_hashes: bool = True,
    preprocess: Callable | str | None = "auto",
    **xr_kwargs,
) -> xr.Dataset | xr.DataArray:
    r"""
    Convert an existing xarray-compatible dataset to another format, with variable corrections applied.

    Parameters
    ----------
    input_files : str or pathlib.Path or Sequence[str or pathlib.Path] or Iterator[pathlib.Path] or xr.Dataset
        Files or objects to be converted.
        If given a sequence or generator, files are opened with :py:func:`xarray.open_mfdataset` and concatenated.
    project : {"cordex", "cmip5", "cmip6", "ets-grnch", "isimip-ft", "pcic-candcs-u6", "converted"}
        Project name for decoding/handling purposes.
    domain : {"global", "nam", "can", "qc", "mtl"}, optional
        Domain to perform subsetting on. Default: None.
    mask : str or pathlib.Path or xr.Dataset or xr.DataArray, optional
        DataArray or single-variable Dataset containing the mask, or a path to such a file.
    mask_cutoff : float or bool
        If a mask is supplied, the threshold above which to mask values. Default: False.
    regrid : bool
        Whether to perform regridding with xESMF. Default: False.
    add_version_hashes : bool
        If True, the version name and sha256sum of the source file(s) are added to the global attributes.
    preprocess : callable or str, optional
        Preprocessing function to apply to each Dataset.
        Default: "auto" - Run preprocessing fixes based on the fields supplied in the metadata definition.
        Callable - Applied to the Dataset (single file) or passed as ``preprocess`` to :py:func:`xarray.open_mfdataset` (multi-file dataset).
    \*\*xr_kwargs : Any
        Arguments passed directly to xarray.

    Returns
    -------
    xr.Dataset or xr.DataArray
        The corrected dataset.
    """
    if isinstance(input_files, xr.Dataset):
        ds = input_files
    else:
        # Gather the input files; a directory is globbed for NetCDF and Zarr stores.
        if isinstance(input_files, (str, pathlib.Path)):
            if pathlib.Path(input_files).is_dir():
                files = []
                files.extend([f for f in pathlib.Path(input_files).glob("*.nc")])
                files.extend([f for f in pathlib.Path(input_files).glob("*.zarr")])
            else:
                files = [pathlib.Path(input_files)]
        elif isinstance(input_files, (Sequence, Iterator)):
            files = [pathlib.Path(f) for f in input_files]
        else:
            files = input_files

        # Record the version name and sha256sum of each source file.
        version_hashes = dict()
        if add_version_hashes:
            for file in files:
                version_hashes[file.name] = find_version_hash(file)

        preprocess_kwargs = dict()
        if preprocess:
            if preprocess == "auto":
                preprocess_kwargs.update(
                    preprocess=partial(preprocessing_corrections, project=project)
                )
            elif isinstance(preprocess, Callable):
                preprocess_kwargs.update(preprocess=preprocess)

        if len(files) == 1:
            ds = xr.open_dataset(files[0], **xr_kwargs)
            for process in preprocess_kwargs.values():
                ds = process(ds)
        else:
            ds = xr.open_mfdataset(files, **xr_kwargs, **preprocess_kwargs)
        if version_hashes:
            ds.attrs.update(dict(original_files=str(version_hashes)))

    ds = dataset_corrections(ds, project)

    if domain:
        ds = subset_domain(ds, domain)

    # A mask may be given as a path, in which case it is opened with xarray first.
    if isinstance(mask, (str, pathlib.Path)):
        mask = xr.open_dataset(mask)
    if isinstance(mask, (xr.Dataset, xr.DataArray)):
        if regrid:
            mask = conservative_regrid(ds, mask)
        ds = threshold_mask(ds, mask=mask, mask_cutoff=mask_cutoff)

    return ds
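

# End-to-end usage sketch (illustrative only; the paths and domain below are
# assumptions based on the docstring, and the project key comes from CONFIG_FILES):
if __name__ == "__main__":
    converted = dataset_conversion(
        "/path/to/era5-land",  # hypothetical directory globbed for *.nc and *.zarr
        project="era5-land",
        domain="can",
        chunks={"time": 365},  # forwarded to xarray via **xr_kwargs
    )
    converted.to_zarr("/path/to/converted.zarr")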