• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ouranosinc / miranda / 2424712364

pending completion
2424712364

Pull #50

github

GitHub
Merge 936f16058 into 6d81d9443
Pull Request #50: revise structure to fit newest database definition

5 of 43 new or added lines in 6 files covered. (11.63%)

116 existing lines in 3 files now uncovered.

661 of 3287 relevant lines covered (20.11%)

0.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

34.62
/miranda/convert/_data_definitions.py
1
import json
3✔
2
import logging.config
3✔
3
import os
3✔
4
from pathlib import Path
3✔
5
from typing import List, Mapping, Union
3✔
6

7
from miranda.scripting import LOGGING_CONFIG
3✔
8
from miranda.storage import report_file_size
3✔
9

10
# Apply the project-wide logging configuration as soon as the module is imported.
logging.config.dictConfig(LOGGING_CONFIG)

# Public API of this module. Every public ``gather_*`` helper defined below is
# listed, including ``gather_era5_land_sea_mask``, so that star-imports of this
# module expose the complete set of gatherers.
__all__ = [
    "gather_agcfsr",
    "gather_agmerra",
    "gather_era5_pressure_levels",
    "gather_era5_land",
    "gather_era5_land_sea_mask",
    "gather_era5_single_levels",
    "gather_nrcan_gridded_obs",
    "gather_sc_earth",
    "gather_wfdei_gem_capa",
    "era5_variables",
    "nrcan_variables",
    "nasa_ag_variables",
    "sc_earth_variables",
    "wfdei_gem_capa_variables",
    "reanalysis_project_institutes",
    "xarray_frequencies_to_cmip6like",
]
29

30
# Folder (next to this module) holding the JSON attribute tables read below.
data_folder = Path(__file__).parent / "data"

# Each table exposes its variable names as the keys of its "variable_entry"
# mapping. ``Path.read_text`` opens and closes the file itself, so no file
# handle is left dangling after import (unlike a bare ``json.load(open(...))``).
era5_variables = json.loads((data_folder / "ecmwf_cf_attrs.json").read_text())[
    "variable_entry"
].keys()
nrcan_variables = ["tasmin", "tasmax", "pr"]
nasa_ag_variables = json.loads((data_folder / "nasa_cf_attrs.json").read_text())[
    "variable_entry"
].keys()
sc_earth_variables = ["prcp", "tdew", "tmean", "trange", "wind"]
wfdei_gem_capa_variables = json.loads(
    (data_folder / "usask_cf_attrs.json").read_text()
)["variable_entry"].keys()
42

43
# Maps each project identifier to the identifier of its institute.
# The ECMWF entries share one value, so they are generated from a single tuple;
# insertion order is the same as a hand-written literal would give.
reanalysis_project_institutes = {
    "cfsr": "ncar",
    **dict.fromkeys(
        (
            "era5",
            "era5-pressure-levels-preliminary-back-extension",
            "era5-pressure-levels",
            "era5-single-levels-preliminary-back-extension",
            "era5-single-levels",
            "era5-land",
        ),
        "ecmwf",
    ),
    "merra2": "nasa",
    "nrcan-gridded-10km": "nrcan",
    "wfdei-gem-capa": "usask",
}
55

56

57
# Hand-maintained translation from xarray frequency codes to the
# CMIP6/CMIP5-style controlled vocabulary.
# see: https://github.com/ES-DOC/pyessv-archive
xarray_frequencies_to_cmip6like = dict(
    H="hr",
    D="day",
    W="sem",
    M="mon",
    Q="qtr",  # TODO does this make sense? does not exist in cmip6 CV
    A="yr",
    Y="yr",
)
68

69

70
def _gather(
    name: str,
    variables: Mapping[str, List[str]],
    source: Union[str, os.PathLike],
    back_extension: bool,
) -> Mapping[str, List[Path]]:
    """Recursively collect the NetCDF files of one project below ``source``.

    Parameters
    ----------
    name : str
        Project identifier; it is part of the file-name pattern and is also
        the single key of the returned mapping.
    variables
        Iterated for variable names; each name is used as a file-name prefix.
    source : str or os.PathLike
        Root folder that is searched recursively.
    back_extension : bool
        If True, "-preliminary-back-extension" is appended to ``name`` in the
        file-name pattern.

    Returns
    -------
    Mapping[str, List[Path]]
        ``{name: files}``, with the files sorted within each variable and the
        variables kept in iteration order.
    """
    root = Path(source)
    logging.info(f"Gathering {name} files from: {root.as_posix()}")

    # The project tag is the same for every variable, so build it once.
    tag = f"{name}{'-preliminary-back-extension' if back_extension else ''}"
    matches = [
        nc_file
        for variable in variables
        for nc_file in sorted(root.rglob(f"{variable}*{tag}*.nc"))
    ]

    logging.info(f"Found {len(matches)} files, totalling {report_file_size(matches)}.")
    return {name: matches}
×
92

93

94
def gather_era5_single_levels(
    path: Union[str, os.PathLike], back_extension: bool = False
) -> Mapping[str, List[Path]]:
    """Gather ERA5 single-levels source files found under ``path``.

    Delegates to ``_gather`` with the "era5-single-levels" project name and
    the ERA5 variable table; ``back_extension`` is forwarded unchanged.
    """
    project = "era5-single-levels"
    return _gather(
        project,
        era5_variables,
        source=path,
        back_extension=back_extension,
    )
101

102

103
def gather_era5_pressure_levels(
    path: Union[str, os.PathLike], back_extension: bool = False
) -> Mapping[str, List[Path]]:
    """Gather ERA5 pressure-levels source files found under ``path``.

    Delegates to ``_gather`` with the "era5-pressure-levels" project name and
    the ERA5 variable table; ``back_extension`` is forwarded unchanged.
    """
    project = "era5-pressure-levels"
    return _gather(
        project, era5_variables, source=path, back_extension=back_extension
    )
113

114

115
def gather_era5_land(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather ERA5-Land source files found under ``path``.

    Delegates to ``_gather`` with the "era5-land" project name; the
    back-extension suffix is never requested for this project.
    """
    return _gather(
        "era5-land",
        era5_variables,
        source=path,
        back_extension=False,
    )
×
118

119

120
def gather_era5_land_sea_mask(path: Union[str, os.PathLike]) -> Mapping[str, Path]:
    """Locate the ERA5 land-sea mask file directly inside ``path``.

    Parameters
    ----------
    path : str or os.PathLike
        Folder searched (non-recursively) for a file matching
        ``sftlf*era5*.nc``.

    Returns
    -------
    Mapping[str, Path]
        ``{"lsm": mask_file}``, where ``mask_file`` is the first match yielded
        by the glob. If several files match, which one comes first is up to
        the file system.

    Raises
    ------
    FileNotFoundError
        If no file in ``path`` matches the pattern.
    """
    source = Path(path)
    try:
        # Only the lookup can exhaust the iterator, so keep the try minimal.
        mask_file = next(source.glob("sftlf*era5*.nc"))
    except StopIteration:
        logging.error("No land_sea_mask found for ERA5.")
        # Give the caller an actionable message and hide the StopIteration,
        # which is an implementation detail of the lookup.
        raise FileNotFoundError(
            f"No land_sea_mask found for ERA5 in: {source.as_posix()}"
        ) from None
    return dict(lsm=mask_file)
×
127

128

129
def gather_agmerra(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather agMERRA source files found recursively under ``path``.

    Files are matched per variable with the pattern ``AgMERRA_*_{variable}.nc4``
    for every name in ``nasa_ag_variables``.

    Returns
    -------
    Mapping[str, List[Path]]
        ``{"merra": files}``, sorted within each variable. The files were
        previously returned under the ``"cfsr"`` key, a copy-paste slip from
        ``gather_agcfsr`` that mislabelled agMERRA data as CFSR.
        NOTE(review): "merra" mirrors the "cfsr" key of ``gather_agcfsr``;
        confirm it is the key that downstream consumers expect.
    """
    source_agmerra = Path(path)
    logging.info(f"Gathering agMERRA from: {source_agmerra.as_posix()}")
    infiles_agmerra = [
        nc4
        for variable in nasa_ag_variables
        for nc4 in sorted(source_agmerra.rglob(f"AgMERRA_*_{variable}.nc4"))
    ]
    logging.info(
        f"Found {len(infiles_agmerra)} files, totalling {report_file_size(infiles_agmerra)}."
    )
    return dict(merra=infiles_agmerra)
×
140

141

142
def gather_agcfsr(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather agCFSR source files found recursively under ``path``.

    Files are matched per variable with the pattern ``AgCFSR_*_{variable}.nc4``
    for every name in ``nasa_ag_variables``; the result is ``{"cfsr": files}``
    with the files sorted within each variable.
    """
    root = Path(path)
    logging.info(f"Gathering CFSR from: {root.as_posix()}")
    found = [
        nc4
        for variable in nasa_ag_variables
        for nc4 in sorted(root.rglob(f"AgCFSR_*_{variable}.nc4"))
    ]
    logging.info(f"Found {len(found)} files, totalling {report_file_size(found)}.")
    return {"cfsr": found}
×
153

154

155
def gather_nrcan_gridded_obs(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather NRCan gridded observation files stored below ``path``.

    For each name in ``nrcan_variables`` the sub-folder of that name is
    searched (non-recursively) for files matching ``*{variable}_*.nc``; the
    result is ``{"nrcan": files}`` with the files sorted within each variable.
    """
    root = Path(path)
    logging.info(f"Gathering NRCAN Gridded Obs from {root.as_posix()}")
    found = [
        nc_file
        for variable in nrcan_variables
        for nc_file in sorted((root / variable).glob(f"*{variable}_*.nc"))
    ]
    logging.info(f"Found {len(found)} files, totalling {report_file_size(found)}.")
    return {"nrcan": found}
×
166

167

168
def gather_wfdei_gem_capa(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather WFDEI-GEM-CaPa source files found recursively under ``path``.

    Files are matched per variable with the pattern ``{variable}_*.nc`` for
    every name in ``wfdei_gem_capa_variables``; the result is
    ``{"wfdei-gem-capa": files}`` with the files sorted within each variable.
    """
    root = Path(path)
    logging.info(f"Gathering WFDEI-GEM_CaPa from: {root.as_posix()}")
    found = [
        nc_file
        for variable in wfdei_gem_capa_variables
        for nc_file in sorted(root.rglob(f"{variable}_*.nc"))
    ]
    logging.info(f"Found {len(found)} files, totalling {report_file_size(found)}.")
    return {"wfdei-gem-capa": found}
×
179

180

181
def gather_sc_earth(path: Union[str, os.PathLike]) -> Mapping[str, List[Path]]:
    """Gather SC-Earth source files found recursively under ``path``.

    Files are matched per variable with the pattern ``SC-Earth_{variable}_*.nc``
    for every name in ``sc_earth_variables``.

    Returns
    -------
    Mapping[str, List[Path]]
        ``{"sc-earth": files}``, sorted within each variable. The files were
        previously returned under the ``"wfdei-gem-capa"`` key, a copy-paste
        slip from ``gather_wfdei_gem_capa`` that mislabelled SC-Earth data.
        NOTE(review): confirm "sc-earth" is the key that downstream consumers
        expect.
    """
    source_sc_earth = Path(path)
    logging.info(f"Gathering SC-Earth from: {source_sc_earth.as_posix()}")
    infiles_sc_earth = [
        nc_file
        for variable in sc_earth_variables
        for nc_file in sorted(source_sc_earth.rglob(f"SC-Earth_{variable}_*.nc"))
    ]
    logging.info(
        f"Found {len(infiles_sc_earth)} files, totalling {report_file_size(infiles_sc_earth)}."
    )
    return {"sc-earth": infiles_sc_earth}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2024 Coveralls, Inc