• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

TUW-GEO / ecmwf_models / 11944977952

06 Nov 2024 01:46PM CUT coverage: 80.452%. Remained the same
11944977952

push

github

web-flow
Update CHANGELOG.rst

189 of 300 branches covered (63.0%)

712 of 885 relevant lines covered (80.45%)

4.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.44
/src/ecmwf_models/utils.py
1
# -*- coding: utf-8 -*-
2
# The MIT License (MIT)
3
#
4
# Copyright (c) 2019, TU Wien
5
#
6
# Permission is hereby granted, free of charge, to any person obtaining a copy
7
# of this software and associated documentation files (the "Software"), to deal
8
# in the Software without restriction, including without limitation the rights
9
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
# copies of the Software, and to permit persons to whom the Software is
11
# furnished to do so, subject to the following conditions:
12
#
13
# The above copyright notice and this permission notice shall be included in all
14
# copies or substantial portions of the Software.
15
#
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
# SOFTWARE.
23
"""
24
Utility functions for all data products in this package.
25
"""
26
import os
6✔
27
import warnings
6✔
28
from datetime import datetime
6✔
29
import pandas as pd
6✔
30
import numpy as np
6✔
31
from netCDF4 import Dataset
6✔
32
from collections import OrderedDict
6✔
33
from parse import parse
6✔
34
import yaml
6✔
35

36
from ecmwf_models.extract import save_gribs_from_grib
6✔
37
from repurpose.misc import find_first_at_depth
6✔
38

39
from ecmwf_models.globals import (DOTRC, CDS_API_URL, IMG_FNAME_TEMPLATE,
6✔
40
                                  IMG_FNAME_DATETIME_FORMAT,
41
                                  SUPPORTED_PRODUCTS)
42

43

44
def parse_product(inpath: str) -> str:
    """
    Infer which product is stored under a path, based on the name of
    the first file found there. No files are opened.

    Parameters
    ----------
    inpath: str
        Input path where ERA data was downloaded to. Contains annual folders.

    Returns
    -------
    product : str
        Product name

    Raises
    ------
    ValueError
        If no known product name appears in the parsed filename.
    """
    props = img_infer_file_props(inpath)
    prod = props['product'].lower()

    # Check the more specific name first so era5-land is not matched as era5.
    if "era5-land" in prod:
        return "era5-land"  # also era5-land-t
    if "era5" in prod:
        return "era5"  # also era5-t

    raise ValueError(
        f"Could not derive product name from data in {inpath}")
68

69

70
def parse_filetype(inpath: str) -> str:
    """
    Infer the file type by parsing filenames in the passed directory.

    Parameters
    ----------
    inpath: str
        Input path where ERA data was downloaded to. Contains annual folders.

    Returns
    -------
    filetype : str
        'grib' for .grb files, otherwise 'netcdf'.
    """
    props = img_infer_file_props(inpath)
    # Any extension other than 'grb' is treated as netcdf.
    return 'grib' if props['ext'] == 'grb' else 'netcdf'
90

91

92
def load_var_table(name="era5", lut=False):
    """
    Load the variables table for supported variables to download.

    Parameters
    ----------
    name : str, optional (default: "era5")
        Product to load the table for, either 'era5' or 'era5-land'
        (case-insensitive).
    lut : bool, optional (default: False)
        If set to true only names are loaded, so that they can be used
        for a LUT otherwise the full table is loaded

    Returns
    -------
    dat : pd.DataFrame
        The variable table, either complete or reduced to the name columns.

    Raises
    ------
    ValueError
        If no lookup table exists for the selected product.
    """
    name = name.lower()
    # LUT csv files are shipped with the package, next to this module.
    here = os.path.dirname(os.path.abspath(__file__))

    if name == "era5":
        era_vars_csv = os.path.join(here, "era5", "era5_lut.csv")
    elif name == "era5-land":
        era_vars_csv = os.path.join(here, "era5", "era5-land_lut.csv")
    else:
        # Single formatted message instead of tuple-style exception args.
        raise ValueError(
            f"No LUT for the selected dataset found: {name}")

    if lut:
        dat = pd.read_csv(era_vars_csv)[["dl_name", "long_name", "short_name"]]
    else:
        dat = pd.read_csv(era_vars_csv)

    return dat
121

122

123
def lookup(name, variables):
    """
    Search the passed elements in the lookup table and return the
    matching rows; raise an error for any variable that is not found.
    """
    lut = load_var_table(name=name, lut=True)

    selected = []
    for var in variables:
        # for/else: the else branch fires only if no row matched.
        for row in lut.itertuples():
            if var in row:
                selected.append(row.Index)
                break
        else:
            raise ValueError(
                f"Passed variable {var} is not a supported variable.")

    return lut.loc[selected, :]
145

146

147
def get_default_params(name="era5"):
    """
    Read only lines that are marked as default variable in the csv file

    Parameters
    ----------
    name : str
        Name of the product to get the default parameters for
    """
    var_table = load_var_table(name, lut=False)
    return var_table.loc[var_table.default == 1.0]
158

159

160
def default_variables(product="era5", format='dl_name'):
    """
    These variables are being downloaded, when None are passed by the user

    Parameters
    ---------
    product : str, optional (default: 'era5')
        Name of the era5 product to read the default variables for.
        Either 'era5' or 'era5-land'.
    format: str, optional (default: 'dl_name')
        'dl_name' for name as in the downloaded image data
        'short_name' for short name
        'long_name' for long name
    """
    lut = load_var_table(name=product)
    is_default = lut["default"] == 1
    # .tolist() converts numpy scalars to plain python values.
    return lut.loc[is_default, format].values.tolist()
177

178

179
def make_era5_land_definition_file(
    data_file,
    out_file,
    data_file_y_res=0.25,
    ref_var="lsm",
    threshold=0.5,
    exclude_antarctica=True,
):
    """
    Create a land grid definition file from a variable within a downloaded,
    regular (netcdf) era5 file.

    Parameters
    ----------
    data_file : str
        Path to the downloaded file that contains the image that is used as the
        reference for creating the land definition file.
    out_file: str
        Full output path to the land definition file to create.
    data_file_y_res : float, optional (default: 0.25)
        The resolution of the data file in latitude direction.
    ref_var: str, optional (default: 'lsm')
        A variable in the data_file that is the reference for the
        land definition.
        By default, we use the land-sea-mask variable.
    threshold: float, optional (default: 0.5)
        Threshold value below which a point is declared water,
        and above (or equal) which it is declared a land-point.
        If None (or NaN) is passed, then a point is declared a land point
        if it is not masked (numpy masked array) in the reference variable.
    exclude_antarctica: bool, optional (default: True)
        Cut off the definition file at -60° Lat to exclude Land Points
        in Antarctica.
    """
    lat_name, lon_name = "latitude", "longitude"
    ds_in = Dataset(data_file)
    try:
        ds_out = Dataset(out_file, "w", format="NETCDF4")
        try:
            # Copy all dimensions (and their coordinate values) 1:1.
            for dim_name in ds_in.dimensions.keys():
                ds_out.createDimension(
                    dim_name, size=ds_in.dimensions[dim_name].size)
                ds_out.createVariable(
                    dim_name, "float32", (dim_name,), zlib=True)
                ds_out.variables[dim_name][:] = ds_in.variables[dim_name][:]

            ref = ds_in.variables[ref_var]

            land_mask = np.zeros(ref.shape)

            # Fix: the docstring allows threshold=None, but np.isnan(None)
            # raises TypeError. Accept both None and NaN for "use the mask".
            if threshold is None or np.isnan(threshold):
                land_mask[~ref[:].mask] = 1.0
            else:
                land_mask[ref[:] >= threshold] = 1.0

            # drop values below -60° Lat
            if exclude_antarctica:
                cut_off_lat = -60.0
                index_thres_lat = ((180.0 / data_file_y_res) + 1) - (
                    (90.0 + cut_off_lat) / data_file_y_res)
                land_mask[int(index_thres_lat):, :] = np.nan
            else:
                cut_off_lat = None

            ds_out.createVariable(
                "land", "float32", (lat_name, lon_name), zlib=True)
            ds_out.variables["land"][:] = land_mask

            land_attrs = OrderedDict([
                ("units", "(0,1)"),
                ("long_name", "Land-sea mask"),
                ("based_on_variable", ref_var),
                ("standard_name", "land_binary_mask"),
                ("threshold_land_>=", str(threshold)),
                ("cut_off_at", str(cut_off_lat)),
            ])

            for attr, val in land_attrs.items():
                ds_out.variables["land"].setncattr(attr, val)
        finally:
            # Close output even if writing fails, so no handle leaks.
            ds_out.close()
    finally:
        ds_in.close()
257

258

259
def split_array(array, chunk_size):
    """
    Split an array into chunks of a given size.

    Parameters
    ----------
    array : array-like
        Array to split into chunks
    chunk_size : int
        Size of each chunk

    Returns
    -------
    chunks : list
        List of chunks; the last chunk may be shorter than chunk_size.
    """
    return [
        array[start:start + chunk_size]
        for start in range(0, len(array), chunk_size)
    ]
279

280

281
def check_api_ready() -> bool:
    """
    Verify that the API is ready to be used. Otherwise raise an Error.

    Returns:
    --------
    api_ready: bool
        True if api is ready

    Raises
    ------
    ValueError
        If neither a .cdsapirc file nor the CDSAPI_KEY variable exists.
    """
    if os.path.isfile(DOTRC):
        # Credentials file found: prefer its URL over the environment.
        if "CDSAPI_URL" in os.environ:
            os.environ.pop("CDSAPI_URL")  # Use URL from file
        return True

    # No credentials file, fall back to environment variables.
    key = os.environ.get('CDSAPI_KEY')
    if "CDSAPI_URL" not in os.environ:
        os.environ['CDSAPI_URL'] = CDS_API_URL

    if key is None:
        raise ValueError(
            'Neither CDSAPI_KEY variable nor .cdsapirc file found, '
            'download will not work! '
            'Please create a .cdsapirc file with your API key. '
            'See: https://cds.climate.copernicus.eu/how-to-api')
    return True
307

308

309
def img_infer_file_props(img_root_path: str,
                         fntempl: str = IMG_FNAME_TEMPLATE,
                         start_from_last=False) -> dict:
    """
    Parse file names to retrieve properties from fntempl.
    Does not open any files.

    Parameters
    ----------
    img_root_path: str
        Root directory where annual directories are located
    fntempl: str, optional
        Filename template to parse filenames with
    start_from_last: bool, optional
        Use the last available file instead of the first one.

    Returns
    -------
    file_args : dict
        Named fields parsed from the found filename.

    Raises
    ------
    ValueError
        If no file was found, or the found filename does not match
        the template.
    """
    fname = find_first_at_depth(img_root_path, 2, reverse=start_from_last)

    if fname is None:
        raise ValueError(f"No matching files for chosen template found in "
                         f"the directory {img_root_path}")

    file_args = parse(fntempl, fname)
    if file_args is None:
        # Fix: parse() returns None when the filename does not match the
        # template; previously this surfaced as an opaque AttributeError
        # on `.named`. Raise ValueError so callers' handlers catch it.
        raise ValueError(
            f"Filename {fname} does not match the template {fntempl}")
    return file_args.named
333

334

335
def get_first_last_image_date(path, start_from_last=True):
    """
    Parse files in the given directory (or any subdir) using the passed
    filename template. props will contain all fields specified in the template.
    the `datetime` field is required and used to determine the last image date.

    Parameters
    ----------
    path: str
        Path to the directory containing the image files
    start_from_last: bool, optional (default: True')
        Get date from last available file instead of the first available one.

    Returns
    -------
    date: str
        Parse date from the last found image file that matches `fntempl`.
    """
    try:
        props = img_infer_file_props(
            path, fntempl=IMG_FNAME_TEMPLATE, start_from_last=start_from_last)
        parsed = datetime.strptime(props['datetime'],
                                   IMG_FNAME_DATETIME_FORMAT)
        # Truncate to midnight of the parsed day.
        day = datetime(parsed.year, parsed.month, parsed.day)
    except ValueError:
        raise ValueError('Could not infer date from filenames. '
                         'Check filename template.')

    return str(day)
363

364

365
def update_image_summary_file(data_path: str,
                              other_props: dict = None,
                              out_file=None):
    """
    Summarize image metadata as yml file

    Parameters
    ----------
    data_path: str
        Root path to the image archive
    other_props: dict, optional (default: None)
        Other properties to write into the yml file. E.g. download
        options to enable time series update.
    out_file: str, optional (default: None)
        Path to summary file. File will be created/updated.
        If not specified, then `data_path` is used. If a file already exists,
        it will be overwritten.
    """
    try:
        first_image_date = get_first_last_image_date(
            data_path, start_from_last=False)
        last_image_date = get_first_last_image_date(
            data_path, start_from_last=True)
    except ValueError:
        # Best effort: without parseable dates there is nothing to summarize.
        warnings.warn(f"Could not infer date from filenames in {data_path}")
        return

    props = img_infer_file_props(data_path, start_from_last=False)
    # Per-file timestamp is replaced by the period covered by the archive.
    props.pop("datetime")
    props['period_from'] = str(pd.to_datetime(first_image_date).date())
    props['period_to'] = str(pd.to_datetime(last_image_date).date())
    props['last_update'] = str(datetime.now())

    settings = {}
    if other_props is not None:
        settings.update(other_props)
    props['download_settings'] = settings

    if out_file is None:
        out_file = os.path.join(data_path, "overview.yml")

    with open(out_file, 'w') as f:
        yaml.dump(props, f, default_flow_style=False)
410

411

412
def assert_product(product: str) -> str:
    """
    Validate that the passed product name is supported and return it.

    Raises
    ------
    ValueError
        If the product is not listed in SUPPORTED_PRODUCTS.
    """
    if product in SUPPORTED_PRODUCTS:
        return product
    raise ValueError(f"Got product {product} but expected one of "
                     f"{SUPPORTED_PRODUCTS}")
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc