
OGGM / oggm / build 14517998944

17 Apr 2025 02:26PM UTC coverage: 84.449%. First build

Pull Request #1773: Move copdownload to AWS
(github web-flow: merge a334f0afa into 24a25ac25)

24 of 32 new or added lines in 1 file covered. (75.0%)

12061 of 14282 relevant lines covered (84.45%)

3.68 hits per line

Source File

/oggm/utils/_downloads.py (74.22% covered)
1
"""Automated data download and IO."""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import gzip
9✔
7
import bz2
9✔
8
import hashlib
9✔
9
import shutil
9✔
10
import zipfile
9✔
11
import sys
9✔
12
import csv
9✔
13
import math
9✔
14
import logging
9✔
15
from functools import partial, wraps
9✔
16
import time
9✔
17
import fnmatch
9✔
18
import urllib.request
9✔
19
import urllib.error
9✔
20
from urllib.parse import urlparse
9✔
21
import socket
9✔
22
import multiprocessing
9✔
23
from netrc import netrc
9✔
24
import ftplib
9✔
25
import ssl
9✔
26
import tarfile
9✔
27
import json
9✔
28

29
# External libs
30
import pandas as pd
9✔
31
import numpy as np
9✔
32
import shapely.geometry as shpg
9✔
33
import requests
9✔
34

35
# Optional libs
36
try:
9✔
37
    import geopandas as gpd
9✔
38
except ImportError:
39
    pass
40
try:
9✔
41
    import salem
9✔
42
    from salem import wgs84
9✔
43
except ImportError:
44
    pass
45
try:
9✔
46
    import rasterio
9✔
47
    from rasterio.merge import merge as merge_tool
8✔
48
except ImportError:
49
    pass
50

51
# Locals
52
import oggm.cfg as cfg
9✔
53
from oggm.exceptions import (InvalidParamsError, NoInternetException,
9✔
54
                             DownloadVerificationFailedException,
55
                             DownloadCredentialsMissingException,
56
                             HttpDownloadError, HttpContentTooShortError,
57
                             InvalidDEMError, FTPSDownloadError)
58

59
# Python 3.12+ gives a deprecation warning if TarFile.extraction_filter is None.
60
# https://docs.python.org/3.12/library/tarfile.html#tarfile-extraction-filter
61
if hasattr(tarfile, "fully_trusted_filter"):
9✔
62
    tarfile.TarFile.extraction_filter = staticmethod(tarfile.fully_trusted_filter)  # type: ignore
9✔
63

64
# Module logger
65
logger = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
66

67
# Github repository and commit hash/branch name/tag name on that repository
68
# The given commit will be downloaded from github and used as source for
69
# all sample data
70
SAMPLE_DATA_GH_REPO = 'OGGM/oggm-sample-data'
9✔
71
SAMPLE_DATA_COMMIT = 'e6da53aea8438a06d7702769e73fe5df8ab0d669'
9✔
72

73
CHECKSUM_URL = 'https://cluster.klima.uni-bremen.de/data/downloads.sha256.hdf'
9✔
74
CHECKSUM_VALIDATION_URL = CHECKSUM_URL + '.sha256'
9✔
75
CHECKSUM_LIFETIME = 24 * 60 * 60
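# i.e. the cached checksum file is re-validated at most once every 24 hours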
9✔
76

77
# Recommended url for runs
78
DEFAULT_BASE_URL = ('https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/'
9✔
79
                    'L3-L5_files/2023.3/elev_bands/W5E5_spinup')
80

81
# Web mercator proj constants
82
WEB_N_PIX = 256
9✔
83
WEB_EARTH_RADUIS = 6378137.
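# 256 px tiles and the WGS84 equatorial radius (m), the usual web mercator
# tiling constants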
9✔
84

85
DEM_SOURCES = ['GIMP', 'ARCTICDEM', 'RAMP', 'TANDEM', 'AW3D30', 'MAPZEN', 'DEM3',
9✔
86
               'ASTER', 'SRTM', 'REMA', 'ALASKA', 'COPDEM30', 'COPDEM90', 'NASADEM']
87
DEM_SOURCES_PER_GLACIER = None
9✔
88

89
_RGI_METADATA = dict()
9✔
90

91
DEM3REG = {
9✔
92
    'ISL': [-25., -13., 63., 67.],  # Iceland
93
    'SVALBARD': [9., 35.99, 75., 84.],
94
    'JANMAYEN': [-10., -7., 70., 72.],
95
    'FJ': [36., 68., 79., 90.],  # Franz Josef Land
96
    'FAR': [-8., -6., 61., 63.],  # Faroe Islands
97
    'BEAR': [18., 20., 74., 75.],  # Bear Island
98
    'SHL': [-3., 0., 60., 61.],  # Shetland
99
    # Antarctica tiles as UTM zones, large files
100
    '01-15': [-180., -91., -90, -60.],
101
    '16-30': [-91., -1., -90., -60.],
102
    '31-45': [-1., 89., -90., -60.],
103
    '46-60': [89., 189., -90., -60.],
104
    # Greenland tiles
105
    'GL-North': [-72., -11., 76., 84.],
106
    'GL-West': [-62., -42., 64., 76.],
107
    'GL-South': [-52., -40., 59., 64.],
108
    'GL-East': [-42., -17., 64., 76.]
109
}
110

111
# Function
112
tuple2int = partial(np.array, dtype=np.int64)
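# e.g. tuple2int((1.2, 2.8)) -> array([1, 2]) (casting truncates, no rounding)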
9✔
113

114
lock = None
9✔
115

116

117
def mkdir(path, reset=False):
9✔
118
    """Checks if directory exists and if not, create one.
119

120
    Parameters
121
    ----------
122
    reset: erase the content of the directory if it exists
123

124
    Returns
125
    -------
126
    the path
127
    """
128

129
    if reset and os.path.exists(path):
8✔
130
        shutil.rmtree(path)
×
131
    os.makedirs(path, exist_ok=True)
8✔
132
    return path
8✔
133

134

135
def del_empty_dirs(s_dir):
9✔
136
    """Delete empty directories."""
137
    b_empty = True
1✔
138
    for s_target in os.listdir(s_dir):
1✔
139
        s_path = os.path.join(s_dir, s_target)
1✔
140
        if os.path.isdir(s_path):
1✔
141
            if not del_empty_dirs(s_path):
1✔
142
                b_empty = False
×
143
        else:
144
            b_empty = False
1✔
145
    if b_empty:
1✔
146
        os.rmdir(s_dir)
1✔
147
    return b_empty
1✔
148

149

150
def findfiles(root_dir, endswith):
9✔
151
    """Finds all files with a specific ending in a directory
152

153
    Parameters
154
    ----------
155
    root_dir : str
156
       The directory to search in
157
    endswith : str
158
       The file ending (e.g. '.hgt')
159

160
    Returns
161
    -------
162
    the list of files
163
    """
164
    out = []
×
165
    for dirpath, dirnames, filenames in os.walk(root_dir):
×
166
        for filename in [f for f in filenames if f.endswith(endswith)]:
×
167
            out.append(os.path.join(dirpath, filename))
×
168
    return out
×
169

170

171
def get_lock():
9✔
172
    """Get multiprocessing lock."""
173
    global lock
174
    if lock is None:
8✔
175
        # Global Lock
176
        if cfg.PARAMS.get('use_mp_spawn', False):
8✔
177
            lock = multiprocessing.get_context('spawn').Lock()
×
178
        else:
179
            lock = multiprocessing.Lock()
8✔
180
    return lock
8✔
181

182

183
def get_dl_verify_data(section):
9✔
184
    """Returns a pandas DataFrame with all known download object hashes.
185

186
    The returned DataFrame is indexed by cache_obj_name (without the section)
187
    and has the columns 'size' (int) and 'sha256' (bytes).
188
    """
189

190
    verify_key = 'dl_verify_data_' + section
8✔
191
    if verify_key in cfg.DATA:
8✔
192
        return cfg.DATA[verify_key]
3✔
193

194
    verify_file_path = os.path.join(cfg.CACHE_DIR, 'downloads.sha256.hdf')
8✔
195

196
    def verify_file(force=False):
8✔
197
        """Check the hash file's own hash"""
198
        if not cfg.PARAMS['has_internet']:
8✔
199
            return
×
200

201
        if not force and os.path.isfile(verify_file_path) and \
8✔
202
           os.path.getmtime(verify_file_path) + CHECKSUM_LIFETIME > time.time():
203
            return
7✔
204

205
        logger.info('Checking the download verification file checksum...')
8✔
206
        try:
8✔
207
            with requests.get(CHECKSUM_VALIDATION_URL) as req:
8✔
208
                req.raise_for_status()
8✔
209
                verify_file_sha256 = req.text.split(maxsplit=1)[0]
8✔
210
                verify_file_sha256 = bytearray.fromhex(verify_file_sha256)
8✔
211
        except Exception as e:
×
212
            verify_file_sha256 = None
×
213
            logger.warning('Failed getting verification checksum: ' + repr(e))
×
214

215
        if os.path.isfile(verify_file_path) and verify_file_sha256:
8✔
216
            sha256 = hashlib.sha256()
8✔
217
            with open(verify_file_path, 'rb') as f:
8✔
218
                for b in iter(lambda: f.read(0xFFFF), b''):
8✔
219
                    sha256.update(b)
8✔
220
            if sha256.digest() != verify_file_sha256:
8✔
221
                logger.warning('%s changed or invalid, deleting.'
×
222
                               % (verify_file_path))
223
                os.remove(verify_file_path)
×
224
            else:
225
                os.utime(verify_file_path)
8✔
226

227
    if not np.any(['dl_verify_data_' in k for k in cfg.DATA.keys()]):
8✔
228
        # We check the hash file only once per session
229
        # no need to do it at each call
230
        verify_file()
8✔
231

232
    if not os.path.isfile(verify_file_path):
8✔
233
        if not cfg.PARAMS['has_internet']:
8✔
234
            return pd.DataFrame()
×
235

236
        logger.info('Downloading %s to %s...'
8✔
237
                    % (CHECKSUM_URL, verify_file_path))
238

239
        with requests.get(CHECKSUM_URL, stream=True) as req:
8✔
240
            if req.status_code == 200:
8✔
241
                mkdir(os.path.dirname(verify_file_path))
8✔
242
                with open(verify_file_path, 'wb') as f:
8✔
243
                    for b in req.iter_content(chunk_size=0xFFFF):
8✔
244
                        if b:
8✔
245
                            f.write(b)
8✔
246

247
        logger.info('Done downloading.')
8✔
248

249
        verify_file(force=True)
8✔
250

251
    if not os.path.isfile(verify_file_path):
8✔
252
        logger.warning('Downloading and verifying checksums failed.')
×
253
        return pd.DataFrame()
×
254

255
    try:
8✔
256
        data = pd.read_hdf(verify_file_path, key=section)
8✔
257
    except KeyError:
8✔
258
        data = pd.DataFrame()
8✔
259

260
    cfg.DATA[verify_key] = data
8✔
261

262
    return data
8✔
263

264

265
def _call_dl_func(dl_func, cache_path):
9✔
266
    """Helper so the actual call to downloads can be overridden
267
    """
268
    return dl_func(cache_path)
8✔
269

270

271
def _cached_download_helper(cache_obj_name, dl_func, reset=False):
9✔
272
    """Helper function for downloads.
273

274
    Takes care of checking if the file is already cached.
275
    Only calls the actual download function when no cached version exists.
276
    """
277
    cache_dir = cfg.PATHS['dl_cache_dir']
8✔
278
    cache_ro = cfg.PARAMS['dl_cache_readonly']
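    # dl_cache_dir is the global (possibly read-only) download cache;
    # fb_cache_dir below is a fallback location, usually in the working dir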
8✔
279

280
    # A lot of logic below could be simplified but it's also not too important
281
    wd = cfg.PATHS.get('working_dir')
8✔
282
    if wd:
8✔
283
        # this is for real runs
284
        fb_cache_dir = os.path.join(wd, 'cache')
7✔
285
        check_fb_dir = False
7✔
286
    else:
287
        # Nothing has been set up yet, this is bad - find a place to write
288
        # This should happen on read-only cluster only but still
289
        wd = os.environ.get('OGGM_WORKDIR')
8✔
290
        if wd is not None and os.path.isdir(wd):
8✔
291
            fb_cache_dir = os.path.join(wd, 'cache')
×
292
        else:
293
            fb_cache_dir = os.path.join(cfg.CACHE_DIR, 'cache')
8✔
294
        check_fb_dir = True
8✔
295

296
    if not cache_dir:
8✔
297
        # Defaults to working directory: it must be set!
298
        if not cfg.PATHS['working_dir']:
×
299
            raise InvalidParamsError("Need a valid PATHS['working_dir']!")
×
300
        cache_dir = fb_cache_dir
×
301
        cache_ro = False
×
302

303
    fb_path = os.path.join(fb_cache_dir, cache_obj_name)
8✔
304
    if not reset and os.path.isfile(fb_path):
8✔
305
        return fb_path
×
306

307
    cache_path = os.path.join(cache_dir, cache_obj_name)
8✔
308
    if not reset and os.path.isfile(cache_path):
8✔
309
        return cache_path
6✔
310

311
    if cache_ro:
8✔
312
        if check_fb_dir:
×
313
            # Add a manual check that we are caching sample data download
314
            if 'oggm-sample-data' not in fb_path:
×
315
                raise InvalidParamsError('Attempting to download something '
×
316
                                         'with invalid global settings.')
317
        cache_path = fb_path
×
318

319
    if not cfg.PARAMS['has_internet']:
8✔
320
        raise NoInternetException("Download required, but "
1✔
321
                                  "`has_internet` is False.")
322

323
    mkdir(os.path.dirname(cache_path))
8✔
324

325
    try:
8✔
326
        cache_path = _call_dl_func(dl_func, cache_path)
8✔
327
    except BaseException:
×
328
        if os.path.exists(cache_path):
×
329
            os.remove(cache_path)
×
330
        raise
×
331

332
    return cache_path
8✔
333

334

335
def _verified_download_helper(cache_obj_name, dl_func, reset=False):
9✔
336
    """Helper function for downloads.
337

338
    Verifies the size and hash of the downloaded file against the included
339
    list of known static files.
340
    Uses _cached_download_helper to perform the actual download.
341
    """
342
    path = _cached_download_helper(cache_obj_name, dl_func, reset)
8✔
343

344
    dl_verify = cfg.PARAMS.get('dl_verify', False)
8✔
345

346
    if dl_verify and path and cache_obj_name not in cfg.DL_VERIFIED:
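        # cache_obj_name has the form '<section>/<path>'; for plain urls the
        # section is the host name (see _get_url_cache_name)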
8✔
347
        cache_section, cache_path = cache_obj_name.split('/', 1)
1✔
348
        data = get_dl_verify_data(cache_section)
1✔
349
        if cache_path not in data.index:
1✔
350
            logger.info('No known hash for %s' % cache_obj_name)
×
351
            cfg.DL_VERIFIED[cache_obj_name] = True
×
352
        else:
353
            # compute the hash
354
            sha256 = hashlib.sha256()
1✔
355
            with open(path, 'rb') as f:
1✔
356
                for b in iter(lambda: f.read(0xFFFF), b''):
1✔
357
                    sha256.update(b)
1✔
358
            sha256 = sha256.digest()
1✔
359
            size = os.path.getsize(path)
1✔
360

361
            # check
362
            data = data.loc[cache_path]
1✔
363
            if data['size'] != size or bytes(data['sha256']) != sha256:
1✔
364
                err = '%s failed to verify!\nis: %s %s\nexpected: %s %s' % (
1✔
365
                    path, size, sha256.hex(), data.iloc[0], data.iloc[1].hex())
366
                raise DownloadVerificationFailedException(msg=err, path=path)
1✔
367
            logger.info('%s verified successfully.' % path)
1✔
368
            cfg.DL_VERIFIED[cache_obj_name] = True
1✔
369

370
    return path
8✔
371

372

373
def _requests_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
374
    """Implements the required features of urlretrieve on top of requests
375
    """
376

377
    chunk_size = 128 * 1024
8✔
378
    chunk_count = 0
8✔
379

380
    with requests.get(url, stream=True, auth=auth, timeout=timeout) as r:
8✔
381
        if r.status_code != 200:
8✔
382
            raise HttpDownloadError(r.status_code, url)
×
383
        r.raise_for_status()
8✔
384

385
        size = r.headers.get('content-length') or -1
8✔
386
        size = int(size)
8✔
387

388
        if reporthook:
8✔
389
            reporthook(chunk_count, chunk_size, size)
8✔
390

391
        with open(path, 'wb') as f:
8✔
392
            for chunk in r.iter_content(chunk_size=chunk_size):
8✔
393
                if not chunk:
8✔
394
                    continue
×
395
                f.write(chunk)
8✔
396
                chunk_count += 1
8✔
397
                if reporthook:
8✔
398
                    reporthook(chunk_count, chunk_size, size)
8✔
399

400
        if chunk_count * chunk_size < size:
8✔
401
            raise HttpContentTooShortError()
×
402

403

404
def _classic_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
405
    """Thin wrapper around pythons urllib urlretrieve
406
    """
407

408
    ourl = url
×
409
    if auth:
×
410
        u = urlparse(url)
×
411
        if '@' not in u.netloc:
×
412
            netloc = auth[0] + ':' + auth[1] + '@' + u.netloc
×
413
            url = u._replace(netloc=netloc).geturl()
×
414

415
    old_def_timeout = socket.getdefaulttimeout()
×
416
    if timeout is not None:
×
417
        socket.setdefaulttimeout(timeout)
×
418

419
    try:
×
420
        urllib.request.urlretrieve(url, path, reporthook)
×
421
    except urllib.error.HTTPError as e:
×
422
        raise HttpDownloadError(e.code, ourl)
×
423
    except urllib.error.ContentTooShortError:
×
424
        raise HttpContentTooShortError()
×
425
    finally:
426
        socket.setdefaulttimeout(old_def_timeout)
×
427

428

429
class ImplicitFTPTLS(ftplib.FTP_TLS):
9✔
430
    """ FTP_TLS subclass that automatically wraps sockets in SSL to support
431
        implicit FTPS.
432

433
        Taken from https://stackoverflow.com/a/36049814
434
    """
435

436
    def __init__(self, *args, **kwargs):
9✔
437
        super().__init__(*args, **kwargs)
×
438
        self._sock = None
×
439

440
    @property
9✔
441
    def sock(self):
9✔
442
        """Return the socket."""
443
        return self._sock
×
444

445
    @sock.setter
9✔
446
    def sock(self, value):
9✔
447
        """When modifying the socket, ensure that it is ssl wrapped."""
448
        if value is not None and not isinstance(value, ssl.SSLSocket):
×
449
            value = self.context.wrap_socket(value)
×
450
        self._sock = value
×
451

452

453
def url_exists(url):
9✔
454
    """Checks if a given a URL exists or not."""
455
    request = requests.get(url)
2✔
456
    return request.status_code < 400
2✔
457

458

459
def _ftps_retrieve(url, path, reporthook, auth=None, timeout=None):
9✔
460
    """ Wrapper around ftplib to download from FTPS server
461
    """
462

463
    if not auth:
×
464
        raise DownloadCredentialsMissingException('No authentication '
×
465
                                                  'credentials given!')
466

467
    upar = urlparse(url)
×
468

469
    # Decide if Implicit or Explicit FTPS is used based on the port in url
470
    if upar.port == 990:
×
471
        ftps = ImplicitFTPTLS()
×
472
    elif upar.port == 21:
×
473
        ftps = ftplib.FTP_TLS()
×
474

475
    try:
×
476
        # establish ssl connection
477
        ftps.connect(host=upar.hostname, port=upar.port, timeout=timeout)
×
478
        ftps.login(user=auth[0], passwd=auth[1])
×
479
        ftps.prot_p()
×
480

481
        logger.info('Established connection %s' % upar.hostname)
×
482

483
        # meta for progress bar size
484
        count = 0
×
485
        total = ftps.size(upar.path)
×
486
        bs = 12*1024
×
487

488
        def _ftps_progress(data):
×
489
            outfile.write(data)
×
490
            nonlocal count
491
            count += 1
×
492
            reporthook(count, count*bs, total)
×
493

494
        with open(path, 'wb') as outfile:
×
495
            ftps.retrbinary('RETR ' + upar.path, _ftps_progress, blocksize=bs)
×
496

497
    except (ftplib.error_perm, socket.timeout, socket.gaierror) as err:
×
498
        raise FTPSDownloadError(err)
×
499
    finally:
500
        ftps.close()
×
501

502

503
def _get_url_cache_name(url):
9✔
504
    """Returns the cache name for any given url.
505
    """
506

507
    res = urlparse(url)
8✔
508
    return res.netloc.split(':', 1)[0] + res.path
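    # e.g. 'https://cluster.klima.uni-bremen.de/data/file.nc' maps to
    # 'cluster.klima.uni-bremen.de/data/file.nc' (illustrative file name)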
8✔
509

510

511
def oggm_urlretrieve(url, cache_obj_name=None, reset=False,
9✔
512
                     reporthook=None, auth=None, timeout=None):
513
    """Wrapper around urlretrieve, to implement our caching logic.
514

515
    Instead of accepting a destination path, it decides where to store the file
516
    and returns the local path.
517

518
    auth is expected to be either a tuple of ('username', 'password') or None.
519
    """
520

521
    if cache_obj_name is None:
8✔
522
        cache_obj_name = _get_url_cache_name(url)
8✔
523

524
    def _dlf(cache_path):
8✔
525
        logger.info("Downloading %s to %s..." % (url, cache_path))
8✔
526
        try:
8✔
527
            _requests_urlretrieve(url, cache_path, reporthook, auth, timeout)
8✔
528
        except requests.exceptions.InvalidSchema:
×
529
            if 'ftps://' in url:
×
530
                _ftps_retrieve(url, cache_path, reporthook, auth, timeout)
×
531
            else:
532
                _classic_urlretrieve(url, cache_path, reporthook, auth,
×
533
                                     timeout)
534
        return cache_path
8✔
535

536
    return _verified_download_helper(cache_obj_name, _dlf, reset)
8✔
537

538

539
def _progress_urlretrieve(url, cache_name=None, reset=False,
9✔
540
                          auth=None, timeout=None):
541
    """Downloads a file, returns its local path, and shows a progressbar."""
542

543
    try:
8✔
544
        from progressbar import DataTransferBar, UnknownLength
8✔
545
        pbar = None
8✔
546

547
        def _upd(count, size, total):
8✔
548
            nonlocal pbar
549
            if pbar is None:
8✔
550
                pbar = DataTransferBar()
8✔
551
                if not pbar.is_terminal:
8✔
552
                    pbar.min_poll_interval = 15
8✔
553
            if pbar.max_value is None:
8✔
554
                if total > 0:
8✔
555
                    pbar.start(total)
8✔
556
                else:
557
                    pbar.start(UnknownLength)
2✔
558
            pbar.update(min(count * size, total))
8✔
559
            sys.stdout.flush()
8✔
560
        res = oggm_urlretrieve(url, cache_obj_name=cache_name, reset=reset,
8✔
561
                               reporthook=_upd, auth=auth, timeout=timeout)
562
        try:
8✔
563
            pbar.finish()
8✔
564
        except BaseException:
6✔
565
            pass
6✔
566
        return res
8✔
567
    except ImportError:
568
        return oggm_urlretrieve(url, cache_obj_name=cache_name,
569
                                reset=reset, auth=auth, timeout=timeout)
570

571

572
def aws_file_download(aws_path, cache_name=None, reset=False):
9✔
573
    with get_lock():
×
574
        return _aws_file_download_unlocked(aws_path, cache_name, reset)
×
575

576

577
def _aws_file_download_unlocked(aws_path, cache_name=None, reset=False):
9✔
578
    """Download a file from the AWS drive s3://astgtmv2/
579

580
    **Note:** you need AWS credentials for this to work.
581

582
    Parameters
583
    ----------
584
    aws_path: path relative to s3://astgtmv2/
585
    """
586

587
    while aws_path.startswith('/'):
×
588
        aws_path = aws_path[1:]
×
589

590
    if cache_name is not None:
×
591
        cache_obj_name = cache_name
×
592
    else:
593
        cache_obj_name = 'astgtmv2/' + aws_path
×
594

595
    def _dlf(cache_path):
×
596
        raise NotImplementedError("Downloads from AWS are no longer supported")
597

598
    return _verified_download_helper(cache_obj_name, _dlf, reset)
×
599

600

601
def file_downloader(www_path, retry_max=3, sleep_on_retry=5,
9✔
602
                    cache_name=None, reset=False, auth=None,
603
                    timeout=None):
604
    """A slightly better downloader: it tries more than once."""
605

606
    local_path = None
8✔
607
    retry_counter = 0
8✔
608
    while retry_counter < retry_max:
8✔
609
        # Try to download
610
        try:
8✔
611
            retry_counter += 1
8✔
612
            local_path = _progress_urlretrieve(www_path, cache_name=cache_name,
8✔
613
                                               reset=reset, auth=auth,
614
                                               timeout=timeout)
615
            # if no error, exit
616
            break
8✔
617
        except HttpDownloadError as err:
1✔
618
            # This works well for py3
619
            if err.code == 404 or err.code == 300:
×
620
                # Ok so this *should* be an ocean tile
621
                return None
×
622
            elif err.code >= 500 and err.code < 600:
×
623
                logger.info(f"Downloading {www_path} failed with "
×
624
                            f"HTTP error {err.code}, "
625
                            f"retrying in {sleep_on_retry} seconds... "
626
                            f"{retry_counter}/{retry_max}")
627
                time.sleep(sleep_on_retry)
×
628
                continue
×
629
            else:
630
                raise
×
631
        except HttpContentTooShortError as err:
1✔
632
            logger.info("Downloading %s failed with ContentTooShortError"
×
633
                        " error %s, retrying in %s seconds... %s/%s" %
634
                        (www_path, err.code, sleep_on_retry, retry_counter, retry_max))
635
            time.sleep(sleep_on_retry)
×
636
            continue
×
637
        except DownloadVerificationFailedException as err:
1✔
638
            if (cfg.PATHS['dl_cache_dir'] and
1✔
639
                  err.path.startswith(cfg.PATHS['dl_cache_dir']) and
640
                  cfg.PARAMS['dl_cache_readonly']):
641
                if not cache_name:
×
642
                    cache_name = _get_url_cache_name(www_path)
×
643
                cache_name = "GLOBAL_CACHE_INVALID/" + cache_name
×
644
                retry_counter -= 1
×
645
                logger.info("Global cache for %s is invalid!")
×
646
            else:
647
                try:
1✔
648
                    os.remove(err.path)
1✔
649
                except FileNotFoundError:
×
650
                    pass
×
651
                logger.info("Downloading %s failed with "
1✔
652
                            "DownloadVerificationFailedException\n %s\n"
653
                            "The file might have changed or is corrupted. "
654
                            "File deleted. Re-downloading... %s/%s" %
655
                            (www_path, err.msg, retry_counter, retry_max))
656
            continue
1✔
657
        except requests.ConnectionError as err:
1✔
658
            if err.args[0].__class__.__name__ == 'MaxRetryError':
×
659
                # if request tried often enough we don't have to do this
660
                # this error does happen for not existing ASTERv3 files
661
                return None
×
662
            else:
663
                # in other cases: try again
664
                logger.info("Downloading %s failed with ConnectionError, "
×
665
                            "retrying in %s seconds... %s/%s" %
666
                            (www_path, sleep_on_retry, retry_counter, retry_max))
667
                time.sleep(sleep_on_retry)
×
668
                continue
×
669
        except FTPSDownloadError as err:
1✔
670
            logger.info("Downloading %s failed with FTPSDownloadError"
×
671
                        " error: '%s', retrying in %s seconds... %s/%s" %
672
                        (www_path, err.orgerr, sleep_on_retry, retry_counter, retry_max))
673
            time.sleep(sleep_on_retry)
×
674
            continue
×
675

676
    # See if we managed (fail is allowed)
677
    if not local_path or not os.path.exists(local_path):
8✔
678
        logger.warning('Downloading %s failed.' % www_path)
×
679

680
    return local_path
8✔
681

682

683
def locked_func(func):
9✔
684
    """To decorate a function that needs to be locked for multiprocessing"""
685
    @wraps(func)
9✔
686
    def wrapper(*args, **kwargs):
9✔
687
        with get_lock():
6✔
688
            return func(*args, **kwargs)
6✔
689
    return wrapper
9✔
690

691

692
def file_extractor(file_path):
9✔
693
    """For archives with only one file inside extract the file to tmpdir."""
694

695
    filename, file_extension = os.path.splitext(file_path)
6✔
696
    # Second one for tar.gz files
697
    f2, ex2 = os.path.splitext(filename)
6✔
698
    if ex2 == '.tar':
6✔
699
        filename, file_extension = f2, '.tar.gz'
1✔
700
    bname = os.path.basename(file_path)
6✔
701

702
    # This is to give a unique name to the tmp file
703
    hid = hashlib.md5(file_path.encode()).hexdigest()[:7] + '_'
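    # e.g. a prefix like 'a1b2c3d_' (first 7 hex chars of the md5 of the
    # path; the value here is illustrative only)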
6✔
704

705
    # extract directory
706
    tmpdir = cfg.PATHS['tmp_dir']
6✔
707
    mkdir(tmpdir)
6✔
708

709
    # Check output extension
710
    def _check_ext(f):
6✔
711
        _, of_ext = os.path.splitext(f)
6✔
712
        if of_ext not in ['.nc', '.tif']:
6✔
713
            raise InvalidParamsError('Extracted file extension not recognized'
×
714
                                     ': {}'.format(of_ext))
715
        return of_ext
6✔
716

717
    if file_extension == '.zip':
6✔
718
        with zipfile.ZipFile(file_path) as zf:
6✔
719
            members = zf.namelist()
6✔
720
            if len(members) != 1:
6✔
721
                raise RuntimeError('Cannot extract multiple files')
×
722
            o_name = hid + members[0]
6✔
723
            o_path = os.path.join(tmpdir, o_name)
6✔
724
            of_ext = _check_ext(o_path)
6✔
725
            if not os.path.exists(o_path):
6✔
726
                logger.info('Extracting {} to {}...'.format(bname, o_path))
6✔
727
                with open(o_path, 'wb') as f:
6✔
728
                    f.write(zf.read(members[0]))
6✔
729
    elif file_extension == '.gz':
4✔
730
        # Gzip files cannot be inspected. It's always only one file
731
        # Decide on its name
732
        o_name = hid + os.path.basename(filename)
2✔
733
        o_path = os.path.join(tmpdir, o_name)
2✔
734
        of_ext = _check_ext(o_path)
2✔
735
        if not os.path.exists(o_path):
2✔
736
            logger.info('Extracting {} to {}...'.format(bname, o_path))
2✔
737
            with gzip.GzipFile(file_path) as zf:
2✔
738
                with open(o_path, 'wb') as outfile:
2✔
739
                    for line in zf:
2✔
740
                        outfile.write(line)
2✔
741
    elif file_extension == '.bz2':
4✔
742
        # bzip2 files cannot be inspected. It's always only one file
743
        # Decide on its name
744
        o_name = hid + os.path.basename(filename)
4✔
745
        o_path = os.path.join(tmpdir, o_name)
4✔
746
        of_ext = _check_ext(o_path)
4✔
747
        if not os.path.exists(o_path):
4✔
748
            logger.info('Extracting {} to {}...'.format(bname, o_path))
4✔
749
            with bz2.open(file_path) as zf:
4✔
750
                with open(o_path, 'wb') as outfile:
4✔
751
                    for line in zf:
4✔
752
                        outfile.write(line)
4✔
753
    elif file_extension in ['.tar.gz', '.tar']:
1✔
754
        with tarfile.open(file_path) as zf:
1✔
755
            members = zf.getmembers()
1✔
756
            if len(members) != 1:
1✔
757
                raise RuntimeError('Cannot extract multiple files')
×
758
            o_name = hid + members[0].name
1✔
759
            o_path = os.path.join(tmpdir, o_name)
1✔
760
            of_ext = _check_ext(o_path)
1✔
761
            if not os.path.exists(o_path):
1✔
762
                logger.info('Extracting {} to {}...'.format(bname, o_path))
1✔
763
                with open(o_path, 'wb') as f:
1✔
764
                    f.write(zf.extractfile(members[0]).read())
1✔
765
    else:
766
        raise InvalidParamsError('Extension not recognized: '
1✔
767
                                 '{}'.format(file_extension))
768

769
    # Be sure we don't overfill the folder
770
    cfg.get_lru_handler(tmpdir, ending=of_ext).append(o_path)
6✔
771

772
    return o_path
6✔
773

774

775
def download_with_authentication(wwwfile, key):
9✔
776
    """ Uses credentials from a local .netrc file to download files
777

778
    Parameters
779
    ----------
780
    wwwfile : str
781
        the url of the file to download
782
    key : str
783
        the machine entry to look up in the .netrc file
784

785
    Returns
786
    -------
787

788
    """
789
    # Check the cache first. Use a dummy download function to make sure that
790
    # nothing is downloaded without credentials:
791

792
    def _always_none(foo):
×
793
        return None
×
794

795
    cache_obj_name = _get_url_cache_name(wwwfile)
×
796
    dest_file = _verified_download_helper(cache_obj_name, _always_none)
×
797

798
    # Grab auth parameters
799
    if not dest_file:
×
800
        authfile = os.path.expanduser('~/.netrc')
×
801

802
        if not os.path.isfile(authfile):
×
803
            raise DownloadCredentialsMissingException(
×
804
                (authfile, ' does not exist. Add necessary credentials for ',
805
                 key, ' with `oggm_netrc_credentials`. You may have to ',
806
                 'register at the respective service first.'))
807

808
        try:
×
809
            netrc(authfile).authenticators(key)[0]
×
810
        except TypeError:
×
811
            raise DownloadCredentialsMissingException(
×
812
                ('Credentials for ', key, ' are not in ', authfile, '. Add ',
813
                 'credentials for it with `oggm_netrc_credentials`.'))
814

815
        dest_file = file_downloader(
×
816
            wwwfile, auth=(netrc(authfile).authenticators(key)[0],
817
                           netrc(authfile).authenticators(key)[2]))
818

819
    return dest_file
×
820

821

822
def download_oggm_files():
9✔
823
    with get_lock():
8✔
824
        return _download_oggm_files_unlocked()
8✔
825

826

827
def _download_oggm_files_unlocked():
9✔
828
    """Checks if the demo data is already on the cache and downloads it."""
829

830
    zip_url = 'https://github.com/%s/archive/%s.zip' % \
8✔
831
              (SAMPLE_DATA_GH_REPO, SAMPLE_DATA_COMMIT)
832
    odir = os.path.join(cfg.CACHE_DIR)
8✔
833
    sdir = os.path.join(cfg.CACHE_DIR,
8✔
834
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT)
835

836
    # download only if necessary
837
    if not os.path.exists(sdir):
8✔
838
        ofile = file_downloader(zip_url)
8✔
839
        with zipfile.ZipFile(ofile) as zf:
8✔
840
            zf.extractall(odir)
8✔
841
        assert os.path.isdir(sdir)
8✔
842

843
    # list of files for output
844
    out = dict()
8✔
845
    for root, directories, filenames in os.walk(sdir):
8✔
846
        for filename in filenames:
8✔
847
            if filename in out:
8✔
848
                # This was a stupid thing, and should not happen
849
                # TODO: duplicates in sample data...
850
                k = os.path.join(os.path.basename(root), filename)
8✔
851
                assert k not in out
8✔
852
                out[k] = os.path.join(root, filename)
8✔
853
            else:
854
                out[filename] = os.path.join(root, filename)
8✔
855

856
    return out
8✔
857

858

859
def _download_srtm_file(zone):
9✔
860
    with get_lock():
1✔
861
        return _download_srtm_file_unlocked(zone)
1✔
862

863

864
def _download_srtm_file_unlocked(zone):
9✔
865
    """Checks if the srtm data is in the directory and if not, download it.
866
    """
867

868
    # extract directory
869
    tmpdir = cfg.PATHS['tmp_dir']
1✔
870
    mkdir(tmpdir)
1✔
871
    outpath = os.path.join(tmpdir, 'srtm_' + zone + '.tif')
1✔
872

873
    # check if extracted file exists already
874
    if os.path.exists(outpath):
1✔
875
        return outpath
×
876

877
    # Did we download it yet?
878
    wwwfile = ('http://srtm.csi.cgiar.org/wp-content/uploads/files/srtm_5x5/'
1✔
879
               'TIFF/srtm_' + zone + '.zip')
880
    dest_file = file_downloader(wwwfile)
1✔
881

882
    # None means we tried hard but we couldn't find it
883
    if not dest_file:
1✔
884
        return None
×
885

886
    # ok we have to extract it
887
    if not os.path.exists(outpath):
1✔
888
        with zipfile.ZipFile(dest_file) as zf:
1✔
889
            zf.extractall(tmpdir)
1✔
890

891
    # See if we're good, don't overfill the tmp directory
892
    assert os.path.exists(outpath)
1✔
893
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
894
    return outpath
1✔
895

896

897
def _download_nasadem_file(zone):
9✔
898
    with get_lock():
1✔
899
        return _download_nasadem_file_unlocked(zone)
1✔
900

901

902
def _download_nasadem_file_unlocked(zone):
9✔
903
    """Checks if the NASADEM data is in the directory and if not, download it.
904
    """
905

906
    # extract directory
907
    tmpdir = cfg.PATHS['tmp_dir']
1✔
908
    mkdir(tmpdir)
1✔
909
    wwwfile = ('https://e4ftl01.cr.usgs.gov/MEASURES/NASADEM_HGT.001/'
1✔
910
               '2000.02.11/NASADEM_HGT_{}.zip'.format(zone))
911
    demfile = '{}.hgt'.format(zone)
1✔
912
    outpath = os.path.join(tmpdir, demfile)
1✔
913

914
    # check if extracted file exists already
915
    if os.path.exists(outpath):
1✔
916
        return outpath
×
917

918
    # Did we download it yet?
919
    dest_file = file_downloader(wwwfile)
1✔
920

921
    # None means we tried hard but we couldn't find it
922
    if not dest_file:
1✔
923
        return None
×
924

925
    # ok we have to extract it
926
    if not os.path.exists(outpath):
1✔
927
        with zipfile.ZipFile(dest_file) as zf:
1✔
928
            zf.extract(demfile, path=tmpdir)
1✔
929

930
    # See if we're good, don't overfill the tmp directory
931
    assert os.path.exists(outpath)
1✔
932
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
933
    return outpath
1✔
934

935

936
def _download_tandem_file(zone):
9✔
937
    with get_lock():
1✔
938
        return _download_tandem_file_unlocked(zone)
1✔
939

940

941
def _download_tandem_file_unlocked(zone):
9✔
942
    """Checks if the tandem data is in the directory and if not, download it.
943
    """
944

945
    # extract directory
946
    tmpdir = cfg.PATHS['tmp_dir']
1✔
947
    mkdir(tmpdir)
1✔
948
    bname = zone.split('/')[-1] + '_DEM.tif'
1✔
949
    wwwfile = ('https://download.geoservice.dlr.de/TDM90/files/DEM/'
1✔
950
               '{}.zip'.format(zone))
951
    outpath = os.path.join(tmpdir, bname)
1✔
952

953
    # check if extracted file exists already
954
    if os.path.exists(outpath):
1✔
955
        return outpath
×
956

957
    dest_file = download_with_authentication(wwwfile, 'geoservice.dlr.de')
1✔
958

959
    # That means we tried hard but we couldn't find it
960
    if not dest_file:
1✔
961
        return None
×
962
    elif not zipfile.is_zipfile(dest_file):
1✔
963
        # If the TanDEM-X tile does not exist, an invalid file is created.
964
        # See https://github.com/OGGM/oggm/issues/893 for more details
965
        return None
1✔
966

967
    # ok we have to extract it
968
    if not os.path.exists(outpath):
1✔
969
        with zipfile.ZipFile(dest_file) as zf:
1✔
970
            for fn in zf.namelist():
1✔
971
                if 'DEM/' + bname in fn:
1✔
972
                    break
×
973
            with open(outpath, 'wb') as fo:
1✔
974
                fo.write(zf.read(fn))
1✔
975

976
    # See if we're good, don't overfill the tmp directory
977
    assert os.path.exists(outpath)
1✔
978
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
979
    return outpath
1✔
980

981

982
def _download_dem3_viewpano(zone):
9✔
983
    with get_lock():
1✔
984
        return _download_dem3_viewpano_unlocked(zone)
1✔
985

986

987
def _download_dem3_viewpano_unlocked(zone):
9✔
988
    """Checks if the DEM3 data is in the directory and if not, download it.
989
    """
990

991
    # extract directory
992
    tmpdir = cfg.PATHS['tmp_dir']
1✔
993
    mkdir(tmpdir)
1✔
994
    outpath = os.path.join(tmpdir, zone + '.tif')
1✔
995
    extract_dir = os.path.join(tmpdir, 'tmp_' + zone)
1✔
996
    mkdir(extract_dir, reset=True)
1✔
997

998
    # check if extracted file exists already
999
    if os.path.exists(outpath):
1✔
1000
        return outpath
×
1001

1002
    # OK, so see if downloaded already
1003
    # some files have a newer version 'v2'
1004
    if zone in ['R33', 'R34', 'R35', 'R36', 'R37', 'R38', 'Q32', 'Q33', 'Q34',
1✔
1005
                'Q35', 'Q36', 'Q37', 'Q38', 'Q39', 'Q40', 'P31', 'P32', 'P33',
1006
                'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40']:
1007
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + 'v2.zip'
×
1008
    elif zone in DEM3REG.keys():
1✔
1009
        # We prepared these files as tif already
1010
        ifile = ('https://cluster.klima.uni-bremen.de/~oggm/dem/'
1✔
1011
                 'DEM3_MERGED/{}.tif'.format(zone))
1012
        return file_downloader(ifile)
1✔
1013
    else:
1014
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + '.zip'
1✔
1015

1016
    dfile = file_downloader(ifile)
1✔
1017

1018
    # None means we tried hard but we couldn't find it
1019
    if not dfile:
1✔
1020
        return None
×
1021

1022
    # ok we have to extract it
1023
    with zipfile.ZipFile(dfile) as zf:
1✔
1024
        zf.extractall(extract_dir)
1✔
1025

1026
    # Serious issue: sometimes, if a southern hemisphere URL is queried for
1027
    # download and there is none, a NH zip file is downloaded.
1028
    # Example: http://viewfinderpanoramas.org/dem3/SN29.zip yields N29!
1029
    # BUT: There are southern hemisphere files that download properly. However,
1030
    # the unzipped folder has the file name of
1031
    # the northern hemisphere file. A few checks that the correct file exists:
1032
    if len(zone) == 4 and zone.startswith('S'):
1✔
1033
        zonedir = os.path.join(extract_dir, zone[1:])
×
1034
    else:
1035
        zonedir = os.path.join(extract_dir, zone)
1✔
1036
    globlist = glob.glob(os.path.join(zonedir, '*.hgt'))
1✔
1037

1038
    # take care of the special file naming cases
1039
    if zone in DEM3REG.keys():
1✔
1040
        globlist = glob.glob(os.path.join(extract_dir, '*', '*.hgt'))
×
1041

1042
    if not globlist:
1✔
1043
        # Final resort
1044
        globlist = (findfiles(extract_dir, '.hgt') or
×
1045
                    findfiles(extract_dir, '.HGT'))
1046
        if not globlist:
×
1047
            raise RuntimeError("We should have some files here, but we don't")
×
1048

1049
    # merge the single HGT files (can be a bit inefficient, because not every
1050
    # single file might be exactly within extent...)
1051
    rfiles = [rasterio.open(s) for s in globlist]
1✔
1052
    dest, output_transform = merge_tool(rfiles)
1✔
1053
    profile = rfiles[0].profile
1✔
1054
    if 'affine' in profile:
1✔
1055
        profile.pop('affine')
×
1056
    profile['transform'] = output_transform
1✔
1057
    profile['height'] = dest.shape[1]
1✔
1058
    profile['width'] = dest.shape[2]
1✔
1059
    profile['driver'] = 'GTiff'
1✔
1060
    with rasterio.open(outpath, 'w', **profile) as dst:
1✔
1061
        dst.write(dest)
1✔
1062
    for rf in rfiles:
1✔
1063
        rf.close()
1✔
1064

1065
    # delete original files to spare disk space
1066
    for s in globlist:
1✔
1067
        os.remove(s)
1✔
1068
    del_empty_dirs(tmpdir)
1✔
1069

1070
    # See if we're good, don't overfill the tmp directory
1071
    assert os.path.exists(outpath)
1✔
1072
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1073
    return outpath
1✔
1074

1075

1076
def _download_aster_file(zone):
9✔
1077
    with get_lock():
1✔
1078
        return _download_aster_file_unlocked(zone)
1✔
1079

1080

1081
def _download_aster_file_unlocked(zone):
9✔
1082
    """Checks if the ASTER data is in the directory and if not, download it.
1083
    """
1084

1085
    # extract directory
1086
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1087
    mkdir(tmpdir)
1✔
1088
    wwwfile = ('https://e4ftl01.cr.usgs.gov/ASTT/ASTGTM.003/'
1✔
1089
               '2000.03.01/{}.zip'.format(zone))
1090
    outpath = os.path.join(tmpdir, zone + '_dem.tif')
1✔
1091

1092
    # check if extracted file exists already
1093
    if os.path.exists(outpath):
1✔
1094
        return outpath
×
1095

1096
    # download from NASA Earthdata with credentials
1097
    dest_file = download_with_authentication(wwwfile, 'urs.earthdata.nasa.gov')
1✔
1098

1099
    # That means we tried hard but we couldn't find it
1100
    if not dest_file:
1✔
1101
        return None
×
1102

1103
    # ok we have to extract it
1104
    if not os.path.exists(outpath):
1✔
1105
        with zipfile.ZipFile(dest_file) as zf:
1✔
1106
            zf.extractall(tmpdir)
1✔
1107

1108
    # See if we're good, don't overfill the tmp directory
1109
    assert os.path.exists(outpath)
1✔
1110
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1111
    return outpath
1✔
1112

1113

1114
def _download_topo_file_from_cluster(fname):
9✔
1115
    with get_lock():
×
1116
        return _download_topo_file_from_cluster_unlocked(fname)
×
1117

1118

1119
def _download_topo_file_from_cluster_unlocked(fname):
9✔
1120
    """Checks if the special topo data is in the directory and if not,
1121
    download it from the cluster.
1122
    """
1123

1124
    # extract directory
1125
    tmpdir = cfg.PATHS['tmp_dir']
×
1126
    mkdir(tmpdir)
×
1127
    outpath = os.path.join(tmpdir, fname)
×
1128

1129
    url = 'https://cluster.klima.uni-bremen.de/data/dems/'
×
1130
    url += fname + '.zip'
×
1131
    dfile = file_downloader(url)
×
1132

1133
    if not os.path.exists(outpath):
×
1134
        logger.info('Extracting ' + fname + '.zip to ' + outpath + '...')
×
1135
        with zipfile.ZipFile(dfile) as zf:
×
1136
            zf.extractall(tmpdir)
×
1137

1138
    # See if we're good, don't overfill the tmp directory
1139
    assert os.path.exists(outpath)
×
1140
    cfg.get_lru_handler(tmpdir).append(outpath)
×
1141
    return outpath
×
1142

1143

1144
def _download_copdem_file(tileurl):
9✔
1145
    with get_lock():
×
NEW
1146
        return _download_copdem_file_unlocked(tileurl)
×
1147

1148

1149
def _download_copdem_file_unlocked(tileurl):
9✔
1150
    """Checks if Copernicus DEM file is in the directory, if not download it.
1151
    """
NEW
1152
    if not tileurl:
×
NEW
1153
        return None
×
NEW
1154
    dest_file = file_downloader(tileurl)
×
1155
    # None means we tried hard but we couldn't find it
1156
    if not dest_file:
×
1157
        return None
×
NEW
1158
    return dest_file
×
1159

1160

1161
def _download_aw3d30_file(zone):
9✔
1162
    with get_lock():
1✔
1163
        return _download_aw3d30_file_unlocked(zone)
1✔
1164

1165

1166
def _download_aw3d30_file_unlocked(fullzone):
9✔
1167
    """Checks if the AW3D30 data is in the directory and if not, download it.
1168
    """
1169

1170
    # extract directory
1171
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1172
    mkdir(tmpdir)
1✔
1173

1174
    # tarfiles are extracted in directories per each tile
1175
    tile = fullzone.split('/')[1]
1✔
1176
    demfile = os.path.join(tmpdir, tile, tile + '_AVE_DSM.tif')
1✔
1177

1178
    # check if extracted file exists already
1179
    if os.path.exists(demfile):
1✔
1180
        return demfile
×
1181

1182
    # Did we download it yet?
1183
    ftpfile = ('ftp://ftp.eorc.jaxa.jp/pub/ALOS/ext1/AW3D30/release_v1804/'
1✔
1184
               + fullzone + '.tar.gz')
1185
    try:
1✔
1186
        dest_file = file_downloader(ftpfile, timeout=180)
1✔
1187
    except urllib.error.URLError:
×
1188
        # This error is raised if file is not available, could be water
1189
        return None
×
1190

1191
    # None means we tried hard but we couldn't find it
1192
    if not dest_file:
1✔
1193
        return None
×
1194

1195
    # ok we have to extract it
1196
    if not os.path.exists(demfile):
1✔
1197
        from oggm.utils import robust_tar_extract
1✔
1198
        dempath = os.path.dirname(demfile)
1✔
1199
        robust_tar_extract(dest_file, dempath)
1✔
1200

1201
    # See if we're good, don't overfill the tmp directory
1202
    assert os.path.exists(demfile)
1✔
1203
    # this tarfile contains several files
1204
    for file in os.listdir(dempath):
1✔
1205
        cfg.get_lru_handler(tmpdir).append(os.path.join(dempath, file))
1✔
1206
    return demfile
1✔
1207

1208

1209
def _download_mapzen_file(zone):
9✔
1210
    with get_lock():
1✔
1211
        return _download_mapzen_file_unlocked(zone)
1✔
1212

1213

1214
def _download_mapzen_file_unlocked(zone):
9✔
1215
    """Checks if the mapzen data is in the directory and if not, download it.
1216
    """
1217
    bucket = 'elevation-tiles-prod'
1✔
1218
    prefix = 'geotiff'
1✔
1219
    url = 'http://s3.amazonaws.com/%s/%s/%s' % (bucket, prefix, zone)
1✔
1220

1221
    # That's all
1222
    return file_downloader(url, timeout=180)
1✔
1223

1224

1225
def get_prepro_gdir(rgi_version, rgi_id, border, prepro_level, base_url=None):
9✔
1226
    with get_lock():
2✔
1227
        return _get_prepro_gdir_unlocked(rgi_version, rgi_id, border,
2✔
1228
                                         prepro_level, base_url=base_url)
1229

1230

1231
def get_prepro_base_url(base_url=None, rgi_version=None, border=None,
9✔
1232
                        prepro_level=None):
1233
    """Extended base url where to find the desired gdirs."""
1234

1235
    if base_url is None:
2✔
1236
        raise InvalidParamsError('Starting with v1.6, users now have to '
×
1237
                                 'explicitly indicate the url they want '
1238
                                 'to start from.')
1239

1240
    if not base_url.endswith('/'):
2✔
1241
        base_url += '/'
×
1242

1243
    if rgi_version is None:
2✔
1244
        rgi_version = cfg.PARAMS['rgi_version']
1✔
1245

1246
    if border is None:
2✔
1247
        border = cfg.PARAMS['border']
1✔
1248

1249
    url = base_url
2✔
1250
    url += 'RGI{}/'.format(rgi_version)
2✔
1251
    url += 'b_{:03d}/'.format(int(border))
2✔
1252
    url += 'L{:d}/'.format(prepro_level)
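    # e.g. rgi_version='62', border=80, prepro_level=3 gives
    # '<base_url>RGI62/b_080/L3/' (illustrative values)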
2✔
1253
    return url
2✔
1254

1255

1256
def _get_prepro_gdir_unlocked(rgi_version, rgi_id, border, prepro_level,
9✔
1257
                              base_url=None):
1258

1259
    url = get_prepro_base_url(rgi_version=rgi_version, border=border,
2✔
1260
                              prepro_level=prepro_level, base_url=base_url)
1261
    if len(rgi_id) == 23:
2✔
1262
        # RGI7
1263
        url += '{}/{}.tar'.format(rgi_id[:17], rgi_id[:20])
×
1264
    else:
1265
        url += '{}/{}.tar'.format(rgi_id[:8], rgi_id[:11])
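        # e.g. 'RGI60-11.00897' -> 'RGI60-11/RGI60-11.00.tar' (gdirs are
        # bundled in tar files by the first 11 characters of the id)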
2✔
1266
    tar_base = file_downloader(url)
2✔
1267
    if tar_base is None:
2✔
1268
        raise RuntimeError('Could not find file at ' + url)
×
1269

1270
    return tar_base
2✔
1271

1272

1273
def get_geodetic_mb_dataframe(file_path=None):
9✔
1274
    """Fetches the reference geodetic dataframe for calibration.
1275

1276
    Currently that's the data from Hugonnet et al. (2021), corrected for
1277
    outliers and with void filled. The data preparation script is
1278
    available at
1279
    https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/convert.ipynb
1280

1281
    Parameters
1282
    ----------
1283
    file_path : str
1284
        in case you have your own file to parse (check the format first!)
1285

1286
    Returns
1287
    -------
1288
    a DataFrame with the data.
1289
    """
1290

1291
    # fetch the file online or read custom file
1292
    if file_path is None:
7✔
1293
        base_url = 'https://cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/'
7✔
1294
        file_name = 'hugonnet_2021_ds_rgi60_pergla_rates_10_20_worldwide_filled.hdf'
7✔
1295
        file_path = file_downloader(base_url + file_name)
7✔
1296

1297
    # Did we open it yet?
1298
    if file_path in cfg.DATA:
7✔
1299
        return cfg.DATA[file_path]
4✔
1300

1301
    # If not let's go
1302
    extension = os.path.splitext(file_path)[1]
7✔
1303
    if extension == '.csv':
7✔
1304
        df = pd.read_csv(file_path, index_col=0)
×
1305
    elif extension == '.hdf':
7✔
1306
        df = pd.read_hdf(file_path)
7✔
1307

1308
    # Check for missing data (old files)
1309
    if len(df.loc[df['dmdtda'].isnull()]) > 0:
7✔
1310
        raise InvalidParamsError('The reference file you are using has missing '
×
1311
                                 'data and is probably outdated (sorry for '
1312
                                 'that). Delete the file at '
1313
                                 f'{file_path} and start again.')
1314
    cfg.DATA[file_path] = df
7✔
1315
    return df
7✔
1316

1317

1318
def get_temp_bias_dataframe(dataset='w5e5'):
9✔
1319
    """Fetches the reference geodetic dataframe for calibration.
1320

1321
    Currently, that's the data from Hughonnet et al. (2021), corrected for
1322
    outliers and with void filled. The data preparation script is
1323
    available at
1324
    https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/convert.ipynb
1325

1326
    Parameters
1327
    ----------
1328
    file_path : str
1329
        in case you have your own file to parse (check the format first!)
1330

1331
    Returns
1332
    -------
1333
    a DataFrame with the data.
1334
    """
1335

1336
    if dataset != 'w5e5':
1✔
1337
        raise NotImplementedError(f'No such dataset available yet: {dataset}')
1338

1339
    # fetch the file online
1340
    base_url = ('https://cluster.klima.uni-bremen.de/~oggm/ref_mb_params/oggm_v1.6/'
1✔
1341
                'w5e5_temp_bias_v2023.4.csv')
1342

1343
    file_path = file_downloader(base_url)
1✔
1344

1345
    # Did we open it yet?
1346
    if file_path in cfg.DATA:
1✔
1347
        return cfg.DATA[file_path]
1✔
1348

1349
    # If not let's go
1350
    extension = os.path.splitext(file_path)[1]
1✔
1351
    if extension == '.csv':
1✔
1352
        df = pd.read_csv(file_path, index_col=0)
1✔
1353
    elif extension == '.hdf':
×
1354
        df = pd.read_hdf(file_path)
×
1355

1356
    cfg.DATA[file_path] = df
1✔
1357
    return df
1✔
1358

1359

1360
def srtm_zone(lon_ex, lat_ex):
9✔
1361
    """Returns a list of SRTM zones covering the desired extent.
1362
    """
1363

1364
    # SRTM are sorted in tiles of 5 degrees
1365
    srtm_x0 = -180.
1✔
1366
    srtm_y0 = 60.
1✔
1367
    srtm_dx = 5.
1✔
1368
    srtm_dy = -5.
1✔
1369

1370
    # quick n dirty solution to be sure that we will cover the whole range
1371
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1372
    # int() to avoid Deprec warning:
1373
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1374
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1375
    # int() to avoid Deprec warning
1376
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1377

1378
    zones = []
1✔
1379
    for lon in lon_ex:
1✔
1380
        for lat in lat_ex:
1✔
1381
            dx = lon - srtm_x0
1✔
1382
            dy = lat - srtm_y0
1✔
1383
            if dy > 0:
1✔
1384
                continue
×
1385
            zx = np.ceil(dx / srtm_dx)
1✔
1386
            zy = np.ceil(dy / srtm_dy)
1✔
1387
            zones.append('{:02.0f}_{:02.0f}'.format(zx, zy))
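            # e.g. lon=10.5, lat=46.5: dx=190.5, dy=-13.5 -> zone '39_03'
            # (worked example for the 5x5 degree tiling defined above)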
1✔
1388
    return list(sorted(set(zones)))
1✔
1389

1390

1391
def _tandem_path(lon_tile, lat_tile):
9✔
1392

1393
    # OK we have a proper tile now
1394
    # This changed in December 2022
1395

1396
    # First folder level is sorted from S to N
1397
    level_0 = 'S' if lat_tile < 0 else 'N'
1✔
1398
    level_0 += '{:02d}'.format(abs(lat_tile))
1✔
1399

1400
    # Second folder level is sorted from W to E, but in 10 steps
1401
    level_1 = 'W' if lon_tile < 0 else 'E'
1✔
1402
    level_1 += '{:03d}'.format(divmod(abs(lon_tile), 10)[0] * 10)
1✔
1403

1404
    # Level 2 is formatting, but depends on lat
1405
    level_2 = 'W' if lon_tile < 0 else 'E'
1✔
1406
    if abs(lat_tile) <= 60:
1✔
1407
        level_2 += '{:03d}'.format(abs(lon_tile))
1✔
1408
    elif abs(lat_tile) <= 80:
1✔
1409
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 2)[0] * 2)
1✔
1410
    else:
1411
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 4)[0] * 4)
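    # e.g. lat_tile=75, lon_tile=-7 -> 'N75/W000/TDM1_DEM__30_N75W006'
    # (worked example; 2-degree lon steps apply between 60 and 80 deg lat)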
1✔
1412

1413
    # Final path
1414
    out = (level_0 + '/' + level_1 + '/' +
1✔
1415
           'TDM1_DEM__30_{}{}'.format(level_0, level_2))
1416
    return out
1✔
1417

1418

1419
def tandem_zone(lon_ex, lat_ex):
9✔
1420
    """Returns a list of TanDEM-X zones covering the desired extent.
1421
    """
1422

1423
    # Files are one-by-one tiles, so let's loop over them
1424
    # For higher lats they are stored in steps of 2 and 4. My code below
1425
    # is probably giving more files than needed but better safe than sorry
1426
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1427
                          dtype=int)
1428
    zones = []
1✔
1429
    for lat in lat_tiles:
1✔
1430
        if abs(lat) < 60:
1✔
1431
            l0 = np.floor(lon_ex[0])
1✔
1432
            l1 = np.floor(lon_ex[1])
1✔
1433
        elif abs(lat) < 80:
1✔
1434
            l0 = divmod(lon_ex[0], 2)[0] * 2
1✔
1435
            l1 = divmod(lon_ex[1], 2)[0] * 2
1✔
1436
        elif abs(lat) < 90:
1✔
1437
            l0 = divmod(lon_ex[0], 4)[0] * 4
1✔
1438
            l1 = divmod(lon_ex[1], 4)[0] * 4
1✔
1439
        lon_tiles = np.arange(l0, l1+1, dtype=int)
1✔
1440
        for lon in lon_tiles:
1✔
1441
            zones.append(_tandem_path(lon, lat))
1✔
1442
    return list(sorted(set(zones)))
1✔
1443

1444

1445
def _aw3d30_path(lon_tile, lat_tile):
9✔
1446

1447
    # OK we have a proper tile now
1448

1449
    # Folders are sorted with N E S W in 5 degree steps
1450
    # But in N and E the lower boundary is indicated
1451
    # e.g. N060 contains N060 - N064
1452
    # e.g. E000 contains E000 - E004
1453
    # but S and W indicate the upper boundary:
1454
    # e.g. S010 contains S006 - S010
1455
    # e.g. W095 contains W091 - W095
1456

1457
    # get letters
1458
    ns = 'S' if lat_tile < 0 else 'N'
1✔
1459
    ew = 'W' if lon_tile < 0 else 'E'
1✔
1460

1461
    # get lat/lon
1462
    lon = abs(5 * np.floor(lon_tile/5))
1✔
1463
    lat = abs(5 * np.floor(lat_tile/5))
1✔
1464

1465
    folder = '%s%.3d%s%.3d' % (ns, lat, ew, lon)
1✔
1466
    filename = '%s%.3d%s%.3d' % (ns, abs(lat_tile), ew, abs(lon_tile))
1✔
1467

1468
    # Final path
1469
    out = folder + '/' + filename
1✔
1470
    return out
1✔
1471

1472

1473
def aw3d30_zone(lon_ex, lat_ex):
9✔
1474
    """Returns a list of AW3D30 zones covering the desired extent.
1475
    """
1476

1477
    # Files are 1x1 deg tiles, so let's loop over them
1478
    lon_tiles = np.arange(np.floor(lon_ex[0]), np.ceil(lon_ex[1]+1e-9),
1✔
1479
                          dtype=int)
1480
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1481
                          dtype=int)
1482
    zones = []
1✔
1483
    for lon in lon_tiles:
1✔
1484
        for lat in lat_tiles:
1✔
1485
            zones.append(_aw3d30_path(lon, lat))
1✔
1486
    return list(sorted(set(zones)))
1✔
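
# Illustrative sketch (hand-derived from _aw3d30_path above, not verified
# server paths): the folder-vs-file naming described in the comments above.
def _sketch_aw3d30_path_examples():
    ne_tile = _aw3d30_path(7, 46)
    # -> 'N045E005/N046E007' (N/E folders are named after their lower boundary)
    sw_tile = _aw3d30_path(-71, -50)
    # -> 'S050W075/S050W071' (S/W folders are named after their upper boundary)
    return ne_tile, sw_tile
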
1487

1488

1489
def _extent_to_polygon(lon_ex, lat_ex, to_crs=None):
9✔
1490

1491
    if lon_ex[0] == lon_ex[1] and lat_ex[0] == lat_ex[1]:
1✔
1492
        out = shpg.Point(lon_ex[0], lat_ex[0])
1✔
1493
    else:
1494
        x = [lon_ex[0], lon_ex[1], lon_ex[1], lon_ex[0], lon_ex[0]]
1✔
1495
        y = [lat_ex[0], lat_ex[0], lat_ex[1], lat_ex[1], lat_ex[0]]
1✔
1496
        out = shpg.Polygon(np.array((x, y)).T)
1✔
1497
    if to_crs is not None:
1✔
1498
        out = salem.transform_geometry(out, to_crs=to_crs)
1✔
1499
    return out
1✔
1500

1501

1502
def arcticdem_zone(lon_ex, lat_ex):
9✔
1503
    """Returns a list of Arctic-DEM zones covering the desired extent.
1504
    """
1505

1506
    gdf = gpd.read_file(get_demo_file('ArcticDEM_Tile_Index_Rel7_by_tile.shp'))
1✔
1507
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1508
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1509
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1510

1511

1512
def rema_zone(lon_ex, lat_ex):
9✔
1513
    """Returns a list of REMA-DEM zones covering the desired extent.
1514
    """
1515

1516
    gdf = gpd.read_file(get_demo_file('REMA_Tile_Index_Rel1.1.shp'))
1✔
1517
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1518
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1519
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1520

1521

1522
def alaska_dem_zone(lon_ex, lat_ex):
9✔
1523
    """Returns a list of Alaska-DEM zones covering the desired extent.
1524
    """
1525

1526
    gdf = gpd.read_file(get_demo_file('Alaska_albers_V3_tiles.shp'))
1✔
1527
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1528
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1529
    return gdf.tile.values if len(gdf) > 0 else []
1✔
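
# Illustrative sketch: what the helper used by the three tile-index lookups
# above returns. A degenerate extent collapses to a Point, otherwise a lon/lat
# box Polygon is built (and reprojected via salem when to_crs is given).
def _sketch_extent_to_polygon():
    pt = _extent_to_polygon([10.5, 10.5], [46.0, 46.0])   # shapely Point
    box = _extent_to_polygon([10.0, 11.0], [46.0, 47.0])  # shapely Polygon
    return pt, box
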
1530

1531

1532
def _copdem_tilelist():
9✔
1533
    """Fetches the list of tiles."""
1534

1535
    key = 'copdem_tilelist'
1✔
1536

1537
    # Did we open it yet?
1538
    if key in cfg.DATA:
1✔
1539
        return cfg.DATA[key]
1✔
1540

1541
    # If not let's go
1542
    list_url = ('https://cluster.klima.uni-bremen.de/~oggm/'
1✔
1543
                'test_files/copdem/copdem_tiles_202504.csv')
1544
    with open(file_downloader(list_url), 'r') as f:
1✔
1545
        reader = csv.reader(f)
1✔
1546
        next(reader)  # skip header
1✔
1547
        coord_set = set(tuple(row) for row in reader)
1✔
1548

1549
    cfg.DATA[key] = coord_set
1✔
1550
    return coord_set
1✔
1551

1552

1553
def copdem_zone(lon_ex, lat_ex, source):
9✔
1554
    """Returns a list of Copernicus DEM urls and tilenames.
1555

1556
    The url entry is None if the tile is not available.
1557

1558
    New: we now go for the AWS bucket from OpenTopography: https://doi.org/10.5069/G9028PQB
1559
    """
1560

1561
    # because we use both meters and arc secs in our filenames...
1562
    dxm = source[-2:]
1✔
1563
    if dxm == '90':
1✔
1564
        asec = '30'
1✔
1565
    elif dxm == '30':
1✔
1566
        asec = '10'
1✔
1567
    else:
1568
        raise InvalidDEMError('COPDEM Version not valid.')
×
1569

1570
    # Available tiles
1571
    tile_list = _copdem_tilelist()
1✔
1572

1573
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1574
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1575
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1576

1577
    base_url = f'https://opentopography.s3.sdsc.edu/raster/COP{dxm}/COP{dxm}_hh'
1✔
1578

1579
    flist = []
1✔
1580
    for lat in lats:
1✔
1581
        # north or south?
1582
        ns = 'S' if lat < 0 else 'N'
1✔
1583
        for lon in lons:
1✔
1584
            # east or west?
1585
            ew = 'W' if lon < 0 else 'E'
1✔
1586
            lat_str = '{}{:02.0f}'.format(ns, abs(lat))
1✔
1587
            lon_str = '{}{:03.0f}'.format(ew, abs(lon))
1✔
1588
            tilename = f'Copernicus_DSM_{asec}_{lat_str}_00_{lon_str}_00_DEM'
1✔
1589
            if (lat_str, lon_str) in tile_list:
1✔
1590
                out = (f'{base_url}/{tilename}.tif', tilename)
1✔
1591
            else:
NEW
1592
                out = (None, tilename)
×
1593
            flist.append(out)
1✔
1594

1595
    return flist
1✔
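
# Illustrative sketch: one COPDEM90 tile around 46.1N / 10.3E ('90' m grid ->
# '30' arcsec file names). The first call needs internet access to fetch the
# tile list, and the URL shown below is composed from the logic above - an
# assumption, not a verified download link.
def _sketch_copdem_zone_usage():
    flist = copdem_zone([10.2, 10.4], [46.1, 46.2], 'COPDEM90')
    # flist -> [('https://opentopography.s3.sdsc.edu/raster/COP90/COP90_hh/'
    #            'Copernicus_DSM_30_N46_00_E010_00_DEM.tif',
    #            'Copernicus_DSM_30_N46_00_E010_00_DEM')]
    return flist
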
1596

1597

1598
def dem3_viewpano_zone(lon_ex, lat_ex):
9✔
1599
    """Returns a list of DEM3 zones covering the desired extent.
1600

1601
    http://viewfinderpanoramas.org/Coverage%20map%20viewfinderpanoramas_org3.htm
1602
    """
1603

1604
    for _f in DEM3REG.keys():
1✔
1605

1606
        if (np.min(lon_ex) >= DEM3REG[_f][0]) and \
1✔
1607
           (np.max(lon_ex) <= DEM3REG[_f][1]) and \
1608
           (np.min(lat_ex) >= DEM3REG[_f][2]) and \
1609
           (np.max(lat_ex) <= DEM3REG[_f][3]):
1610

1611
            # test some weird inset files in Antarctica
1612
            if (np.min(lon_ex) >= -91.) and (np.max(lon_ex) <= -90.) and \
1✔
1613
               (np.min(lat_ex) >= -72.) and (np.max(lat_ex) <= -68.):
1614
                return ['SR15']
×
1615

1616
            elif (np.min(lon_ex) >= -47.) and (np.max(lon_ex) <= -43.) and \
1✔
1617
                 (np.min(lat_ex) >= -61.) and (np.max(lat_ex) <= -60.):
1618
                return ['SP23']
×
1619

1620
            elif (np.min(lon_ex) >= 162.) and (np.max(lon_ex) <= 165.) and \
1✔
1621
                 (np.min(lat_ex) >= -68.) and (np.max(lat_ex) <= -66.):
1622
                return ['SQ58']
×
1623

1624
            # test some rogue Greenland tiles as well
1625
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1626
                 (np.min(lat_ex) >= 76.) and (np.max(lat_ex) <= 80.):
1627
                return ['T19']
×
1628

1629
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1630
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1631
                return ['U19']
×
1632

1633
            elif (np.min(lon_ex) >= -66.) and (np.max(lon_ex) <= -60.) and \
1✔
1634
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1635
                return ['U20']
×
1636

1637
            elif (np.min(lon_ex) >= -60.) and (np.max(lon_ex) <= -54.) and \
1✔
1638
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1639
                return ['U21']
×
1640

1641
            elif (np.min(lon_ex) >= -54.) and (np.max(lon_ex) <= -48.) and \
1✔
1642
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1643
                return ['U22']
×
1644

1645
            elif (np.min(lon_ex) >= -25.) and (np.max(lon_ex) <= -13.) and \
1✔
1646
                 (np.min(lat_ex) >= 63.) and (np.max(lat_ex) <= 67.):
1647
                return ['ISL']
1✔
1648

1649
            else:
1650
                return [_f]
1✔
1651

1652
    # if the tile doesn't have a special name, its name can be found like this:
1653
    # corrected SRTMs are sorted in tiles of 6 deg longitude and 4 deg latitude
1654
    srtm_x0 = -180.
1✔
1655
    srtm_y0 = 0.
1✔
1656
    srtm_dx = 6.
1✔
1657
    srtm_dy = 4.
1✔
1658

1659
    # quick n dirty solution to be sure that we will cover the whole range
1660
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1661
    # TODO: Fabien, find out what Johannes wanted with this +3
1662
    # +3 just makes the sample count a bit larger, to be on the safe side
1663
    # int() to avoid Deprec warning
1664
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dy) + 3))
1✔
1665
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1666
    # int() to avoid Deprec warning
1667
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dx) + 3))
1✔
1668

1669
    zones = []
1✔
1670
    for lon in lon_ex:
1✔
1671
        for lat in lat_ex:
1✔
1672
            dx = lon - srtm_x0
1✔
1673
            dy = lat - srtm_y0
1✔
1674
            zx = np.ceil(dx / srtm_dx)
1✔
1675
            # convert number to letter
1676
            zy = chr(int(abs(dy / srtm_dy)) + ord('A'))
1✔
1677
            if lat >= 0:
1✔
1678
                zones.append('%s%02.0f' % (zy, zx))
1✔
1679
            else:
1680
                zones.append('S%s%02.0f' % (zy, zx))
×
1681
    return list(sorted(set(zones)))
1✔
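
# Illustrative sketch (hand-derived, assuming the extent is not inside any of
# the special DEM3REG boxes): the generic naming for an extent in the European
# Alps (letter = 4-deg latitude band counted from the equator, number = 6-deg
# longitude column counted from 180W).
def _sketch_dem3_zone_usage():
    zones = dem3_viewpano_zone([10.3, 10.4], [46.2, 46.3])
    # zones -> ['L32']
    return zones
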
1682

1683

1684
def aster_zone(lon_ex, lat_ex):
9✔
1685
    """Returns a list of ASTGTMV3 zones covering the desired extent.
1686

1687
    ASTER v3 tiles are 1 degree x 1 degree
1688
    N50 contains 50 to 50.9
1689
    E10 contains 10 to 10.9
1690
    S70 contains -69.99 to -69.0
1691
    W20 contains -19.99 to -19.0
1692
    """
1693

1694
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1695
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1696
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1697

1698
    zones = []
1✔
1699
    for lat in lats:
1✔
1700
        # north or south?
1701
        ns = 'S' if lat < 0 else 'N'
1✔
1702
        for lon in lons:
1✔
1703
            # east or west?
1704
            ew = 'W' if lon < 0 else 'E'
1✔
1705
            filename = 'ASTGTMV003_{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat),
1✔
1706
                                                                ew, abs(lon))
1707
            zones.append(filename)
1✔
1708
    return list(sorted(set(zones)))
1✔
1709

1710

1711
def nasadem_zone(lon_ex, lat_ex):
9✔
1712
    """Returns a list of NASADEM zones covering the desired extent.
1713

1714
    NASADEM tiles are 1 degree x 1 degree
1715
    N50 contains 50 to 50.9
1716
    E10 contains 10 to 10.9
1717
    S70 contains -69.99 to -69.0
1718
    W20 contains -19.99 to -19.0
1719
    """
1720

1721
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1722
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1723
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1724

1725
    zones = []
1✔
1726
    for lat in lats:
1✔
1727
        # north or south?
1728
        ns = 's' if lat < 0 else 'n'
1✔
1729
        for lon in lons:
1✔
1730
            # east or west?
1731
            ew = 'w' if lon < 0 else 'e'
1✔
1732
            filename = '{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat), ew,
1✔
1733
                                                     abs(lon))
1734
            zones.append(filename)
1✔
1735
    return list(sorted(set(zones)))
1✔
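
# Illustrative sketch (hand-derived values): ASTER and NASADEM share the
# 1x1 deg tiling documented above; only the naming differs (uppercase with a
# product prefix vs plain lowercase).
def _sketch_1deg_tile_names():
    aster = aster_zone([10.2, 10.4], [46.1, 46.2])      # -> ['ASTGTMV003_N46E010']
    nasadem = nasadem_zone([10.2, 10.4], [46.1, 46.2])  # -> ['n46e010']
    return aster, nasadem
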
1736

1737

1738
def mapzen_zone(lon_ex, lat_ex, dx_meter=None, zoom=None):
9✔
1739
    """Returns a list of AWS mapzen zones covering the desired extent.
1740

1741
    For mapzen one has to specify the level of detail (zoom) one wants. The
1742
    best way in OGGM is to specify dx_meter of the underlying map and OGGM
1743
    will decide which zoom level works best.
1744
    """
1745

1746
    if dx_meter is None and zoom is None:
1✔
1747
        raise InvalidParamsError('Need either zoom level or dx_meter.')
×
1748

1749
    bottom, top = lat_ex
1✔
1750
    left, right = lon_ex
1✔
1751
    ybound = 85.0511
1✔
1752
    if bottom <= -ybound:
1✔
1753
        bottom = -ybound
1✔
1754
    if top <= -ybound:
1✔
1755
        top = -ybound
1✔
1756
    if bottom > ybound:
1✔
1757
        bottom = ybound
1✔
1758
    if top > ybound:
1✔
1759
        top = ybound
1✔
1760
    if right >= 180:
1✔
1761
        right = 179.999
1✔
1762
    if left >= 180:
1✔
1763
        left = 179.999
1✔
1764

1765
    if dx_meter:
1✔
1766
        # Find out the zoom so that we are close to the desired accuracy
1767
        lat = np.max(np.abs([bottom, top]))
1✔
1768
        zoom = int(np.ceil(math.log2((math.cos(lat * math.pi / 180) *
1✔
1769
                                      2 * math.pi * WEB_EARTH_RADUIS) /
1770
                                     (WEB_N_PIX * dx_meter))))
1771

1772
        # According to this we should just always stay above 10 (sorry)
1773
        # https://github.com/tilezen/joerd/blob/master/docs/data-sources.md
1774
        zoom = 10 if zoom < 10 else zoom
1✔
1775

1776
    # Code from planetutils
1777
    size = 2 ** zoom
1✔
1778
    xt = lambda x: int((x + 180.0) / 360.0 * size)
1✔
1779
    yt = lambda y: int((1.0 - math.log(math.tan(math.radians(y)) +
1✔
1780
                                       (1 / math.cos(math.radians(y))))
1781
                        / math.pi) / 2.0 * size)
1782
    tiles = []
1✔
1783
    for x in range(xt(left), xt(right) + 1):
1✔
1784
        for y in range(yt(top), yt(bottom) + 1):
1✔
1785
            tiles.append('/'.join(map(str, [zoom, x, str(y) + '.tif'])))
1✔
1786
    return tiles
1✔
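
# Illustrative sketch: how dx_meter translates into a web-mercator zoom level
# for a small extent in the Alps. The zoom values are hand-derived from the
# formula above and should be read as approximate expectations, not guaranteed
# outputs.
def _sketch_mapzen_zoom_choice():
    coarse = mapzen_zone([10.3, 10.4], [46.2, 46.3], dx_meter=200)
    # -> tile names starting with '10/' (zoom level 10)
    fine = mapzen_zone([10.3, 10.4], [46.2, 46.3], dx_meter=100)
    # -> tile names starting with '11/' (zoom level 11)
    return coarse, fine
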
1787

1788

1789
def get_demo_file(fname):
9✔
1790
    """Returns the path to the desired OGGM-sample-file.
1791

1792
    If the sample data is not cached, it will be downloaded from
1793
    https://github.com/OGGM/oggm-sample-data
1794

1795
    Parameters
1796
    ----------
1797
    fname : str
1798
        Filename of the desired OGGM-sample-file
1799

1800
    Returns
1801
    -------
1802
    str
1803
        Absolute path to the desired file.
1804
    """
1805

1806
    d = download_oggm_files()
7✔
1807
    if fname in d:
7✔
1808
        return d[fname]
7✔
1809
    else:
1810
        return None
1✔
1811

1812

1813
def get_wgms_files():
9✔
1814
    """Get the path to the default WGMS-RGI link file and the data dir.
1815

1816
    Returns
1817
    -------
1818
    (file, dir) : paths to the files
1819
    """
1820

1821
    download_oggm_files()
4✔
1822
    sdir = os.path.join(cfg.CACHE_DIR,
4✔
1823
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1824
                        'wgms')
1825
    datadir = os.path.join(sdir, 'mbdata')
4✔
1826
    assert os.path.exists(datadir)
4✔
1827

1828
    outf = os.path.join(sdir, 'rgi_wgms_links_20220112.csv')
4✔
1829
    outf = pd.read_csv(outf, dtype={'RGI_REG': object})
4✔
1830

1831
    return outf, datadir
4✔
1832

1833

1834
def get_glathida_file():
9✔
1835
    """Get the path to the default GlaThiDa-RGI link file.
1836

1837
    Returns
1838
    -------
1839
    file : path to the file
1840
    """
1841

1842
    # Roll our own
1843
    download_oggm_files()
1✔
1844
    sdir = os.path.join(cfg.CACHE_DIR,
1✔
1845
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1846
                        'glathida')
1847
    outf = os.path.join(sdir, 'rgi_glathida_links.csv')
1✔
1848
    assert os.path.exists(outf)
1✔
1849
    return outf
1✔
1850

1851

1852
def get_rgi_dir(version=None, reset=False):
9✔
1853
    """Path to the RGI directory.
1854

1855
    If the RGI files are not present, download them.
1856

1857
    Parameters
1858
    ----------
1859
    version : str
1860
        '5', '6', '62', '70G', '70C'
1861
        defaults to None (linking to the one specified in cfg.PARAMS)
1862
    reset : bool
1863
        If True, deletes the RGI directory first and downloads the data
1864

1865
    Returns
1866
    -------
1867
    str
1868
        path to the RGI directory
1869
    """
1870

1871
    with get_lock():
1✔
1872
        return _get_rgi_dir_unlocked(version=version, reset=reset)
1✔
1873

1874

1875
def _get_rgi_dir_unlocked(version=None, reset=False):
9✔
1876

1877
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
1878
    if version is None:
1✔
1879
        version = cfg.PARAMS['rgi_version']
×
1880

1881
    if len(version) == 1:
1✔
1882
        version += '0'
1✔
1883

1884
    # Be sure the user gave a sensible path to the RGI dir
1885
    if not rgi_dir:
1✔
1886
        raise InvalidParamsError('The RGI data directory has to be '
×
1887
                                 'specified explicitly.')
1888
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
1889
    rgi_dir = os.path.join(rgi_dir, 'RGIV' + version)
1✔
1890
    mkdir(rgi_dir, reset=reset)
1✔
1891

1892
    pattern = '*_rgi{}_*.zip'.format(version)
1✔
1893
    test_file = os.path.join(rgi_dir, f'*_rgi*{version}_manifest.txt')
1✔
1894

1895
    if version == '50':
1✔
1896
        dfile = 'http://www.glims.org/RGI/rgi50_files/rgi50.zip'
1✔
1897
    elif version == '60':
1✔
1898
        dfile = 'http://www.glims.org/RGI/rgi60_files/00_rgi60.zip'
1✔
1899
    elif version == '61':
×
1900
        dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/rgi_61.zip'
×
1901
    elif version == '62':
×
1902
        dfile = 'https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62.zip'
×
1903
    elif version == '70G':
×
1904
        pattern = 'RGI2000-*.zip'
×
1905
        test_file = os.path.join(rgi_dir, 'RGI2000-v7.0-G-01_alaska', 'README.md')
×
1906
        dfile = 'https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0770_rgi_v7/'
×
1907
        dfile += 'global_files/RGI2000-v7.0-G-global.zip'
×
1908
    elif version == '70C':
×
1909
        pattern = 'RGI2000-*.zip'
×
1910
        test_file = os.path.join(rgi_dir, 'RGI2000-v7.0-C-01_alaska', 'README.md')
×
1911
        dfile = 'https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0770_rgi_v7/'
×
1912
        dfile += 'global_files/RGI2000-v7.0-C-global.zip'
×
1913
    else:
1914
        raise InvalidParamsError(f'RGI version not valid: {version}')
×
1915

1916
    if len(glob.glob(test_file)) == 0:
1✔
1917
        # if not there download it
1918
        ofile = file_downloader(dfile, reset=reset)
1✔
1919
        if ofile is None:
1✔
1920
            raise RuntimeError(f'Could not download RGI file: {dfile}')
×
1921
        # Extract root
1922
        try:
1✔
1923
            with zipfile.ZipFile(ofile) as zf:
1✔
1924
                zf.extractall(rgi_dir)
1✔
1925
        except zipfile.BadZipFile:
×
1926
            raise zipfile.BadZipFile(f'RGI file BadZipFile error: {ofile}')
×
1927
        # Extract subdirs
1928
        for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
1✔
1929
            for filename in fnmatch.filter(files, pattern):
1✔
1930
                zfile = os.path.join(root, filename)
1✔
1931
                with zipfile.ZipFile(zfile) as zf:
1✔
1932
                    ex_root = zfile.replace('.zip', '')
1✔
1933
                    mkdir(ex_root)
1✔
1934
                    zf.extractall(ex_root)
1✔
1935
                # delete the zipfile after success
1936
                os.remove(zfile)
1✔
1937
        if len(glob.glob(test_file)) == 0:
1✔
1938
            raise RuntimeError('Could not find a readme file in the RGI '
×
1939
                               'directory: ' + rgi_dir)
1940
    return rgi_dir
1✔
1941

1942

1943
def get_rgi_region_file(region, version=None, reset=False):
9✔
1944
    """Path to the RGI region file.
1945

1946
    If the RGI files are not present, download them.
1947

1948
    Parameters
1949
    ----------
1950
    region : str
1951
        from '01' to '19'
1952
    version : str
1953
        '62', '70G', '70C', defaults to None (linking to the one specified in cfg.PARAMS)
1954
    reset : bool
1955
        If True, deletes the RGI directory first and downloads the data
1956

1957
    Returns
1958
    -------
1959
    str
1960
        path to the RGI shapefile
1961
    """
1962

1963
    rgi_dir = get_rgi_dir(version=version, reset=reset)
1✔
1964
    if version in ['70G', '70C']:
1✔
1965
        f = list(glob.glob(rgi_dir + f"/*/*-{region}_*.shp"))
×
1966
    else:
1967
        f = list(glob.glob(rgi_dir + "/*/*{}_*.shp".format(region)))
1✔
1968
    assert len(f) == 1
1✔
1969
    return f[0]
1✔
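
# Illustrative usage sketch, assuming cfg.initialize() was called,
# cfg.PATHS['rgi_dir'] is set and internet access is available the first time
# (the RGI archive is downloaded and unpacked): fetch and open the RGI 6.2
# outlines for region 11 (Central Europe).
def _sketch_rgi_region_file_usage():
    fpath = get_rgi_region_file('11', version='62')
    return gpd.read_file(fpath)
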
1970

1971

1972
def get_rgi_glacier_entities(rgi_ids, version=None):
9✔
1973
    """Get a list of glacier outlines selected from their RGI IDs.
1974

1975
    Will download RGI data if not present.
1976

1977
    Parameters
1978
    ----------
1979
    rgi_ids : list of str
1980
        the glaciers you want the outlines for
1981
    version : str
1982
        the rgi version ('62', '70G', '70C')
1983

1984
    Returns
1985
    -------
1986
    geopandas.GeoDataFrame
1987
        containing the desired RGI glacier outlines
1988
    """
1989

1990
    if version is None:
×
1991
        if len(rgi_ids[0]) == 14:
×
1992
            # RGI6
1993
            version = rgi_ids[0].split('-')[0][-2:]
×
1994
        else:
1995
            # RGI7 RGI2000-v7.0-G-02-00003
1996
            assert rgi_ids[0].split('-')[1] == 'v7.0'
×
1997
            version = '70' + rgi_ids[0].split('-')[2]
×
1998

1999
    if version in ['70G', '70C']:
×
2000
        regions = [s.split('-')[-2] for s in rgi_ids]
×
2001
    else:
2002
        regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
2003

2004
    selection = []
×
2005
    for reg in sorted(np.unique(regions)):
×
2006
        sh = gpd.read_file(get_rgi_region_file(reg, version=version))
×
2007
        try:
×
2008
            selection.append(sh.loc[sh.RGIId.isin(rgi_ids)])
×
2009
        except AttributeError:
×
2010
            selection.append(sh.loc[sh.rgi_id.isin(rgi_ids)])
×
2011

2012
    # Make a new dataframe of those
2013
    selection = pd.concat(selection)
×
2014
    selection.set_crs(crs=sh.crs, inplace=True, allow_override=True)  # for geolocalisation
×
2015
    if len(selection) != len(rgi_ids):
×
2016
        raise RuntimeError('Could not find all RGI ids')
×
2017

2018
    return selection
×
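
# Illustrative sketch: how the version and region are recovered from the two
# RGI id flavours handled above. The example ids follow the documented formats
# and the parsing mirrors the code above.
def _sketch_rgi_id_parsing():
    rgi6_id = 'RGI60-11.00897'
    rgi7_id = 'RGI2000-v7.0-G-02-00003'
    v6 = rgi6_id.split('-')[0][-2:]             # -> '60'
    reg6 = rgi6_id.split('-')[1].split('.')[0]  # -> '11'
    v7 = '70' + rgi7_id.split('-')[2]           # -> '70G'
    reg7 = rgi7_id.split('-')[-2]               # -> '02'
    return (v6, reg6), (v7, reg7)
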
2019

2020

2021
def get_rgi_intersects_dir(version=None, reset=False):
9✔
2022
    """Path to the RGI directory containing the intersect files.
2023

2024
    If the files are not present, download them.
2025

2026
    Parameters
2027
    ----------
2028
    version : str
2029
        '5', '6', '70G', defaults to None (linking to the one specified in cfg.PARAMS)
2030
    reset : bool
2031
        If True, deletes the intersects before redownloading them
2032

2033
    Returns
2034
    -------
2035
    str
2036
        path to the directory
2037
    """
2038

2039
    with get_lock():
1✔
2040
        return _get_rgi_intersects_dir_unlocked(version=version, reset=reset)
1✔
2041

2042

2043
def _get_rgi_intersects_dir_unlocked(version=None, reset=False):
9✔
2044

2045
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
2046
    if version is None:
1✔
2047
        version = cfg.PARAMS['rgi_version']
×
2048

2049
    if len(version) == 1:
1✔
2050
        version += '0'
1✔
2051

2052
    # Be sure the user gave a sensible path to the RGI dir
2053
    if not rgi_dir:
1✔
2054
        raise InvalidParamsError('The RGI data directory has to be '
×
2055
                                 'specified explicitly.')
2056

2057
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
2058
    mkdir(rgi_dir)
1✔
2059

2060
    dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/'
1✔
2061
    dfile += 'RGI_V{}_Intersects.zip'.format(version)
1✔
2062
    if version == '62':
1✔
2063
        dfile = ('https://cluster.klima.uni-bremen.de/~oggm/rgi/'
×
2064
                 'rgi62_Intersects.zip')
2065
    if version == '70G':
1✔
2066
        dfile = 'https://daacdata.apps.nsidc.org/pub/DATASETS/nsidc0770_rgi_v7/'
×
2067
        dfile += 'global_files/RGI2000-v7.0-I-global.zip'
×
2068

2069
    odir = os.path.join(rgi_dir, 'RGI_V' + version + '_Intersects')
1✔
2070
    if reset and os.path.exists(odir):
1✔
2071
        shutil.rmtree(odir)
×
2072

2073
    # A lot of code for backwards compat (sigh...)
2074
    if version in ['50', '60']:
1✔
2075
        test_file = os.path.join(odir, 'Intersects_OGGM_Manifest.txt')
1✔
2076
        if not os.path.exists(test_file):
1✔
2077
            # if not there download it
2078
            ofile = file_downloader(dfile, reset=reset)
1✔
2079
            # Extract root
2080
            with zipfile.ZipFile(ofile) as zf:
1✔
2081
                zf.extractall(odir)
1✔
2082
            if not os.path.exists(test_file):
1✔
2083
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2084
                                   'directory: ' + odir)
2085
    elif version == '62':
×
2086
        test_file = os.path.join(odir, '*ntersect*anifest.txt')
×
2087
        if len(glob.glob(test_file)) == 0:
×
2088
            # if not there download it
2089
            ofile = file_downloader(dfile, reset=reset)
×
2090
            # Extract root
2091
            with zipfile.ZipFile(ofile) as zf:
×
2092
                zf.extractall(odir)
×
2093
            # Extract subdirs
2094
            pattern = '*_rgi{}_*.zip'.format(version)
×
2095
            for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
×
2096
                for filename in fnmatch.filter(files, pattern):
×
2097
                    zfile = os.path.join(root, filename)
×
2098
                    with zipfile.ZipFile(zfile) as zf:
×
2099
                        ex_root = zfile.replace('.zip', '')
×
2100
                        mkdir(ex_root)
×
2101
                        zf.extractall(ex_root)
×
2102
                    # delete the zipfile after success
2103
                    os.remove(zfile)
×
2104
            if len(glob.glob(test_file)) == 0:
×
2105
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2106
                                   'directory: ' + odir)
2107
    elif version == '70G':
×
2108
        test_file = os.path.join(odir, 'README.md')
×
2109
        if len(glob.glob(test_file)) == 0:
×
2110
            # if not there download it
2111
            ofile = file_downloader(dfile, reset=reset)
×
2112
            # Extract root
2113
            with zipfile.ZipFile(ofile) as zf:
×
2114
                zf.extractall(odir)
×
2115
            # Extract subdirs
2116
            pattern = 'RGI2000-*.zip'
×
2117
            for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
×
2118
                for filename in fnmatch.filter(files, pattern):
×
2119
                    zfile = os.path.join(root, filename)
×
2120
                    with zipfile.ZipFile(zfile) as zf:
×
2121
                        ex_root = zfile.replace('.zip', '')
×
2122
                        mkdir(ex_root)
×
2123
                        zf.extractall(ex_root)
×
2124
                    # delete the zipfile after success
2125
                    os.remove(zfile)
×
2126
            if len(glob.glob(test_file)) == 0:
×
2127
                raise RuntimeError('Could not find a README file in the RGI intersects '
×
2128
                                   'directory: ' + odir)
2129

2130
    return odir
1✔
2131

2132

2133
def get_rgi_intersects_region_file(region=None, version=None, reset=False):
9✔
2134
    """Path to the RGI regional intersect file.
2135

2136
    If the RGI files are not present, download them.
2137

2138
    Parameters
2139
    ----------
2140
    region : str
2141
        from '00' to '19', with '00' being the global file (deprecated).
2142
        From RGI version '61' onwards, please use `get_rgi_intersects_entities`
2143
        with a list of glaciers instead of relying on the global file.
2144
    version : str
2145
        '5', '6', '61', '70G'. Defaults to the one specified in cfg.PARAMS
2146
    reset : bool
2147
        If True, deletes the intersect file before redownloading it
2148

2149
    Returns
2150
    -------
2151
    str
2152
        path to the RGI intersects shapefile
2153
    """
2154

2155
    if version is None:
1✔
2156
        version = cfg.PARAMS['rgi_version']
×
2157
    if len(version) == 1:
1✔
2158
        version += '0'
1✔
2159

2160
    rgi_dir = get_rgi_intersects_dir(version=version, reset=reset)
1✔
2161

2162
    if region == '00':
1✔
2163
        if version in ['50', '60']:
1✔
2164
            version = 'AllRegs'
1✔
2165
            region = '*'
1✔
2166
        else:
2167
            raise InvalidParamsError("From RGI version 61 onwards, please use "
×
2168
                                     "get_rgi_intersects_entities() instead.")
2169

2170
    if version == '70G':
1✔
2171
        f = list(glob.glob(os.path.join(rgi_dir, "*", f'*-{region}_*.shp')))
×
2172
    else:
2173
        f = list(glob.glob(os.path.join(rgi_dir, "*", '*intersects*' + region +
1✔
2174
                                        '_rgi*' + version + '*.shp')))
2175
    assert len(f) == 1
1✔
2176
    return f[0]
1✔
2177

2178

2179
def get_rgi_intersects_entities(rgi_ids, version=None):
9✔
2180
    """Get a list of glacier intersects selected from their RGI IDs.
2181

2182
    Parameters
2183
    ----------
2184
    rgi_ids: list of str
2185
        list of rgi_ids for which you want the intersections
2186
    version: str
2187
        '5', '6', '61', '70G'. Defaults to the one specified in cfg.PARAMS
2188

2189
    Returns
2190
    -------
2191
    geopandas.GeoDataFrame
2192
        with the selected intersects
2193
    """
2194

2195
    if version is None:
×
2196
        version = cfg.PARAMS['rgi_version']
×
2197

2198
    if len(version) == 1:
×
2199
        version += '0'
×
2200

2201
    # RGI V6 or 7
2202
    if version == '70G':
×
2203
        regions = [s.split('-')[-2] for s in rgi_ids]
×
2204
    else:
2205
        try:
×
2206
            regions = [s.split('-')[3] for s in rgi_ids]
×
2207
        except IndexError:
×
2208
            regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
2209

2210
    selection = []
×
2211
    for reg in sorted(np.unique(regions)):
×
2212
        sh = gpd.read_file(get_rgi_intersects_region_file(reg, version=version))
×
2213
        if version == '70G':
×
2214
            selection.append(sh.loc[sh.rgi_g_id_1.isin(rgi_ids) |
×
2215
                                    sh.rgi_g_id_2.isin(rgi_ids)])
2216
        else:
2217
            selection.append(sh.loc[sh.RGIId_1.isin(rgi_ids) |
×
2218
                                    sh.RGIId_2.isin(rgi_ids)])
2219

2220
    # Make a new dataframe of those
2221
    selection = pd.concat(selection)
×
2222
    selection.set_crs(crs=sh.crs, inplace=True, allow_override=True)  # for geolocalisation
×
2223

2224
    return selection
×
2225

2226

2227
def get_rgi7c_to_g_links(region, version='70C', reset=False):
9✔
2228
    """Path to the RGI7 glacier complex to glacier json file.
2229

2230
    This is for version 7C only!
2231

2232
    Parameters
2233
    ----------
2234
    region : str
2235
        from '01' to '19'
2236
    version : str
2237
        '70C', defaults to 70C
2238
    reset : bool
2239
        If True, deletes the RGI directory first and downloads the data
2240

2241
    Returns
2242
    -------
2243
    a dictionary containing the links
2244
    """
2245
    jfile = get_rgi_region_file(region, version=version, reset=reset)
×
2246
    jfile = jfile.replace('.shp', '-CtoG_links.json')
×
2247
    with open(jfile) as f:
×
2248
        out = json.load(f)
×
2249
    return out
×
2250

2251

2252
def is_dem_source_available(source, lon_ex, lat_ex):
9✔
2253
    """Checks if a DEM source is available for your purpose.
2254

2255
    This is only a very rough check! It doesn't mean that the data really is
2256
    available, but at least it's worth a try.
2257

2258
    Parameters
2259
    ----------
2260
    source : str, required
2261
        the source you want to check for
2262
    lon_ex : tuple or int, required
2263
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2264
    lat_ex : tuple or int, required
2265
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2266

2267
    Returns
2268
    -------
2269
    True or False
2270
    """
2271
    from oggm.utils import tolist
7✔
2272
    lon_ex = tolist(lon_ex, length=2)
7✔
2273
    lat_ex = tolist(lat_ex, length=2)
7✔
2274

2275
    def _in_grid(grid_json, lon, lat):
7✔
2276
        i, j = cfg.DATA['dem_grids'][grid_json].transform(lon, lat,
1✔
2277
                                                          maskout=True)
2278
        return np.all(~ (i.mask | j.mask))
1✔
2279

2280
    if source == 'GIMP':
7✔
2281
        return _in_grid('gimpdem_90m_v01.1.json', lon_ex, lat_ex)
1✔
2282
    elif source == 'ARCTICDEM':
7✔
2283
        return _in_grid('arcticdem_mosaic_100m_v3.0.json', lon_ex, lat_ex)
1✔
2284
    elif source == 'RAMP':
7✔
2285
        return _in_grid('AntarcticDEM_wgs84.json', lon_ex, lat_ex)
1✔
2286
    elif source == 'REMA':
7✔
2287
        return _in_grid('REMA_100m_dem.json', lon_ex, lat_ex)
1✔
2288
    elif source == 'ALASKA':
7✔
2289
        return _in_grid('Alaska_albers_V3.json', lon_ex, lat_ex)
×
2290
    elif source == 'TANDEM':
7✔
2291
        return True
1✔
2292
    elif source == 'AW3D30':
7✔
2293
        return np.min(lat_ex) > -60
1✔
2294
    elif source == 'MAPZEN':
7✔
2295
        return True
1✔
2296
    elif source == 'DEM3':
7✔
2297
        return True
1✔
2298
    elif source == 'ASTER':
7✔
2299
        return True
1✔
2300
    elif source == 'SRTM':
7✔
2301
        return np.max(np.abs(lat_ex)) < 60
1✔
2302
    elif source in ['COPDEM30', 'COPDEM90']:
7✔
2303
        return True
1✔
2304
    elif source == 'NASADEM':
7✔
2305
        return (np.min(lat_ex) > -56) and (np.max(lat_ex) < 60)
×
2306
    elif source == 'USER':
7✔
2307
        return True
×
2308
    elif source is None:
7✔
2309
        return True
7✔
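
# Illustrative sketch: the rough latitude rules above in action. SRTM stops at
# 60 deg latitude, while the Copernicus DEMs are treated as globally available.
# Values follow directly from the checks above.
def _sketch_dem_availability_checks():
    alps_srtm = is_dem_source_available('SRTM', (10.2, 10.4), (46.1, 46.2))          # True
    iceland_srtm = is_dem_source_available('SRTM', (-19.5, -19.2), (64.0, 64.2))     # False
    iceland_cop = is_dem_source_available('COPDEM90', (-19.5, -19.2), (64.0, 64.2))  # True
    return alps_srtm, iceland_srtm, iceland_cop
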
2310

2311

2312
def default_dem_source(rgi_id):
9✔
2313
    """Current default DEM source at a given location.
2314

2315
    Parameters
2316
    ----------
2317
    rgi_id : str
2318
        the RGI id
2319

2320
    Returns
2321
    -------
2322
    the chosen DEM source
2323
    """
2324
    rgi_reg = 'RGI{}'.format(rgi_id[6:8])
1✔
2325
    rgi_id = rgi_id[:14]
1✔
2326
    if cfg.DEM_SOURCE_TABLE.get(rgi_reg) is None:
1✔
2327
        fp = get_demo_file('rgi62_dem_frac.h5')
1✔
2328
        cfg.DEM_SOURCE_TABLE[rgi_reg] = pd.read_hdf(fp)
1✔
2329

2330
    sel = cfg.DEM_SOURCE_TABLE[rgi_reg].loc[rgi_id]
1✔
2331
    for s in ['NASADEM', 'COPDEM90', 'COPDEM30', 'GIMP', 'REMA',
1✔
2332
              'RAMP', 'TANDEM', 'MAPZEN']:
2333
        if sel.loc[s] > 0.75:
1✔
2334
            return s
1✔
2335
    # If nothing works, try COPDEM again
2336
    return 'COPDEM90'
×
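
# Illustrative sketch: the priority rule above applied to a made-up coverage
# row (fraction of the glacier covered by each DEM). The first source covering
# more than 75% wins, even if a source further down the list covers more.
def _sketch_dem_priority_rule():
    sel = pd.Series({'NASADEM': 0.8, 'COPDEM90': 1.0, 'COPDEM30': 1.0,
                     'GIMP': 0.0, 'REMA': 0.0, 'RAMP': 0.0,
                     'TANDEM': 1.0, 'MAPZEN': 1.0})
    for s in ['NASADEM', 'COPDEM90', 'COPDEM30', 'GIMP', 'REMA',
              'RAMP', 'TANDEM', 'MAPZEN']:
        if sel.loc[s] > 0.75:
            return s  # -> 'NASADEM'
    return 'COPDEM90'
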
2337

2338

2339
def get_topo_file(lon_ex=None, lat_ex=None, gdir=None, *,
9✔
2340
                  dx_meter=None, zoom=None, source=None):
2341
    """Path(s) to the DEM file(s) covering the desired extent.
2342

2343
    If the needed files for covering the extent are not present, download them.
2344

2345
    The default behavior is to try a list of DEM sources in order, and
2346
    stop once the downloaded data is covering a large enough part of the
2347
    glacier. The DEM sources are tested in the following order:
2348

2349
    'NASADEM' -> 'COPDEM' -> 'GIMP' -> 'REMA' -> 'TANDEM' -> 'MAPZEN'
2350

2351
    To force usage of a certain data source, use the ``source`` keyword argument.
2352

2353
    Parameters
2354
    ----------
2355
    lon_ex : tuple or int, required
2356
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2357
    lat_ex : tuple or int, required
2358
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2359
    gdir : GlacierDirectory, required if source=None
2360
        the glacier directory, used to decide on the DEM source
2361
2365
    dx_meter : float, required for source='MAPZEN'
2366
        the resolution of the glacier map (to decide the zoom level of mapzen)
2367
    zoom : int, optional
2368
        if you know the zoom already (for MAPZEN only)
2369
    source : str or list of str, optional
2370
        Name of specific DEM source. see utils.DEM_SOURCES for a list
2371

2372
    Returns
2373
    -------
2374
    tuple: (list with path(s) to the DEM file(s), data source str)
2375
    """
2376
    from oggm.utils import tolist
7✔
2377
    lon_ex = tolist(lon_ex, length=2)
7✔
2378
    lat_ex = tolist(lat_ex, length=2)
7✔
2379

2380
    if source is not None and not isinstance(source, str):
7✔
2381
        # check all user options
2382
        for s in source:
×
2383
            demf, source_str = get_topo_file(lon_ex=lon_ex, lat_ex=lat_ex,
×
2384
                                             gdir=gdir, source=s)
2385
            if demf[0]:
×
2386
                return demf, source_str
×
2387

2388
    # Did the user specify a specific DEM file?
2389
    if 'dem_file' in cfg.PATHS and os.path.isfile(cfg.PATHS['dem_file']):
7✔
2390
        source = 'USER' if source is None else source
7✔
2391
        if source == 'USER':
7✔
2392
            return [cfg.PATHS['dem_file']], source
7✔
2393

2394
    # Some logic to decide which source to take if unspecified
2395
    if source is None:
1✔
2396
        if gdir is None:
×
2397
            raise InvalidParamsError('gdir is needed if source=None')
×
2398
        source = getattr(gdir, 'rgi_dem_source')
×
2399
        if source is None:
×
2400
            source = default_dem_source(gdir.rgi_id)
×
2401

2402
    if source not in DEM_SOURCES:
1✔
2403
        raise InvalidParamsError('`source` must be one of '
×
2404
                                 '{}'.format(DEM_SOURCES))
2405

2406
    # OK go
2407
    files = []
1✔
2408
    if source == 'GIMP':
1✔
2409
        _file = _download_topo_file_from_cluster('gimpdem_90m_v01.1.tif')
1✔
2410
        files.append(_file)
1✔
2411

2412
    if source == 'ARCTICDEM':
1✔
2413
        zones = arcticdem_zone(lon_ex, lat_ex)
1✔
2414
        for z in zones:
1✔
2415
            with get_lock():
1✔
2416
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2417
                url += 'dem/ArcticDEM_100m_v3.0/'
1✔
2418
                url += '{}_100m_v3.0/{}_100m_v3.0_reg_dem.tif'.format(z, z)
1✔
2419
                files.append(file_downloader(url))
1✔
2420

2421
    if source == 'RAMP':
1✔
2422
        _file = _download_topo_file_from_cluster('AntarcticDEM_wgs84.tif')
1✔
2423
        files.append(_file)
1✔
2424

2425
    if source == 'ALASKA':
1✔
2426
        zones = alaska_dem_zone(lon_ex, lat_ex)
1✔
2427
        for z in zones:
1✔
2428
            with get_lock():
1✔
2429
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2430
                url += 'dem/Alaska_albers_V3/'
1✔
2431
                url += '{}_Alaska_albers_V3/'.format(z)
1✔
2432
                url += '{}_Alaska_albers_V3.tif'.format(z)
1✔
2433
                files.append(file_downloader(url))
1✔
2434

2435
    if source == 'REMA':
1✔
2436
        zones = rema_zone(lon_ex, lat_ex)
1✔
2437
        for z in zones:
1✔
2438
            with get_lock():
1✔
2439
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2440
                url += 'dem/REMA_100m_v1.1/'
1✔
2441
                url += '{}_100m_v1.1/{}_100m_v1.1_reg_dem.tif'.format(z, z)
1✔
2442
                files.append(file_downloader(url))
1✔
2443

2444
    if source == 'TANDEM':
1✔
2445
        zones = tandem_zone(lon_ex, lat_ex)
1✔
2446
        for z in zones:
1✔
2447
            files.append(_download_tandem_file(z))
1✔
2448

2449
    if source == 'AW3D30':
1✔
2450
        zones = aw3d30_zone(lon_ex, lat_ex)
1✔
2451
        for z in zones:
1✔
2452
            files.append(_download_aw3d30_file(z))
1✔
2453

2454
    if source == 'MAPZEN':
1✔
2455
        zones = mapzen_zone(lon_ex, lat_ex, dx_meter=dx_meter, zoom=zoom)
1✔
2456
        for z in zones:
1✔
2457
            files.append(_download_mapzen_file(z))
1✔
2458

2459
    if source == 'ASTER':
1✔
2460
        zones = aster_zone(lon_ex, lat_ex)
1✔
2461
        for z in zones:
1✔
2462
            files.append(_download_aster_file(z))
1✔
2463

2464
    if source == 'DEM3':
1✔
2465
        zones = dem3_viewpano_zone(lon_ex, lat_ex)
1✔
2466
        for z in zones:
1✔
2467
            files.append(_download_dem3_viewpano(z))
1✔
2468

2469
    if source == 'SRTM':
1✔
2470
        zones = srtm_zone(lon_ex, lat_ex)
1✔
2471
        for z in zones:
1✔
2472
            files.append(_download_srtm_file(z))
1✔
2473

2474
    if source in ['COPDEM30', 'COPDEM90']:
1✔
2475
        tilenames = copdem_zone(lon_ex, lat_ex, source)
×
NEW
2476
        for tile_url, _ in tilenames:
×
NEW
2477
            files.append(_download_copdem_file(tile_url))
×
2478

2479
    if source == 'NASADEM':
1✔
2480
        zones = nasadem_zone(lon_ex, lat_ex)
1✔
2481
        for z in zones:
1✔
2482
            files.append(_download_nasadem_file(z))
1✔
2483

2484
    # filter for None (e.g. oceans)
2485
    files = [s for s in files if s]
1✔
2486
    if files:
1✔
2487
        return files, source
1✔
2488
    else:
2489
        raise InvalidDEMError('Source: {2} no topography file available for '
×
2490
                              'extent lat:{0}, lon:{1}!'.
2491
                              format(lat_ex, lon_ex, source))
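
# Illustrative usage sketch, assuming cfg.initialize() was called, the working
# directories are set up and internet access is available: force a specific
# DEM source for a small extent in the Alps. With source=None a
# GlacierDirectory (gdir) must be passed instead, so that the per-glacier
# default source can be looked up.
def _sketch_get_topo_file_usage():
    files, src = get_topo_file(lon_ex=(10.2, 10.4), lat_ex=(46.1, 46.2),
                               source='SRTM')
    return files, src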