
OGGM / oggm, build 4510759150 (pending completion)

GitHub Pull Request #1549: Update test image
Merge f8c800840 into 71e58edf5

11167 of 13069 relevant lines covered (85.45%)
3.56 hits per line

Source File

/oggm/utils/_downloads.py: 77.86% of lines covered
1
"""Automated data download and IO."""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import gzip
9✔
7
import bz2
9✔
8
import hashlib
9✔
9
import shutil
9✔
10
import zipfile
9✔
11
import sys
9✔
12
import math
9✔
13
import logging
9✔
14
from functools import partial, wraps
9✔
15
import time
9✔
16
import fnmatch
9✔
17
import urllib.request
9✔
18
import urllib.error
9✔
19
from urllib.parse import urlparse
9✔
20
import socket
9✔
21
import multiprocessing
9✔
22
from netrc import netrc
9✔
23
import ftplib
9✔
24
import ssl
9✔
25
import tarfile
9✔
26

27
# External libs
28
import pandas as pd
9✔
29
import numpy as np
9✔
30
import shapely.geometry as shpg
9✔
31
import requests
9✔
32

33
# Optional libs
34
try:
9✔
35
    import geopandas as gpd
9✔
36
except ImportError:
37
    pass
38
try:
9✔
39
    import salem
9✔
40
    from salem import wgs84
9✔
41
except ImportError:
42
    pass
43
try:
9✔
44
    import rasterio
9✔
45
    try:
8✔
46
        # rasterio V > 1.0
47
        from rasterio.merge import merge as merge_tool
8✔
48
    except ImportError:
49
        from rasterio.tools.merge import merge as merge_tool
50
except ImportError:
51
    pass
52
try:
9✔
53
    ModuleNotFoundError
9✔
54
except NameError:
×
55
    ModuleNotFoundError = ImportError
×
56

57
# Locals
58
import oggm.cfg as cfg
9✔
59
from oggm.exceptions import (InvalidParamsError, NoInternetException,
9✔
60
                             DownloadVerificationFailedException,
61
                             DownloadCredentialsMissingException,
62
                             HttpDownloadError, HttpContentTooShortError,
63
                             InvalidDEMError, FTPSDownloadError)
64

65
# Module logger
66
logger = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
67

68
# Github repository and commit hash/branch name/tag name on that repository
69
# The given commit will be downloaded from github and used as source for
70
# all sample data
71
SAMPLE_DATA_GH_REPO = 'OGGM/oggm-sample-data'
9✔
72
SAMPLE_DATA_COMMIT = '0fa95d0e65eb9f5ab2c355a6ea0c8812c0a81cd0'
9✔
73

74
CHECKSUM_URL = 'https://cluster.klima.uni-bremen.de/data/downloads.sha256.hdf'
9✔
75
CHECKSUM_VALIDATION_URL = CHECKSUM_URL + '.sha256'
9✔
76
CHECKSUM_LIFETIME = 24 * 60 * 60
9✔
77

78
# Web mercator proj constants
79
WEB_N_PIX = 256
9✔
80
WEB_EARTH_RADUIS = 6378137.
9✔
81

82
DEM_SOURCES = ['GIMP', 'ARCTICDEM', 'RAMP', 'TANDEM', 'AW3D30', 'MAPZEN', 'DEM3',
9✔
83
               'ASTER', 'SRTM', 'REMA', 'ALASKA', 'COPDEM30', 'COPDEM90', 'NASADEM']
84
DEM_SOURCES_PER_GLACIER = None
9✔
85

86
_RGI_METADATA = dict()
9✔
87

88
DEM3REG = {
9✔
89
    'ISL': [-25., -13., 63., 67.],  # Iceland
90
    'SVALBARD': [9., 35.99, 75., 84.],
91
    'JANMAYEN': [-10., -7., 70., 72.],
92
    'FJ': [36., 68., 79., 90.],  # Franz Josef Land
93
    'FAR': [-8., -6., 61., 63.],  # Faroer
94
    'BEAR': [18., 20., 74., 75.],  # Bear Island
95
    'SHL': [-3., 0., 60., 61.],  # Shetland
96
    # Antarctica tiles as UTM zones, large files
97
    '01-15': [-180., -91., -90, -60.],
98
    '16-30': [-91., -1., -90., -60.],
99
    '31-45': [-1., 89., -90., -60.],
100
    '46-60': [89., 189., -90., -60.],
101
    # Greenland tiles
102
    'GL-North': [-72., -11., 76., 84.],
103
    'GL-West': [-62., -42., 64., 76.],
104
    'GL-South': [-52., -40., 59., 64.],
105
    'GL-East': [-42., -17., 64., 76.]
106
}
107

108
# Function
109
tuple2int = partial(np.array, dtype=np.int64)
9✔
110

111
lock = None
9✔
112

113

114
def mkdir(path, reset=False):
9✔
115
    """Checks if directory exists and if not, create one.
116

117
    Parameters
118
    ----------
119
    reset: erase the content of the directory if it exists
120

121
    Returns
122
    -------
123
    the path
124
    """
125

126
    if reset and os.path.exists(path):
8✔
127
        shutil.rmtree(path)
×
128
    try:
8✔
129
        os.makedirs(path)
8✔
130
    except FileExistsError:
8✔
131
        pass
8✔
132
    return path
8✔
133

134

135
def del_empty_dirs(s_dir):
9✔
136
    """Delete empty directories."""
137
    b_empty = True
1✔
138
    for s_target in os.listdir(s_dir):
1✔
139
        s_path = os.path.join(s_dir, s_target)
1✔
140
        if os.path.isdir(s_path):
1✔
141
            if not del_empty_dirs(s_path):
1✔
142
                b_empty = False
×
143
        else:
144
            b_empty = False
1✔
145
    if b_empty:
1✔
146
        os.rmdir(s_dir)
1✔
147
    return b_empty
1✔
148

149

150
def findfiles(root_dir, endswith):
9✔
151
    """Finds all files with a specific ending in a directory
152

153
    Parameters
154
    ----------
155
    root_dir : str
156
       The directory to search for
157
    endswith : str
158
       The file ending (e.g. '.hgt')
159

160
    Returns
161
    -------
162
    the list of files
163
    """
164
    out = []
×
165
    for dirpath, dirnames, filenames in os.walk(root_dir):
×
166
        for filename in [f for f in filenames if f.endswith(endswith)]:
×
167
            out.append(os.path.join(dirpath, filename))
×
168
    return out
×
169
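# --- Usage sketch (not part of the module): combining mkdir() and
# findfiles(). The temporary directory below is only an illustration.
import os
import tempfile

from oggm.utils._downloads import mkdir, findfiles

root = tempfile.mkdtemp()
dem_dir = mkdir(os.path.join(root, 'dem_tiles'))  # created if missing, returned
hgt_files = findfiles(dem_dir, '.hgt')            # [] while the folder is empty
print(dem_dir, len(hgt_files))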

170

171
def get_lock():
9✔
172
    """Get multiprocessing lock."""
173
    global lock
174
    if lock is None:
8✔
175
        # Global Lock
176
        if cfg.PARAMS.get('use_mp_spawn', False):
8✔
177
            lock = multiprocessing.get_context('spawn').Lock()
×
178
        else:
179
            lock = multiprocessing.Lock()
8✔
180
    return lock
8✔
181

182

183
def get_dl_verify_data(section):
9✔
184
    """Returns a pandas DataFrame with all known download object hashes.
185

186
    The returned DataFrame maps str: cache_obj_name (without section)
187
    to a tuple of (int(size), bytes(sha256)).
188
    """
189

190
    verify_key = 'dl_verify_data_' + section
8✔
191
    if verify_key in cfg.DATA:
8✔
192
        return cfg.DATA[verify_key]
3✔
193

194
    verify_file_path = os.path.join(cfg.CACHE_DIR, 'downloads.sha256.hdf')
8✔
195

196
    def verify_file(force=False):
8✔
197
        """Check the hash file's own hash"""
198
        if not cfg.PARAMS['has_internet']:
8✔
199
            return
×
200

201
        if not force and os.path.isfile(verify_file_path) and \
8✔
202
           os.path.getmtime(verify_file_path) + CHECKSUM_LIFETIME > time.time():
203
            return
7✔
204

205
        logger.info('Checking the download verification file checksum...')
8✔
206
        try:
8✔
207
            with requests.get(CHECKSUM_VALIDATION_URL) as req:
8✔
208
                req.raise_for_status()
8✔
209
                verify_file_sha256 = req.text.split(maxsplit=1)[0]
8✔
210
                verify_file_sha256 = bytearray.fromhex(verify_file_sha256)
8✔
211
        except Exception as e:
×
212
            verify_file_sha256 = None
×
213
            logger.warning('Failed getting verification checksum: ' + repr(e))
×
214

215
        if os.path.isfile(verify_file_path) and verify_file_sha256:
8✔
216
            sha256 = hashlib.sha256()
8✔
217
            with open(verify_file_path, 'rb') as f:
8✔
218
                for b in iter(lambda: f.read(0xFFFF), b''):
8✔
219
                    sha256.update(b)
8✔
220
            if sha256.digest() != verify_file_sha256:
8✔
221
                logger.warning('%s changed or invalid, deleting.'
×
222
                               % (verify_file_path))
223
                os.remove(verify_file_path)
×
224
            else:
225
                os.utime(verify_file_path)
8✔
226

227
    if not np.any(['dl_verify_data_' in k for k in cfg.DATA.keys()]):
8✔
228
        # We check the hash file only once per session
229
        # no need to do it at each call
230
        verify_file()
8✔
231

232
    if not os.path.isfile(verify_file_path):
8✔
233
        if not cfg.PARAMS['has_internet']:
8✔
234
            return pd.DataFrame()
×
235

236
        logger.info('Downloading %s to %s...'
8✔
237
                    % (CHECKSUM_URL, verify_file_path))
238

239
        with requests.get(CHECKSUM_URL, stream=True) as req:
8✔
240
            if req.status_code == 200:
8✔
241
                mkdir(os.path.dirname(verify_file_path))
8✔
242
                with open(verify_file_path, 'wb') as f:
8✔
243
                    for b in req.iter_content(chunk_size=0xFFFF):
8✔
244
                        if b:
8✔
245
                            f.write(b)
8✔
246

247
        logger.info('Done downloading.')
8✔
248

249
        verify_file(force=True)
8✔
250

251
    if not os.path.isfile(verify_file_path):
8✔
252
        logger.warning('Downloading and verifying checksums failed.')
×
253
        return pd.DataFrame()
×
254

255
    try:
8✔
256
        data = pd.read_hdf(verify_file_path, key=section)
8✔
257
    except KeyError:
8✔
258
        data = pd.DataFrame()
8✔
259

260
    cfg.DATA[verify_key] = data
8✔
261

262
    return data
8✔
263
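# --- Sketch of how the verification table is queried (assumes an initialized
# OGGM configuration and internet access). The section name is the host part
# of the cache object name, e.g. 'cluster.klima.uni-bremen.de' for files
# hosted on the OGGM cluster.
from oggm import cfg
from oggm.utils._downloads import get_dl_verify_data

cfg.initialize()
df = get_dl_verify_data('cluster.klima.uni-bremen.de')
# expected columns: 'size' and 'sha256', indexed by the path within the section
print(len(df))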

264

265
def _call_dl_func(dl_func, cache_path):
9✔
266
    """Helper so the actual call to downloads can be overridden
267
    """
268
    return dl_func(cache_path)
8✔
269

270

271
def _cached_download_helper(cache_obj_name, dl_func, reset=False):
9✔
272
    """Helper function for downloads.
273

274
    Takes care of checking if the file is already cached.
275
    Only calls the actual download function when no cached version exists.
276
    """
277
    cache_dir = cfg.PATHS['dl_cache_dir']
8✔
278
    cache_ro = cfg.PARAMS['dl_cache_readonly']
8✔
279

280
    # A lot of logic below could be simplified but it's also not too important
281
    wd = cfg.PATHS.get('working_dir')
8✔
282
    if wd:
8✔
283
        # this is for real runs
284
        fb_cache_dir = os.path.join(wd, 'cache')
7✔
285
        check_fb_dir = False
7✔
286
    else:
287
        # Nothing has been set up yet, this is bad - find a place to write
288
        # This should happen on read-only cluster only but still
289
        wd = os.environ.get('OGGM_WORKDIR')
8✔
290
        if wd is not None and os.path.isdir(wd):
8✔
291
            fb_cache_dir = os.path.join(wd, 'cache')
×
292
        else:
293
            fb_cache_dir = os.path.join(cfg.CACHE_DIR, 'cache')
8✔
294
        check_fb_dir = True
8✔
295

296
    if not cache_dir:
8✔
297
        # Defaults to working directory: it must be set!
298
        if not cfg.PATHS['working_dir']:
×
299
            raise InvalidParamsError("Need a valid PATHS['working_dir']!")
×
300
        cache_dir = fb_cache_dir
×
301
        cache_ro = False
×
302

303
    fb_path = os.path.join(fb_cache_dir, cache_obj_name)
8✔
304
    if not reset and os.path.isfile(fb_path):
8✔
305
        return fb_path
×
306

307
    cache_path = os.path.join(cache_dir, cache_obj_name)
8✔
308
    if not reset and os.path.isfile(cache_path):
8✔
309
        return cache_path
6✔
310

311
    if cache_ro:
8✔
312
        if check_fb_dir:
×
313
            # Add a manual check that we are caching sample data download
314
            if 'oggm-sample-data' not in fb_path:
×
315
                raise InvalidParamsError('Attempting to download something '
×
316
                                         'with invalid global settings.')
317
        cache_path = fb_path
×
318

319
    if not cfg.PARAMS['has_internet']:
8✔
320
        raise NoInternetException("Download required, but "
1✔
321
                                  "`has_internet` is False.")
322

323
    mkdir(os.path.dirname(cache_path))
8✔
324

325
    try:
8✔
326
        cache_path = _call_dl_func(dl_func, cache_path)
8✔
327
    except BaseException:
×
328
        if os.path.exists(cache_path):
×
329
            os.remove(cache_path)
×
330
        raise
×
331

332
    return cache_path
8✔
333

334

335
def _verified_download_helper(cache_obj_name, dl_func, reset=False):
9✔
336
    """Helper function for downloads.
337

338
    Verifies the size and hash of the downloaded file against the included
339
    list of known static files.
340
    Uses _cached_download_helper to perform the actual download.
341
    """
342
    path = _cached_download_helper(cache_obj_name, dl_func, reset)
8✔
343

344
    dl_verify = cfg.PARAMS.get('dl_verify', False)
8✔
345

346
    if dl_verify and path and cache_obj_name not in cfg.DL_VERIFIED:
8✔
347
        cache_section, cache_path = cache_obj_name.split('/', 1)
1✔
348
        data = get_dl_verify_data(cache_section)
1✔
349
        if cache_path not in data.index:
1✔
350
            logger.info('No known hash for %s' % cache_obj_name)
×
351
            cfg.DL_VERIFIED[cache_obj_name] = True
×
352
        else:
353
            # compute the hash
354
            sha256 = hashlib.sha256()
1✔
355
            with open(path, 'rb') as f:
1✔
356
                for b in iter(lambda: f.read(0xFFFF), b''):
1✔
357
                    sha256.update(b)
1✔
358
            sha256 = sha256.digest()
1✔
359
            size = os.path.getsize(path)
1✔
360

361
            # check
362
            data = data.loc[cache_path]
1✔
363
            if data['size'] != size or bytes(data['sha256']) != sha256:
1✔
364
                err = '%s failed to verify!\nis: %s %s\nexpected: %s %s' % (
1✔
365
                    path, size, sha256.hex(), data[0], data[1].hex())
366
                raise DownloadVerificationFailedException(msg=err, path=path)
1✔
367
            logger.info('%s verified successfully.' % path)
1✔
368
            cfg.DL_VERIFIED[cache_obj_name] = True
1✔
369

370
    return path
8✔
371

372

373
def _requests_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
374
    """Implements the required features of urlretrieve on top of requests
375
    """
376

377
    chunk_size = 128 * 1024
8✔
378
    chunk_count = 0
8✔
379

380
    with requests.get(url, stream=True, auth=auth, timeout=timeout) as r:
8✔
381
        if r.status_code != 200:
8✔
382
            raise HttpDownloadError(r.status_code, url)
×
383
        r.raise_for_status()
8✔
384

385
        size = r.headers.get('content-length') or -1
8✔
386
        size = int(size)
8✔
387

388
        if reporthook:
8✔
389
            reporthook(chunk_count, chunk_size, size)
8✔
390

391
        with open(path, 'wb') as f:
8✔
392
            for chunk in r.iter_content(chunk_size=chunk_size):
8✔
393
                if not chunk:
8✔
394
                    continue
×
395
                f.write(chunk)
8✔
396
                chunk_count += 1
8✔
397
                if reporthook:
8✔
398
                    reporthook(chunk_count, chunk_size, size)
8✔
399

400
        if chunk_count * chunk_size < size:
8✔
401
            raise HttpContentTooShortError()
×
402

403

404
def _classic_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
405
    """Thin wrapper around pythons urllib urlretrieve
406
    """
407

408
    ourl = url
×
409
    if auth:
×
410
        u = urlparse(url)
×
411
        if '@' not in u.netloc:
×
412
            netloc = auth[0] + ':' + auth[1] + '@' + u.netloc
×
413
            url = u._replace(netloc=netloc).geturl()
×
414

415
    old_def_timeout = socket.getdefaulttimeout()
×
416
    if timeout is not None:
×
417
        socket.setdefaulttimeout(timeout)
×
418

419
    try:
×
420
        urllib.request.urlretrieve(url, path, reporthook)
×
421
    except urllib.error.HTTPError as e:
×
422
        raise HttpDownloadError(e.code, ourl)
×
423
    except urllib.error.ContentTooShortError as e:
×
424
        raise HttpContentTooShortError()
×
425
    finally:
426
        socket.setdefaulttimeout(old_def_timeout)
×
427

428

429
class ImplicitFTPTLS(ftplib.FTP_TLS):
9✔
430
    """ FTP_TLS subclass that automatically wraps sockets in SSL to support
431
        implicit FTPS.
432

433
        Taken from https://stackoverflow.com/a/36049814
434
    """
435

436
    def __init__(self, *args, **kwargs):
9✔
437
        super().__init__(*args, **kwargs)
×
438
        self._sock = None
×
439

440
    @property
9✔
441
    def sock(self):
9✔
442
        """Return the socket."""
443
        return self._sock
×
444

445
    @sock.setter
9✔
446
    def sock(self, value):
9✔
447
        """When modifying the socket, ensure that it is ssl wrapped."""
448
        if value is not None and not isinstance(value, ssl.SSLSocket):
×
449
            value = self.context.wrap_socket(value)
×
450
        self._sock = value
×
451

452

453
def url_exists(url):
9✔
454
    """Checks if a given a URL exists or not."""
455
    request = requests.get(url)
2✔
456
    return request.status_code < 400
2✔
457
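# --- Example: url_exists() issues a plain GET and checks the status code,
# so anything below 400 (including redirects) counts as existing.
from oggm.utils._downloads import url_exists, CHECKSUM_URL

print(url_exists(CHECKSUM_URL))           # True if the checksum file is reachable
print(url_exists(CHECKSUM_URL + '.foo'))  # very likely False (404)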

458

459
def _ftps_retrieve(url, path, reporthook, auth=None, timeout=None):
9✔
460
    """ Wrapper around ftplib to download from FTPS server
461
    """
462

463
    if not auth:
×
464
        raise DownloadCredentialsMissingException('No authentication '
×
465
                                                  'credentials given!')
466

467
    upar = urlparse(url)
×
468

469
    # Decide if Implicit or Explicit FTPS is used based on the port in url
470
    if upar.port == 990:
×
471
        ftps = ImplicitFTPTLS()
×
472
    elif upar.port == 21:
×
473
        ftps = ftplib.FTP_TLS()
×
474

475
    try:
×
476
        # establish ssl connection
477
        ftps.connect(host=upar.hostname, port=upar.port, timeout=timeout)
×
478
        ftps.login(user=auth[0], passwd=auth[1])
×
479
        ftps.prot_p()
×
480

481
        logger.info('Established connection %s' % upar.hostname)
×
482

483
        # meta for progress bar size
484
        count = 0
×
485
        total = ftps.size(upar.path)
×
486
        bs = 12*1024
×
487

488
        def _ftps_progress(data):
×
489
            outfile.write(data)
×
490
            nonlocal count
491
            count += 1
×
492
            reporthook(count, count*bs, total)
×
493

494
        with open(path, 'wb') as outfile:
×
495
            ftps.retrbinary('RETR ' + upar.path, _ftps_progress, blocksize=bs)
×
496

497
    except (ftplib.error_perm, socket.timeout, socket.gaierror) as err:
×
498
        raise FTPSDownloadError(err)
×
499
    finally:
500
        ftps.close()
×
501

502

503
def _get_url_cache_name(url):
9✔
504
    """Returns the cache name for any given url.
505
    """
506

507
    res = urlparse(url)
8✔
508
    return res.netloc.split(':', 1)[0] + res.path
8✔
509

510

511
def oggm_urlretrieve(url, cache_obj_name=None, reset=False,
9✔
512
                     reporthook=None, auth=None, timeout=None):
513
    """Wrapper around urlretrieve, to implement our caching logic.
514

515
    Instead of accepting a destination path, it decides where to store the file
516
    and returns the local path.
517

518
    auth is expected to be either a tuple of ('username', 'password') or None.
519
    """
520

521
    if cache_obj_name is None:
8✔
522
        cache_obj_name = _get_url_cache_name(url)
8✔
523

524
    def _dlf(cache_path):
8✔
525
        logger.info("Downloading %s to %s..." % (url, cache_path))
8✔
526
        try:
8✔
527
            _requests_urlretrieve(url, cache_path, reporthook, auth, timeout)
8✔
528
        except requests.exceptions.InvalidSchema:
×
529
            if 'ftps://' in url:
×
530
                _ftps_retrieve(url, cache_path, reporthook, auth, timeout)
×
531
            else:
532
                _classic_urlretrieve(url, cache_path, reporthook, auth,
×
533
                                     timeout)
534
        return cache_path
8✔
535

536
    return _verified_download_helper(cache_obj_name, _dlf, reset)
8✔
537
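# --- Usage sketch for oggm_urlretrieve() (assumes an initialized OGGM config;
# the working directory path is a hypothetical example). The reporthook gets
# (chunk_count, chunk_size, total_size), mirroring urllib's urlretrieve.
from oggm import cfg
from oggm.utils._downloads import oggm_urlretrieve, CHECKSUM_URL

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # hypothetical, hosts the fallback cache

def _hook(count, chunk_size, total_size):
    if total_size > 0:
        done = min(count * chunk_size / total_size, 1.0)
        print('\rdownloaded: %3.0f%%' % (100 * done), end='')

local_path = oggm_urlretrieve(CHECKSUM_URL, reporthook=_hook)
print('\ncached at', local_path)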

538

539
def _progress_urlretrieve(url, cache_name=None, reset=False,
9✔
540
                          auth=None, timeout=None):
541
    """Downloads a file, returns its local path, and shows a progressbar."""
542

543
    try:
8✔
544
        from progressbar import DataTransferBar, UnknownLength
8✔
545
        pbar = None
8✔
546

547
        def _upd(count, size, total):
8✔
548
            nonlocal pbar
549
            if pbar is None:
8✔
550
                pbar = DataTransferBar()
8✔
551
                if not pbar.is_terminal:
8✔
552
                    pbar.min_poll_interval = 15
8✔
553
            if pbar.max_value is None:
8✔
554
                if total > 0:
8✔
555
                    pbar.start(total)
8✔
556
                else:
557
                    pbar.start(UnknownLength)
×
558
            pbar.update(min(count * size, total))
8✔
559
            sys.stdout.flush()
8✔
560
        res = oggm_urlretrieve(url, cache_obj_name=cache_name, reset=reset,
8✔
561
                               reporthook=_upd, auth=auth, timeout=timeout)
562
        try:
8✔
563
            pbar.finish()
8✔
564
        except BaseException:
6✔
565
            pass
6✔
566
        return res
8✔
567
    except (ImportError, ModuleNotFoundError):
1✔
568
        return oggm_urlretrieve(url, cache_obj_name=cache_name,
×
569
                                reset=reset, auth=auth, timeout=timeout)
570

571

572
def aws_file_download(aws_path, cache_name=None, reset=False):
9✔
573
    with get_lock():
×
574
        return _aws_file_download_unlocked(aws_path, cache_name, reset)
×
575

576

577
def _aws_file_download_unlocked(aws_path, cache_name=None, reset=False):
9✔
578
    """Download a file from the AWS drive s3://astgtmv2/
579

580
    **Note:** you need AWS credentials for this to work.
581

582
    Parameters
583
    ----------
584
    aws_path: path relative to s3://astgtmv2/
585
    """
586

587
    while aws_path.startswith('/'):
×
588
        aws_path = aws_path[1:]
×
589

590
    if cache_name is not None:
×
591
        cache_obj_name = cache_name
×
592
    else:
593
        cache_obj_name = 'astgtmv2/' + aws_path
×
594

595
    def _dlf(cache_path):
×
596
        raise NotImplementedError("Downloads from AWS are no longer supported")
597

598
    return _verified_download_helper(cache_obj_name, _dlf, reset)
×
599

600

601
def file_downloader(www_path, retry_max=3, sleep_on_retry=5,
9✔
602
                    cache_name=None, reset=False, auth=None,
603
                    timeout=None):
604
    """A slightly better downloader: it tries more than once."""
605

606
    local_path = None
8✔
607
    retry_counter = 0
8✔
608
    while retry_counter < retry_max:
8✔
609
        # Try to download
610
        try:
8✔
611
            retry_counter += 1
8✔
612
            local_path = _progress_urlretrieve(www_path, cache_name=cache_name,
8✔
613
                                               reset=reset, auth=auth,
614
                                               timeout=timeout)
615
            # if no error, exit
616
            break
8✔
617
        except HttpDownloadError as err:
1✔
618
            # This works well for py3
619
            if err.code == 404 or err.code == 300:
×
620
                # Ok so this *should* be an ocean tile
621
                return None
×
622
            elif err.code >= 500 and err.code < 600:
×
623
                logger.info(f"Downloading {www_path} failed with "
×
624
                            f"HTTP error {err.code}, "
625
                            f"retrying in {sleep_on_retry} seconds... "
626
                            f"{retry_counter}/{retry_max}")
627
                time.sleep(sleep_on_retry)
×
628
                continue
×
629
            else:
630
                raise
×
631
        except HttpContentTooShortError as err:
1✔
632
            logger.info("Downloading %s failed with ContentTooShortError"
×
633
                        " error %s, retrying in %s seconds... %s/%s" %
634
                        (www_path, err.code, sleep_on_retry, retry_counter, retry_max))
635
            time.sleep(sleep_on_retry)
×
636
            continue
×
637
        except DownloadVerificationFailedException as err:
1✔
638
            if (cfg.PATHS['dl_cache_dir'] and
1✔
639
                  err.path.startswith(cfg.PATHS['dl_cache_dir']) and
640
                  cfg.PARAMS['dl_cache_readonly']):
641
                if not cache_name:
×
642
                    cache_name = _get_url_cache_name(www_path)
×
643
                cache_name = "GLOBAL_CACHE_INVALID/" + cache_name
×
644
                retry_counter -= 1
×
645
                logger.info("Global cache for %s is invalid!")
×
646
            else:
647
                try:
1✔
648
                    os.remove(err.path)
1✔
649
                except FileNotFoundError:
×
650
                    pass
×
651
                logger.info("Downloading %s failed with "
1✔
652
                            "DownloadVerificationFailedException\n %s\n"
653
                            "The file might have changed or is corrupted. "
654
                            "File deleted. Re-downloading... %s/%s" %
655
                            (www_path, err.msg, retry_counter, retry_max))
656
            continue
1✔
657
        except requests.ConnectionError as err:
1✔
658
            if err.args[0].__class__.__name__ == 'MaxRetryError':
×
659
                # if request tried often enough we don't have to do this
660
                # this error does happen for non-existing ASTERv3 files
661
                return None
×
662
            else:
663
                # in other cases: try again
664
                logger.info("Downloading %s failed with ConnectionError, "
×
665
                            "retrying in %s seconds... %s/%s" %
666
                            (www_path, sleep_on_retry, retry_counter, retry_max))
667
                time.sleep(sleep_on_retry)
×
668
                continue
×
669
        except FTPSDownloadError as err:
1✔
670
            logger.info("Downloading %s failed with FTPSDownloadError"
×
671
                        " error: '%s', retrying in %s seconds... %s/%s" %
672
                        (www_path, err.orgerr, sleep_on_retry, retry_counter, retry_max))
673
            time.sleep(sleep_on_retry)
×
674
            continue
×
675

676
    # See if we managed (fail is allowed)
677
    if not local_path or not os.path.exists(local_path):
8✔
678
        logger.warning('Downloading %s failed.' % www_path)
×
679

680
    return local_path
8✔
681
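# --- Sketch: file_downloader() is the retrying front end used by the DEM and
# data downloaders below. It returns None when the file could not be fetched
# (failing is allowed, e.g. for missing ocean tiles). Config as in the sketch above.
from oggm import cfg
from oggm.utils._downloads import file_downloader, CHECKSUM_URL

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # hypothetical
path = file_downloader(CHECKSUM_URL, retry_max=2, sleep_on_retry=1)
if path is None:
    print('download failed (this is allowed by design)')
else:
    print('stored at', path)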

682

683
def locked_func(func):
9✔
684
    """To decorate a function that needs to be locked for multiprocessing"""
685
    @wraps(func)
9✔
686
    def wrapper(*args, **kwargs):
9✔
687
        with get_lock():
6✔
688
            return func(*args, **kwargs)
6✔
689
    return wrapper
9✔
690
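# --- Sketch: locked_func() is the decorator form of the
# "public wrapper takes the lock, *_unlocked does the work" pattern used by
# the download functions below. The function name here is hypothetical.
from oggm.utils._downloads import locked_func

@locked_func
def _do_something_exclusive(path):
    # runs while holding the module-level multiprocessing lock
    return path

print(_do_something_exclusive('/tmp/demo'))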

691

692
def file_extractor(file_path):
9✔
693
    """For archives with only one file inside extract the file to tmpdir."""
694

695
    filename, file_extension = os.path.splitext(file_path)
6✔
696
    # Second one for tar.gz files
697
    f2, ex2 = os.path.splitext(filename)
6✔
698
    if ex2 == '.tar':
6✔
699
        filename, file_extension = f2, '.tar.gz'
1✔
700
    bname = os.path.basename(file_path)
6✔
701

702
    # This is to give a unique name to the tmp file
703
    hid = hashlib.md5(file_path.encode()).hexdigest()[:7] + '_'
6✔
704

705
    # extract directory
706
    tmpdir = cfg.PATHS['tmp_dir']
6✔
707
    mkdir(tmpdir)
6✔
708

709
    # Check output extension
710
    def _check_ext(f):
6✔
711
        _, of_ext = os.path.splitext(f)
6✔
712
        if of_ext not in ['.nc', '.tif']:
6✔
713
            raise InvalidParamsError('Extracted file extension not recognized'
×
714
                                     ': {}'.format(of_ext))
715
        return of_ext
6✔
716

717
    if file_extension == '.zip':
6✔
718
        with zipfile.ZipFile(file_path) as zf:
6✔
719
            members = zf.namelist()
6✔
720
            if len(members) != 1:
6✔
721
                raise RuntimeError('Cannot extract multiple files')
×
722
            o_name = hid + members[0]
6✔
723
            o_path = os.path.join(tmpdir, o_name)
6✔
724
            of_ext = _check_ext(o_path)
6✔
725
            if not os.path.exists(o_path):
6✔
726
                logger.info('Extracting {} to {}...'.format(bname, o_path))
6✔
727
                with open(o_path, 'wb') as f:
6✔
728
                    f.write(zf.read(members[0]))
6✔
729
    elif file_extension == '.gz':
4✔
730
        # Gzip files cannot be inspected. It's always only one file
731
        # Decide on its name
732
        o_name = hid + os.path.basename(filename)
2✔
733
        o_path = os.path.join(tmpdir, o_name)
2✔
734
        of_ext = _check_ext(o_path)
2✔
735
        if not os.path.exists(o_path):
2✔
736
            logger.info('Extracting {} to {}...'.format(bname, o_path))
2✔
737
            with gzip.GzipFile(file_path) as zf:
2✔
738
                with open(o_path, 'wb') as outfile:
2✔
739
                    for line in zf:
2✔
740
                        outfile.write(line)
2✔
741
    elif file_extension == '.bz2':
4✔
742
        # bzip2 files cannot be inspected. It's always only one file
743
        # Decide on its name
744
        o_name = hid + os.path.basename(filename)
4✔
745
        o_path = os.path.join(tmpdir, o_name)
4✔
746
        of_ext = _check_ext(o_path)
4✔
747
        if not os.path.exists(o_path):
4✔
748
            logger.info('Extracting {} to {}...'.format(bname, o_path))
2✔
749
            with bz2.open(file_path) as zf:
2✔
750
                with open(o_path, 'wb') as outfile:
2✔
751
                    for line in zf:
2✔
752
                        outfile.write(line)
2✔
753
    elif file_extension in ['.tar.gz', '.tar']:
1✔
754
        with tarfile.open(file_path) as zf:
1✔
755
            members = zf.getmembers()
1✔
756
            if len(members) != 1:
1✔
757
                raise RuntimeError('Cannot extract multiple files')
×
758
            o_name = hid + members[0].name
1✔
759
            o_path = os.path.join(tmpdir, o_name)
1✔
760
            of_ext = _check_ext(o_path)
1✔
761
            if not os.path.exists(o_path):
1✔
762
                logger.info('Extracting {} to {}...'.format(bname, o_path))
1✔
763
                with open(o_path, 'wb') as f:
1✔
764
                    f.write(zf.extractfile(members[0]).read())
1✔
765
    else:
766
        raise InvalidParamsError('Extension not recognized: '
1✔
767
                                 '{}'.format(file_extension))
768

769
    # Be sure we don't overfill the folder
770
    cfg.get_lru_handler(tmpdir, ending=of_ext).append(o_path)
6✔
771

772
    return o_path
6✔
773
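# --- Usage sketch for file_extractor() (assumes an initialized OGGM config so
# that cfg.PATHS['tmp_dir'] is set; the archive path is hypothetical and must
# contain exactly one .tif or .nc file).
from oggm import cfg
from oggm.utils._downloads import file_extractor

cfg.initialize()
tif_path = file_extractor('/path/to/single_dem.tif.zip')  # hypothetical archive
print(tif_path)  # extracted to cfg.PATHS['tmp_dir'] with a unique hash prefix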

774

775
def download_with_authentication(wwwfile, key):
9✔
776
    """ Uses credentials from a local .netrc file to download files
777

778
    This function is currently used for TanDEM-X and ASTER
779

780
    Parameters
781
    ----------
782
    wwwfile : str
783
        path to the file to download
784
    key : str
785
        the machine to look at in the .netrc file
786

787
    Returns
788
    -------
    the local path to the downloaded file
789

790
    """
791
    # Check the cache first. Use a dummy download function to ensure nothing
792
    # is downloaded without credentials:
793

794
    def _always_none(foo):
×
795
        return None
×
796

797
    cache_obj_name = _get_url_cache_name(wwwfile)
×
798
    dest_file = _verified_download_helper(cache_obj_name, _always_none)
×
799

800
    # Grab auth parameters
801
    if not dest_file:
×
802
        authfile = os.path.expanduser('~/.netrc')
×
803

804
        if not os.path.isfile(authfile):
×
805
            raise DownloadCredentialsMissingException(
×
806
                (authfile, ' does not exist. Add necessary credentials for ',
807
                 key, ' with `oggm_netrc_credentials. You may have to ',
808
                 'register at the respective service first.'))
809

810
        try:
×
811
            netrc(authfile).authenticators(key)[0]
×
812
        except TypeError:
×
813
            raise DownloadCredentialsMissingException(
×
814
                ('Credentials for ', key, ' are not in ', authfile, '. Add ',
815
                 'credentials for with `oggm_netrc_credentials`.'))
816

817
        dest_file = file_downloader(
×
818
            wwwfile, auth=(netrc(authfile).authenticators(key)[0],
819
                           netrc(authfile).authenticators(key)[2]))
820

821
    return dest_file
×
822
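# --- Sketch: download_with_authentication() expects a ~/.netrc entry for the
# given machine key. A (hypothetical) entry for the DLR geoservice used by the
# TanDEM-X downloader would look like:
#
#     machine geoservice.dlr.de
#     login my_user
#     password my_password
#
# and would then be used as:
#
#     dest = download_with_authentication(wwwfile, 'geoservice.dlr.de')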

823

824
def download_oggm_files():
9✔
825
    with get_lock():
8✔
826
        return _download_oggm_files_unlocked()
8✔
827

828

829
def _download_oggm_files_unlocked():
9✔
830
    """Checks if the demo data is already on the cache and downloads it."""
831

832
    zip_url = 'https://github.com/%s/archive/%s.zip' % \
8✔
833
              (SAMPLE_DATA_GH_REPO, SAMPLE_DATA_COMMIT)
834
    odir = os.path.join(cfg.CACHE_DIR)
8✔
835
    sdir = os.path.join(cfg.CACHE_DIR,
8✔
836
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT)
837

838
    # download only if necessary
839
    if not os.path.exists(sdir):
8✔
840
        ofile = file_downloader(zip_url)
8✔
841
        with zipfile.ZipFile(ofile) as zf:
8✔
842
            zf.extractall(odir)
8✔
843
        assert os.path.isdir(sdir)
8✔
844

845
    # list of files for output
846
    out = dict()
8✔
847
    for root, directories, filenames in os.walk(sdir):
8✔
848
        for filename in filenames:
8✔
849
            if filename in out:
8✔
850
                # This was a stupid thing, and should not happen
851
                # TODO: duplicates in sample data...
852
                k = os.path.join(os.path.basename(root), filename)
8✔
853
                assert k not in out
8✔
854
                out[k] = os.path.join(root, filename)
8✔
855
            else:
856
                out[filename] = os.path.join(root, filename)
8✔
857

858
    return out
8✔
859
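# --- Usage sketch: download_oggm_files() fetches the oggm-sample-data
# repository at SAMPLE_DATA_COMMIT (once) and returns a dict mapping file
# names to local paths (assumes an initialized OGGM config and internet access;
# the working directory is a hypothetical example).
from oggm import cfg
from oggm.utils._downloads import download_oggm_files

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # hypothetical
files = download_oggm_files()
print(len(files), 'sample data files available')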

860

861
def _download_srtm_file(zone):
9✔
862
    with get_lock():
1✔
863
        return _download_srtm_file_unlocked(zone)
1✔
864

865

866
def _download_srtm_file_unlocked(zone):
9✔
867
    """Checks if the srtm data is in the directory and if not, download it.
868
    """
869

870
    # extract directory
871
    tmpdir = cfg.PATHS['tmp_dir']
1✔
872
    mkdir(tmpdir)
1✔
873
    outpath = os.path.join(tmpdir, 'srtm_' + zone + '.tif')
1✔
874

875
    # check if extracted file exists already
876
    if os.path.exists(outpath):
1✔
877
        return outpath
×
878

879
    # Did we download it yet?
880
    wwwfile = ('http://srtm.csi.cgiar.org/wp-content/uploads/files/srtm_5x5/'
1✔
881
               'TIFF/srtm_' + zone + '.zip')
882
    dest_file = file_downloader(wwwfile)
1✔
883

884
    # None means we tried hard but we couldn't find it
885
    if not dest_file:
1✔
886
        return None
×
887

888
    # ok we have to extract it
889
    if not os.path.exists(outpath):
1✔
890
        with zipfile.ZipFile(dest_file) as zf:
1✔
891
            zf.extractall(tmpdir)
1✔
892

893
    # See if we're good, don't overfill the tmp directory
894
    assert os.path.exists(outpath)
1✔
895
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
896
    return outpath
1✔
897

898

899
def _download_nasadem_file(zone):
9✔
900
    with get_lock():
1✔
901
        return _download_nasadem_file_unlocked(zone)
1✔
902

903

904
def _download_nasadem_file_unlocked(zone):
9✔
905
    """Checks if the NASADEM data is in the directory and if not, download it.
906
    """
907

908
    # extract directory
909
    tmpdir = cfg.PATHS['tmp_dir']
1✔
910
    mkdir(tmpdir)
1✔
911
    wwwfile = ('https://e4ftl01.cr.usgs.gov/MEASURES/NASADEM_HGT.001/'
1✔
912
               '2000.02.11/NASADEM_HGT_{}.zip'.format(zone))
913
    demfile = '{}.hgt'.format(zone)
1✔
914
    outpath = os.path.join(tmpdir, demfile)
1✔
915

916
    # check if extracted file exists already
917
    if os.path.exists(outpath):
1✔
918
        return outpath
×
919

920
    # Did we download it yet?
921
    dest_file = file_downloader(wwwfile)
1✔
922

923
    # None means we tried hard but we couldn't find it
924
    if not dest_file:
1✔
925
        return None
×
926

927
    # ok we have to extract it
928
    if not os.path.exists(outpath):
1✔
929
        with zipfile.ZipFile(dest_file) as zf:
1✔
930
            zf.extract(demfile, path=tmpdir)
1✔
931

932
    # See if we're good, don't overfill the tmp directory
933
    assert os.path.exists(outpath)
1✔
934
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
935
    return outpath
1✔
936

937

938
def _download_tandem_file(zone):
9✔
939
    with get_lock():
1✔
940
        return _download_tandem_file_unlocked(zone)
1✔
941

942

943
def _download_tandem_file_unlocked(zone):
9✔
944
    """Checks if the tandem data is in the directory and if not, download it.
945
    """
946

947
    # extract directory
948
    tmpdir = cfg.PATHS['tmp_dir']
1✔
949
    mkdir(tmpdir)
1✔
950
    bname = zone.split('/')[-1] + '_DEM.tif'
1✔
951
    wwwfile = ('https://download.geoservice.dlr.de/TDM90/files/DEM/'
1✔
952
               '{}.zip'.format(zone))
953
    outpath = os.path.join(tmpdir, bname)
1✔
954

955
    # check if extracted file exists already
956
    if os.path.exists(outpath):
1✔
957
        return outpath
×
958

959
    dest_file = download_with_authentication(wwwfile, 'geoservice.dlr.de')
1✔
960

961
    # That means we tried hard but we couldn't find it
962
    if not dest_file:
1✔
963
        return None
×
964
    elif not zipfile.is_zipfile(dest_file):
1✔
965
        # If the TanDEM-X tile does not exist, an invalid file is created.
966
        # See https://github.com/OGGM/oggm/issues/893 for more details
967
        return None
1✔
968

969
    # ok we have to extract it
970
    if not os.path.exists(outpath):
1✔
971
        with zipfile.ZipFile(dest_file) as zf:
1✔
972
            for fn in zf.namelist():
1✔
973
                if 'DEM/' + bname in fn:
1✔
974
                    break
×
975
            with open(outpath, 'wb') as fo:
1✔
976
                fo.write(zf.read(fn))
1✔
977

978
    # See if we're good, don't overfill the tmp directory
979
    assert os.path.exists(outpath)
1✔
980
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
981
    return outpath
1✔
982

983

984
def _download_dem3_viewpano(zone):
9✔
985
    with get_lock():
1✔
986
        return _download_dem3_viewpano_unlocked(zone)
1✔
987

988

989
def _download_dem3_viewpano_unlocked(zone):
9✔
990
    """Checks if the DEM3 data is in the directory and if not, download it.
991
    """
992

993
    # extract directory
994
    tmpdir = cfg.PATHS['tmp_dir']
1✔
995
    mkdir(tmpdir)
1✔
996
    outpath = os.path.join(tmpdir, zone + '.tif')
1✔
997
    extract_dir = os.path.join(tmpdir, 'tmp_' + zone)
1✔
998
    mkdir(extract_dir, reset=True)
1✔
999

1000
    # check if extracted file exists already
1001
    if os.path.exists(outpath):
1✔
1002
        return outpath
×
1003

1004
    # OK, so see if downloaded already
1005
    # some files have a newer version 'v2'
1006
    if zone in ['R33', 'R34', 'R35', 'R36', 'R37', 'R38', 'Q32', 'Q33', 'Q34',
1✔
1007
                'Q35', 'Q36', 'Q37', 'Q38', 'Q39', 'Q40', 'P31', 'P32', 'P33',
1008
                'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40']:
1009
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + 'v2.zip'
×
1010
    elif zone in DEM3REG.keys():
1✔
1011
        # We prepared these files as tif already
1012
        ifile = ('https://cluster.klima.uni-bremen.de/~oggm/dem/'
1✔
1013
                 'DEM3_MERGED/{}.tif'.format(zone))
1014
        return file_downloader(ifile)
1✔
1015
    else:
1016
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + '.zip'
1✔
1017

1018
    dfile = file_downloader(ifile)
1✔
1019

1020
    # None means we tried hard but we couldn't find it
1021
    if not dfile:
1✔
1022
        return None
×
1023

1024
    # ok we have to extract it
1025
    with zipfile.ZipFile(dfile) as zf:
1✔
1026
        zf.extractall(extract_dir)
1✔
1027

1028
    # Serious issue: sometimes, if a southern hemisphere URL is queried for
1029
    # download and there is none, a NH zip file is downloaded.
1030
    # Example: http://viewfinderpanoramas.org/dem3/SN29.zip yields N29!
1031
    # BUT: There are southern hemisphere files that download properly. However,
1032
    # the unzipped folder has the file name of
1033
    # the northern hemisphere file. Some checks that the correct file exists:
1034
    if len(zone) == 4 and zone.startswith('S'):
1✔
1035
        zonedir = os.path.join(extract_dir, zone[1:])
×
1036
    else:
1037
        zonedir = os.path.join(extract_dir, zone)
1✔
1038
    globlist = glob.glob(os.path.join(zonedir, '*.hgt'))
1✔
1039

1040
    # take care of the special file naming cases
1041
    if zone in DEM3REG.keys():
1✔
1042
        globlist = glob.glob(os.path.join(extract_dir, '*', '*.hgt'))
×
1043

1044
    if not globlist:
1✔
1045
        # Final resort
1046
        globlist = (findfiles(extract_dir, '.hgt') or
×
1047
                    findfiles(extract_dir, '.HGT'))
1048
        if not globlist:
×
1049
            raise RuntimeError("We should have some files here, but we don't")
×
1050

1051
    # merge the single HGT files (can be a bit inefficient, because not every
1052
    # single file might be exactly within extent...)
1053
    rfiles = [rasterio.open(s) for s in globlist]
1✔
1054
    dest, output_transform = merge_tool(rfiles)
1✔
1055
    profile = rfiles[0].profile
1✔
1056
    if 'affine' in profile:
1✔
1057
        profile.pop('affine')
×
1058
    profile['transform'] = output_transform
1✔
1059
    profile['height'] = dest.shape[1]
1✔
1060
    profile['width'] = dest.shape[2]
1✔
1061
    profile['driver'] = 'GTiff'
1✔
1062
    with rasterio.open(outpath, 'w', **profile) as dst:
1✔
1063
        dst.write(dest)
1✔
1064
    for rf in rfiles:
1✔
1065
        rf.close()
1✔
1066

1067
    # delete original files to spare disk space
1068
    for s in globlist:
1✔
1069
        os.remove(s)
1✔
1070
    del_empty_dirs(tmpdir)
1✔
1071

1072
    # See if we're good, don't overfill the tmp directory
1073
    assert os.path.exists(outpath)
1✔
1074
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1075
    return outpath
1✔
1076

1077

1078
def _download_aster_file(zone):
9✔
1079
    with get_lock():
1✔
1080
        return _download_aster_file_unlocked(zone)
1✔
1081

1082

1083
def _download_aster_file_unlocked(zone):
9✔
1084
    """Checks if the ASTER data is in the directory and if not, download it.
1085
    """
1086

1087
    # extract directory
1088
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1089
    mkdir(tmpdir)
1✔
1090
    wwwfile = ('https://e4ftl01.cr.usgs.gov/ASTT/ASTGTM.003/'
1✔
1091
               '2000.03.01/{}.zip'.format(zone))
1092
    outpath = os.path.join(tmpdir, zone + '_dem.tif')
1✔
1093

1094
    # check if extracted file exists already
1095
    if os.path.exists(outpath):
1✔
1096
        return outpath
×
1097

1098
    # download from NASA Earthdata with credentials
1099
    dest_file = download_with_authentication(wwwfile, 'urs.earthdata.nasa.gov')
1✔
1100

1101
    # That means we tried hard but we couldn't find it
1102
    if not dest_file:
1✔
1103
        return None
×
1104

1105
    # ok we have to extract it
1106
    if not os.path.exists(outpath):
1✔
1107
        with zipfile.ZipFile(dest_file) as zf:
1✔
1108
            zf.extractall(tmpdir)
1✔
1109

1110
    # See if we're good, don't overfill the tmp directory
1111
    assert os.path.exists(outpath)
1✔
1112
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1113
    return outpath
1✔
1114

1115

1116
def _download_topo_file_from_cluster(fname):
9✔
1117
    with get_lock():
×
1118
        return _download_topo_file_from_cluster_unlocked(fname)
×
1119

1120

1121
def _download_topo_file_from_cluster_unlocked(fname):
9✔
1122
    """Checks if the special topo data is in the directory and if not,
1123
    downloads it from the cluster.
1124
    """
1125

1126
    # extract directory
1127
    tmpdir = cfg.PATHS['tmp_dir']
×
1128
    mkdir(tmpdir)
×
1129
    outpath = os.path.join(tmpdir, fname)
×
1130

1131
    url = 'https://cluster.klima.uni-bremen.de/data/dems/'
×
1132
    url += fname + '.zip'
×
1133
    dfile = file_downloader(url)
×
1134

1135
    if not os.path.exists(outpath):
×
1136
        logger.info('Extracting ' + fname + '.zip to ' + outpath + '...')
×
1137
        with zipfile.ZipFile(dfile) as zf:
×
1138
            zf.extractall(tmpdir)
×
1139

1140
    # See if we're good, don't overfill the tmp directory
1141
    assert os.path.exists(outpath)
×
1142
    cfg.get_lru_handler(tmpdir).append(outpath)
×
1143
    return outpath
×
1144

1145

1146
def _download_copdem_file(cppfile, tilename, source):
9✔
1147
    with get_lock():
1✔
1148
        return _download_copdem_file_unlocked(cppfile, tilename, source)
1✔
1149

1150

1151
def _download_copdem_file_unlocked(cppfile, tilename, source):
9✔
1152
    """Checks if Copernicus DEM file is in the directory, if not download it.
1153

1154
    cppfile : name of the tarfile to download
1155
    tilename : name of folder and tif file within the cppfile
1156
    source : either 'COPDEM90' or 'COPDEM30'
1157

1158
    """
1159

1160
    # extract directory
1161
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1162
    mkdir(tmpdir)
1✔
1163

1164
    # tarfiles are extracted in directories per each tile
1165
    fpath = '{0}_DEM.tif'.format(tilename)
1✔
1166
    demfile = os.path.join(tmpdir, fpath)
1✔
1167

1168
    # check if extracted file exists already
1169
    if os.path.exists(demfile):
1✔
1170
        return demfile
×
1171

1172
    # Did we download it yet?
1173
    ftpfile = ('ftps://cdsdata.copernicus.eu:990/' +
1✔
1174
               'datasets/COP-DEM_GLO-{}-DGED/2022_1/'.format(source[-2:]) +
1175
               cppfile)
1176

1177
    dest_file = download_with_authentication(ftpfile,
1✔
1178
                                             'spacedata.copernicus.eu')
1179

1180
    # None means we tried hard but we couldn't find it
1181
    if not dest_file:
1✔
1182
        return None
×
1183

1184
    # ok we have to extract it
1185
    if not os.path.exists(demfile):
1✔
1186
        tiffile = os.path.join(tilename, 'DEM', fpath)
1✔
1187
        with tarfile.open(dest_file) as tf:
1✔
1188
            tmember = tf.getmember(tiffile)
1✔
1189
            # do not extract the full path of the file
1190
            tmember.name = os.path.basename(tf.getmember(tiffile).name)
1✔
1191
            tf.extract(tmember, tmpdir)
1✔
1192

1193
    # See if we're good, don't overfill the tmp directory
1194
    assert os.path.exists(demfile)
1✔
1195
    cfg.get_lru_handler(tmpdir).append(demfile)
1✔
1196

1197
    return demfile
1✔
1198

1199

1200
def _download_aw3d30_file(zone):
9✔
1201
    with get_lock():
1✔
1202
        return _download_aw3d30_file_unlocked(zone)
1✔
1203

1204

1205
def _download_aw3d30_file_unlocked(fullzone):
9✔
1206
    """Checks if the AW3D30 data is in the directory and if not, download it.
1207
    """
1208

1209
    # extract directory
1210
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1211
    mkdir(tmpdir)
1✔
1212

1213
    # tarfiles are extracted in directories per each tile
1214
    tile = fullzone.split('/')[1]
1✔
1215
    demfile = os.path.join(tmpdir, tile, tile + '_AVE_DSM.tif')
1✔
1216

1217
    # check if extracted file exists already
1218
    if os.path.exists(demfile):
1✔
1219
        return demfile
×
1220

1221
    # Did we download it yet?
1222
    ftpfile = ('ftp://ftp.eorc.jaxa.jp/pub/ALOS/ext1/AW3D30/release_v1804/'
1✔
1223
               + fullzone + '.tar.gz')
1224
    try:
1✔
1225
        dest_file = file_downloader(ftpfile, timeout=180)
1✔
1226
    except urllib.error.URLError:
×
1227
        # This error is raised if file is not available, could be water
1228
        return None
×
1229

1230
    # None means we tried hard but we couldn't find it
1231
    if not dest_file:
1✔
1232
        return None
×
1233

1234
    # ok we have to extract it
1235
    if not os.path.exists(demfile):
1✔
1236
        from oggm.utils import robust_tar_extract
1✔
1237
        dempath = os.path.dirname(demfile)
1✔
1238
        robust_tar_extract(dest_file, dempath)
1✔
1239

1240
    # See if we're good, don't overfill the tmp directory
1241
    assert os.path.exists(demfile)
1✔
1242
    # this tarfile contains several files
1243
    for file in os.listdir(dempath):
1✔
1244
        cfg.get_lru_handler(tmpdir).append(os.path.join(dempath, file))
1✔
1245
    return demfile
1✔
1246

1247

1248
def _download_mapzen_file(zone):
9✔
1249
    with get_lock():
1✔
1250
        return _download_mapzen_file_unlocked(zone)
1✔
1251

1252

1253
def _download_mapzen_file_unlocked(zone):
9✔
1254
    """Checks if the mapzen data is in the directory and if not, download it.
1255
    """
1256
    bucket = 'elevation-tiles-prod'
1✔
1257
    prefix = 'geotiff'
1✔
1258
    url = 'http://s3.amazonaws.com/%s/%s/%s' % (bucket, prefix, zone)
1✔
1259

1260
    # That's all
1261
    return file_downloader(url, timeout=180)
1✔
1262

1263

1264
def get_prepro_gdir(rgi_version, rgi_id, border, prepro_level, base_url=None):
9✔
1265
    with get_lock():
2✔
1266
        return _get_prepro_gdir_unlocked(rgi_version, rgi_id, border,
2✔
1267
                                         prepro_level, base_url=base_url)
1268

1269

1270
def get_prepro_base_url(base_url=None, rgi_version=None, border=None,
9✔
1271
                        prepro_level=None):
1272
    """Extended base url where to find the desired gdirs."""
1273

1274
    if base_url is None:
2✔
1275
        raise InvalidParamsError('Starting with v1.6, users now have to '
×
1276
                                 'explicitly indicate the url they want '
1277
                                 'to start from.')
1278

1279
    if not base_url.endswith('/'):
2✔
1280
        base_url += '/'
×
1281

1282
    if rgi_version is None:
2✔
1283
        rgi_version = cfg.PARAMS['rgi_version']
1✔
1284

1285
    if border is None:
2✔
1286
        border = cfg.PARAMS['border']
1✔
1287

1288
    url = base_url
2✔
1289
    url += 'RGI{}/'.format(rgi_version)
2✔
1290
    url += 'b_{:03d}/'.format(int(border))
2✔
1291
    url += 'L{:d}/'.format(prepro_level)
2✔
1292
    return url
2✔
1293
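# --- Example: how the extended preprocessed-directories URL is assembled.
# The base URL is a hypothetical placeholder; since v1.6 users must pass
# their own explicitly.
from oggm.utils._downloads import get_prepro_base_url

url = get_prepro_base_url(base_url='https://example.com/gdirs/',
                          rgi_version='62', border=80, prepro_level=3)
print(url)  # -> https://example.com/gdirs/RGI62/b_080/L3/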

1294

1295
def _get_prepro_gdir_unlocked(rgi_version, rgi_id, border, prepro_level,
9✔
1296
                              base_url=None):
1297

1298
    url = get_prepro_base_url(rgi_version=rgi_version, border=border,
2✔
1299
                              prepro_level=prepro_level, base_url=base_url)
1300
    url += '{}/{}.tar' .format(rgi_id[:8], rgi_id[:11])
2✔
1301
    tar_base = file_downloader(url)
2✔
1302
    if tar_base is None:
2✔
1303
        raise RuntimeError('Could not find file at ' + url)
×
1304

1305
    return tar_base
2✔
1306

1307

1308
def get_geodetic_mb_dataframe(file_path=None):
9✔
1309
    """Fetches the reference geodetic dataframe for calibration.
1310

1311
    Currently that's the data from Hugonnet et al. (2021), corrected for
1312
    outliers and with void filled. The data preparation script is
1313
    available at
1314
    https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/convert.ipynb
1315

1316
    Parameters
1317
    ----------
1318
    file_path : str
1319
        in case you have your own file to parse (check the format first!)
1320

1321
    Returns
1322
    -------
1323
    a DataFrame with the data.
1324
    """
1325

1326
    # fetch the file online or read custom file
1327
    if file_path is None:
7✔
1328
        base_url = 'https://cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/'
7✔
1329
        file_name = 'hugonnet_2021_ds_rgi60_pergla_rates_10_20_worldwide_filled.hdf'
7✔
1330
        file_path = file_downloader(base_url + file_name)
7✔
1331

1332
    # Did we open it yet?
1333
    if file_path in cfg.DATA:
7✔
1334
        return cfg.DATA[file_path]
4✔
1335

1336
    # If not let's go
1337
    extension = os.path.splitext(file_path)[1]
7✔
1338
    if extension == '.csv':
7✔
1339
        df = pd.read_csv(file_path, index_col=0)
×
1340
    elif extension == '.hdf':
7✔
1341
        df = pd.read_hdf(file_path)
7✔
1342

1343
    # Check for missing data (old files)
1344
    if len(df.loc[df['dmdtda'].isnull()]) > 0:
7✔
1345
        raise InvalidParamsError('The reference file you are using has missing '
×
1346
                                 'data and is probably outdated (sorry for '
1347
                                 'that). Delete the file at '
1348
                                 f'{file_path} and start again.')
1349
    cfg.DATA[file_path] = df
7✔
1350
    return df
7✔
1351
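# --- Usage sketch (assumes an initialized OGGM config, a writable working
# directory and internet access the first time the file is fetched; the
# working directory path is a hypothetical example).
from oggm import cfg
from oggm.utils._downloads import get_geodetic_mb_dataframe

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # hypothetical
mb_df = get_geodetic_mb_dataframe()
print(mb_df['dmdtda'].describe())  # 'dmdtda' is the column checked above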

1352

1353
def get_temp_bias_dataframe(dataset='w5e5'):
9✔
1354
    """Fetches the reference geodetic dataframe for calibration.
1355

1356
    Currently, that's the data from Hughonnet et al. (2021), corrected for
1357
    outliers and with void filled. The data preparation script is
1358
    available at
1359
    https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/convert.ipynb
1360

1361
    Parameters
1362
    ----------
1363
    file_path : str
1364
        in case you have your own file to parse (check the format first!)
1365

1366
    Returns
1367
    -------
1368
    a DataFrame with the data.
1369
    """
1370

1371
    if dataset != 'w5e5':
1✔
1372
        raise NotImplementedError(f'No such dataset available yet: {dataset}')
1373

1374
    # fetch the file online
1375
    base_url = ('https://cluster.klima.uni-bremen.de/~oggm/test_files/'
1✔
1376
                'w5e5_temp_bias_v2023.3.csv')
1377
    file_path = file_downloader(base_url)
1✔
1378

1379
    # Did we open it yet?
1380
    if file_path in cfg.DATA:
1✔
1381
        return cfg.DATA[file_path]
1✔
1382

1383
    # If not let's go
1384
    extension = os.path.splitext(file_path)[1]
1✔
1385
    if extension == '.csv':
1✔
1386
        df = pd.read_csv(file_path, index_col=0)
1✔
1387
    elif extension == '.hdf':
×
1388
        df = pd.read_hdf(file_path)
×
1389

1390
    cfg.DATA[file_path] = df
1✔
1391
    return df
1✔
1392

1393

1394
def srtm_zone(lon_ex, lat_ex):
9✔
1395
    """Returns a list of SRTM zones covering the desired extent.
1396
    """
1397

1398
    # SRTM are sorted in tiles of 5 degrees
1399
    srtm_x0 = -180.
1✔
1400
    srtm_y0 = 60.
1✔
1401
    srtm_dx = 5.
1✔
1402
    srtm_dy = -5.
1✔
1403

1404
    # quick n dirty solution to be sure that we will cover the whole range
1405
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1406
    # int() to avoid Deprec warning:
1407
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1408
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1409
    # int() to avoid Deprec warning
1410
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1411

1412
    zones = []
1✔
1413
    for lon in lon_ex:
1✔
1414
        for lat in lat_ex:
1✔
1415
            dx = lon - srtm_x0
1✔
1416
            dy = lat - srtm_y0
1✔
1417
            if dy > 0:
1✔
1418
                continue
×
1419
            zx = np.ceil(dx / srtm_dx)
1✔
1420
            zy = np.ceil(dy / srtm_dy)
1✔
1421
            zones.append('{:02.0f}_{:02.0f}'.format(zx, zy))
1✔
1422
    return list(sorted(set(zones)))
1✔
1423
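# --- Example: SRTM zones covering a small extent in the Alps (roughly around
# Hintereisferner at ~10.75E, 46.8N). Zones are '{x}_{y}' indices of the
# 5-degree CGIAR tiles.
from oggm.utils._downloads import srtm_zone

print(srtm_zone(lon_ex=[10.7, 10.8], lat_ex=[46.7, 46.9]))  # -> ['39_03']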

1424

1425
def _tandem_path(lon_tile, lat_tile):
9✔
1426

1427
    # OK we have a proper tile now
1428
    # This changed in December 2022
1429

1430
    # First folder level is sorted from S to N
1431
    level_0 = 'S' if lat_tile < 0 else 'N'
1✔
1432
    level_0 += '{:02d}'.format(abs(lat_tile))
1✔
1433

1434
    # Second folder level is sorted from W to E, but in 10 steps
1435
    level_1 = 'W' if lon_tile < 0 else 'E'
1✔
1436
    level_1 += '{:03d}'.format(divmod(abs(lon_tile), 10)[0] * 10)
1✔
1437

1438
    # Level 2 is the tile name formatting, which depends on lat
1439
    level_2 = 'W' if lon_tile < 0 else 'E'
1✔
1440
    if abs(lat_tile) <= 60:
1✔
1441
        level_2 += '{:03d}'.format(abs(lon_tile))
1✔
1442
    elif abs(lat_tile) <= 80:
1✔
1443
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 2)[0] * 2)
1✔
1444
    else:
1445
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 4)[0] * 4)
1✔
1446

1447
    # Final path
1448
    out = (level_0 + '/' + level_1 + '/' +
1✔
1449
           'TDM1_DEM__30_{}{}'.format(level_0, level_2))
1450
    return out
1✔
1451
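# --- Example: the TanDEM-X path built for the tile at 46N, 10E follows the
# folder scheme described above (private helper, shown for illustration only).
from oggm.utils._downloads import _tandem_path

print(_tandem_path(10, 46))  # -> 'N46/E010/TDM1_DEM__30_N46E010'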

1452

1453
def tandem_zone(lon_ex, lat_ex):
9✔
1454
    """Returns a list of TanDEM-X zones covering the desired extent.
1455
    """
1456

1457
    # Files are one by one tiles, so lets loop over them
1458
    # For higher lats they are stored in steps of 2 and 4. My code below
1459
    # is probably giving more files than needed but better safe than sorry
1460
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1461
                          dtype=int)
1462
    zones = []
1✔
1463
    for lat in lat_tiles:
1✔
1464
        if abs(lat) < 60:
1✔
1465
            l0 = np.floor(lon_ex[0])
1✔
1466
            l1 = np.floor(lon_ex[1])
1✔
1467
        elif abs(lat) < 80:
1✔
1468
            l0 = divmod(lon_ex[0], 2)[0] * 2
1✔
1469
            l1 = divmod(lon_ex[1], 2)[0] * 2
1✔
1470
        elif abs(lat) < 90:
1✔
1471
            l0 = divmod(lon_ex[0], 4)[0] * 4
1✔
1472
            l1 = divmod(lon_ex[1], 4)[0] * 4
1✔
1473
        lon_tiles = np.arange(l0, l1+1, dtype=int)
1✔
1474
        for lon in lon_tiles:
1✔
1475
            zones.append(_tandem_path(lon, lat))
1✔
1476
    return list(sorted(set(zones)))
1✔
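# Example (illustration only): a sub-degree box should collapse to a single
# tile path:
#
#     >>> tandem_zone([10.1, 10.9], [46.1, 46.9])
#     ['N46/E010/TDM1_DEM__30_N46E010']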
1477

1478

1479
def _aw3d30_path(lon_tile, lat_tile):
9✔
1480

1481
    # OK we have a proper tile now
1482

1483
    # Folders are sorted with N E S W in 5 degree steps
1484
    # But in N and E the lower boundary is indicated
1485
    # e.g. N060 contains N060 - N064
1486
    # e.g. E000 contains E000 - E004
1487
    # but S and W indicate the upper boundary:
1488
    # e.g. S010 contains S006 - S010
1489
    # e.g. W095 contains W091 - W095
1490

1491
    # get letters
1492
    ns = 'S' if lat_tile < 0 else 'N'
1✔
1493
    ew = 'W' if lon_tile < 0 else 'E'
1✔
1494

1495
    # get lat/lon
1496
    lon = abs(5 * np.floor(lon_tile/5))
1✔
1497
    lat = abs(5 * np.floor(lat_tile/5))
1✔
1498

1499
    folder = '%s%.3d%s%.3d' % (ns, lat, ew, lon)
1✔
1500
    filename = '%s%.3d%s%.3d' % (ns, abs(lat_tile), ew, abs(lon_tile))
1✔
1501

1502
    # Final path
1503
    out = folder + '/' + filename
1✔
1504
    return out
1✔
1505

1506

1507
def aw3d30_zone(lon_ex, lat_ex):
9✔
1508
    """Returns a list of AW3D30 zones covering the desired extent.
1509
    """
1510

1511
    # Files are 1-degree by 1-degree tiles, so let's loop over them
1512
    lon_tiles = np.arange(np.floor(lon_ex[0]), np.ceil(lon_ex[1]+1e-9),
1✔
1513
                          dtype=int)
1514
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1515
                          dtype=int)
1516
    zones = []
1✔
1517
    for lon in lon_tiles:
1✔
1518
        for lat in lat_tiles:
1✔
1519
            zones.append(_aw3d30_path(lon, lat))
1✔
1520
    return list(sorted(set(zones)))
1✔
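# Example (illustration only): the folder names the 5-degree block, the file
# names the 1-degree tile itself, so one should get
#
#     >>> aw3d30_zone([10.1, 10.9], [46.1, 46.9])
#     ['N045E010/N046E010']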
1521

1522

1523
def _extent_to_polygon(lon_ex, lat_ex, to_crs=None):
9✔
1524

1525
    if lon_ex[0] == lon_ex[1] and lat_ex[0] == lat_ex[1]:
1✔
1526
        out = shpg.Point(lon_ex[0], lat_ex[0])
1✔
1527
    else:
1528
        x = [lon_ex[0], lon_ex[1], lon_ex[1], lon_ex[0], lon_ex[0]]
1✔
1529
        y = [lat_ex[0], lat_ex[0], lat_ex[1], lat_ex[1], lat_ex[0]]
1✔
1530
        out = shpg.Polygon(np.array((x, y)).T)
1✔
1531
    if to_crs is not None:
1✔
1532
        out = salem.transform_geometry(out, to_crs=to_crs)
1✔
1533
    return out
1✔
1534

1535

1536
def arcticdem_zone(lon_ex, lat_ex):
9✔
1537
    """Returns a list of Arctic-DEM zones covering the desired extent.
1538
    """
1539

1540
    gdf = gpd.read_file(get_demo_file('ArcticDEM_Tile_Index_Rel7_by_tile.shp'))
1✔
1541
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1542
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1543
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1544

1545

1546
def rema_zone(lon_ex, lat_ex):
9✔
1547
    """Returns a list of REMA-DEM zones covering the desired extent.
1548
    """
1549

1550
    gdf = gpd.read_file(get_demo_file('REMA_Tile_Index_Rel1.1.shp'))
1✔
1551
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1552
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1553
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1554

1555

1556
def alaska_dem_zone(lon_ex, lat_ex):
9✔
1557
    """Returns a list of Alaska-DEM zones covering the desired extent.
1558
    """
1559

1560
    gdf = gpd.read_file(get_demo_file('Alaska_albers_V3_tiles.shp'))
1✔
1561
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1562
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1563
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1564

1565

1566
def copdem_zone(lon_ex, lat_ex, source):
9✔
1567
    """Returns a list of Copernicus DEM tarfile and tilename tuples
1568
    """
1569

1570
    # because we use both meters and arc secs in our filenames...
1571
    if source[-2:] == '90':
1✔
1572
        asec = '30'
1✔
1573
    elif source[-2:] == '30':
1✔
1574
        asec = '10'
1✔
1575
    else:
1576
        raise InvalidDEMError('COPDEM Version not valid.')
×
1577

1578
    # either reuse or load lookup table
1579
    if source in cfg.DATA:
1✔
1580
        df = cfg.DATA[source]
1✔
1581
    else:
1582
        df = pd.read_csv(get_demo_file('{}_2022_1.csv'.format(source.lower())))
1✔
1583
        cfg.DATA[source] = df
1✔
1584

1585
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1586
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1587
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1588

1589
    flist = []
1✔
1590
    for lat in lats:
1✔
1591
        # north or south?
1592
        ns = 'S' if lat < 0 else 'N'
1✔
1593
        for lon in lons:
1✔
1594
            # east or west?
1595
            ew = 'W' if lon < 0 else 'E'
1✔
1596
            lat_str = '{}{:02.0f}'.format(ns, abs(lat))
1✔
1597
            lon_str = '{}{:03.0f}'.format(ew, abs(lon))
1✔
1598
            try:
1✔
1599
                filename = df.loc[(df['Long'] == lon_str) &
1✔
1600
                                  (df['Lat'] == lat_str)]['CPP filename'].iloc[0]
1601
                flist.append((filename,
1✔
1602
                              'Copernicus_DSM_{}_{}_00_{}_00'.format(asec, lat_str, lon_str)))
1603
            except IndexError:
×
1604
                # COPDEM is global, if we miss tiles it is probably in the ocean
1605
                pass
×
1606
    return flist
1✔
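# Example (illustration only): each returned element is a (tarfile, tilename)
# tuple. For a point near 46N / 10E and source='COPDEM30' the tile name part
# would be 'Copernicus_DSM_10_N46_00_E010_00' (COPDEM90 uses '_30_' instead);
# the tar file name itself comes from the lookup table and is not reproduced
# here.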
1607

1608

1609
def dem3_viewpano_zone(lon_ex, lat_ex):
9✔
1610
    """Returns a list of DEM3 zones covering the desired extent.
1611

1612
    http://viewfinderpanoramas.org/Coverage%20map%20viewfinderpanoramas_org3.htm
1613
    """
1614

1615
    for _f in DEM3REG.keys():
1✔
1616

1617
        if (np.min(lon_ex) >= DEM3REG[_f][0]) and \
1✔
1618
           (np.max(lon_ex) <= DEM3REG[_f][1]) and \
1619
           (np.min(lat_ex) >= DEM3REG[_f][2]) and \
1620
           (np.max(lat_ex) <= DEM3REG[_f][3]):
1621

1622
            # test some weird inset files in Antarctica
1623
            if (np.min(lon_ex) >= -91.) and (np.max(lon_ex) <= -90.) and \
1✔
1624
               (np.min(lat_ex) >= -72.) and (np.max(lat_ex) <= -68.):
1625
                return ['SR15']
×
1626

1627
            elif (np.min(lon_ex) >= -47.) and (np.max(lon_ex) <= -43.) and \
1✔
1628
                 (np.min(lat_ex) >= -61.) and (np.max(lat_ex) <= -60.):
1629
                return ['SP23']
×
1630

1631
            elif (np.min(lon_ex) >= 162.) and (np.max(lon_ex) <= 165.) and \
1✔
1632
                 (np.min(lat_ex) >= -68.) and (np.max(lat_ex) <= -66.):
1633
                return ['SQ58']
×
1634

1635
            # test some rogue Greenland tiles as well
1636
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1637
                 (np.min(lat_ex) >= 76.) and (np.max(lat_ex) <= 80.):
1638
                return ['T19']
×
1639

1640
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1641
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1642
                return ['U19']
×
1643

1644
            elif (np.min(lon_ex) >= -66.) and (np.max(lon_ex) <= -60.) and \
1✔
1645
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1646
                return ['U20']
×
1647

1648
            elif (np.min(lon_ex) >= -60.) and (np.max(lon_ex) <= -54.) and \
1✔
1649
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1650
                return ['U21']
×
1651

1652
            elif (np.min(lon_ex) >= -54.) and (np.max(lon_ex) <= -48.) and \
1✔
1653
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1654
                return ['U22']
×
1655

1656
            elif (np.min(lon_ex) >= -25.) and (np.max(lon_ex) <= -13.) and \
1✔
1657
                 (np.min(lat_ex) >= 63.) and (np.max(lat_ex) <= 67.):
1658
                return ['ISL']
1✔
1659

1660
            else:
1661
                return [_f]
1✔
1662

1663
    # if the tile doesn't have a special name, its name can be found like this:
1664
    # corrected SRTMs are sorted in tiles of 6 deg longitude and 4 deg latitude
1665
    srtm_x0 = -180.
1✔
1666
    srtm_y0 = 0.
1✔
1667
    srtm_dx = 6.
1✔
1668
    srtm_dy = 4.
1✔
1669

1670
    # quick n dirty solution to be sure that we will cover the whole range
1671
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1672
    # TODO: Fabien, find out what Johannes wanted with this +3
1673
    # +3 just makes the number a bit larger, to stay on the safe side
1674
    # int() to avoid Deprec warning
1675
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dy) + 3))
1✔
1676
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1677
    # int() to avoid Deprec warning
1678
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dx) + 3))
1✔
1679

1680
    zones = []
1✔
1681
    for lon in lon_ex:
1✔
1682
        for lat in lat_ex:
1✔
1683
            dx = lon - srtm_x0
1✔
1684
            dy = lat - srtm_y0
1✔
1685
            zx = np.ceil(dx / srtm_dx)
1✔
1686
            # convert number to letter
1687
            zy = chr(int(abs(dy / srtm_dy)) + ord('A'))
1✔
1688
            if lat >= 0:
1✔
1689
                zones.append('%s%02.0f' % (zy, zx))
1✔
1690
            else:
1691
                zones.append('S%s%02.0f' % (zy, zx))
×
1692
    return list(sorted(set(zones)))
1✔
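# Example (illustration only): the Alps do not fall in any of the special
# DEM3REG regions, so the generic 6x4 degree naming applies and one should get
#
#     >>> dem3_viewpano_zone([10.1, 10.9], [46.1, 46.9])
#     ['L32']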
1693

1694

1695
def aster_zone(lon_ex, lat_ex):
9✔
1696
    """Returns a list of ASTGTMV3 zones covering the desired extent.
1697

1698
    ASTER v3 tiles are 1 degree x 1 degree
1699
    N50 contains 50 to 50.9
1700
    E10 contains 10 to 10.9
1701
    S70 contains -69.99 to -69.0
1702
    W20 contains -19.99 to -19.0
1703
    """
1704

1705
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1706
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1707
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1708

1709
    zones = []
1✔
1710
    for lat in lats:
1✔
1711
        # north or south?
1712
        ns = 'S' if lat < 0 else 'N'
1✔
1713
        for lon in lons:
1✔
1714
            # east or west?
1715
            ew = 'W' if lon < 0 else 'E'
1✔
1716
            filename = 'ASTGTMV003_{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat),
1✔
1717
                                                                ew, abs(lon))
1718
            zones.append(filename)
1✔
1719
    return list(sorted(set(zones)))
1✔
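# Example (illustration only):
#
#     >>> aster_zone([10.1, 10.9], [46.1, 46.9])
#     ['ASTGTMV003_N46E010']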
1720

1721

1722
def nasadem_zone(lon_ex, lat_ex):
9✔
1723
    """Returns a list of NASADEM zones covering the desired extent.
1724

1725
    NASADEM tiles are 1 degree x 1 degree
1726
    N50 contains 50 to 50.9
1727
    E10 contains 10 to 10.9
1728
    S70 contains -69.99 to -69.0
1729
    W20 contains -19.99 to -19.0
1730
    """
1731

1732
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1733
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1734
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1735

1736
    zones = []
1✔
1737
    for lat in lats:
1✔
1738
        # north or south?
1739
        ns = 's' if lat < 0 else 'n'
1✔
1740
        for lon in lons:
1✔
1741
            # east or west?
1742
            ew = 'w' if lon < 0 else 'e'
1✔
1743
            filename = '{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat), ew,
1✔
1744
                                                     abs(lon))
1745
            zones.append(filename)
1✔
1746
    return list(sorted(set(zones)))
1✔
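# Example (illustration only): same 1x1 degree logic as ASTER, but with
# lower-case letters and no product prefix:
#
#     >>> nasadem_zone([10.1, 10.9], [46.1, 46.9])
#     ['n46e010']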
1747

1748

1749
def mapzen_zone(lon_ex, lat_ex, dx_meter=None, zoom=None):
9✔
1750
    """Returns a list of AWS mapzen zones covering the desired extent.
1751

1752
    For mapzen one has to specify the level of detail (zoom) one wants. The
1753
    best way in OGGM is to specify dx_meter of the underlying map and OGGM
1754
    will decide which zoom level works best.
1755
    """
1756

1757
    if dx_meter is None and zoom is None:
1✔
1758
        raise InvalidParamsError('Need either zoom level or dx_meter.')
×
1759

1760
    bottom, top = lat_ex
1✔
1761
    left, right = lon_ex
1✔
1762
    ybound = 85.0511
1✔
1763
    if bottom <= -ybound:
1✔
1764
        bottom = -ybound
1✔
1765
    if top <= -ybound:
1✔
1766
        top = -ybound
1✔
1767
    if bottom > ybound:
1✔
1768
        bottom = ybound
1✔
1769
    if top > ybound:
1✔
1770
        top = ybound
1✔
1771
    if right >= 180:
1✔
1772
        right = 179.999
1✔
1773
    if left >= 180:
1✔
1774
        left = 179.999
1✔
1775

1776
    if dx_meter:
1✔
1777
        # Find out the zoom so that we are close to the desired accuracy
1778
        lat = np.max(np.abs([bottom, top]))
1✔
1779
        zoom = int(np.ceil(math.log2((math.cos(lat * math.pi / 180) *
1✔
1780
                                      2 * math.pi * WEB_EARTH_RADUIS) /
1781
                                     (WEB_N_PIX * dx_meter))))
1782

1783
        # According to this we should just always stay above 10 (sorry)
1784
        # https://github.com/tilezen/joerd/blob/master/docs/data-sources.md
1785
        zoom = 10 if zoom < 10 else zoom
1✔
1786

1787
    # Code from planetutils
1788
    size = 2 ** zoom
1✔
1789
    xt = lambda x: int((x + 180.0) / 360.0 * size)
1✔
1790
    yt = lambda y: int((1.0 - math.log(math.tan(math.radians(y)) +
1✔
1791
                                       (1 / math.cos(math.radians(y))))
1792
                        / math.pi) / 2.0 * size)
1793
    tiles = []
1✔
1794
    for x in range(xt(left), xt(right) + 1):
1✔
1795
        for y in range(yt(top), yt(bottom) + 1):
1✔
1796
            tiles.append('/'.join(map(str, [zoom, x, str(y) + '.tif'])))
1✔
1797
    return tiles
1✔
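# Example (illustration only): for a 100 m map resolution at ~47N the zoom
# formula gives ceil(log2(cos(47 deg) * 2 * pi * R / (256 * 100))) = 11, so
# the returned tiles should look like '11/<x>/<y>.tif':
#
#     >>> tiles = mapzen_zone([10.1, 10.9], [46.1, 46.9], dx_meter=100)
#     >>> tiles[0].startswith('11/')
#     True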
1798

1799

1800
def get_demo_file(fname):
9✔
1801
    """Returns the path to the desired OGGM-sample-file.
1802

1803
    If the sample data is not cached, it will be downloaded from
1804
    https://github.com/OGGM/oggm-sample-data
1805

1806
    Parameters
1807
    ----------
1808
    fname : str
1809
        Filename of the desired OGGM-sample-file
1810

1811
    Returns
1812
    -------
1813
    str
1814
        Absolute path to the desired file.
1815
    """
1816

1817
    d = download_oggm_files()
7✔
1818
    if fname in d:
7✔
1819
        return d[fname]
7✔
1820
    else:
1821
        return None
1✔
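# Example (illustration only, assuming the sample-data download succeeds):
# a file that exists in the sample-data repository returns its absolute path
# in the local cache, anything else returns None:
#
#     >>> get_demo_file('no-such-file.nc') is None
#     True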
1822

1823

1824
def get_wgms_files():
9✔
1825
    """Get the path to the default WGMS-RGI link file and the data dir.
1826

1827
    Returns
1828
    -------
1829
    (links, datadir) : the RGI-WGMS links as a DataFrame, and the path to the data directory
1830
    """
1831

1832
    download_oggm_files()
4✔
1833
    sdir = os.path.join(cfg.CACHE_DIR,
4✔
1834
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1835
                        'wgms')
1836
    datadir = os.path.join(sdir, 'mbdata')
4✔
1837
    assert os.path.exists(datadir)
4✔
1838

1839
    outf = os.path.join(sdir, 'rgi_wgms_links_20220112.csv')
4✔
1840
    outf = pd.read_csv(outf, dtype={'RGI_REG': object})
4✔
1841

1842
    return outf, datadir
4✔
1843

1844

1845
def get_glathida_file():
9✔
1846
    """Get the path to the default GlaThiDa-RGI link file.
1847

1848
    Returns
1849
    -------
1850
    file : path to the file
1851
    """
1852

1853
    # Roll our own
1854
    download_oggm_files()
1✔
1855
    sdir = os.path.join(cfg.CACHE_DIR,
1✔
1856
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1857
                        'glathida')
1858
    outf = os.path.join(sdir, 'rgi_glathida_links.csv')
1✔
1859
    assert os.path.exists(outf)
1✔
1860
    return outf
1✔
1861

1862

1863
def get_rgi_dir(version=None, reset=False):
9✔
1864
    """Path to the RGI directory.
1865

1866
    If the RGI files are not present, download them.
1867

1868
    Parameters
1869
    ----------
1870
    version : str
1871
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
1872
    reset : bool
1873
        If True, deletes the RGI directory first and downloads the data
1874

1875
    Returns
1876
    -------
1877
    str
1878
        path to the RGI directory
1879
    """
1880

1881
    with get_lock():
1✔
1882
        return _get_rgi_dir_unlocked(version=version, reset=reset)
1✔
1883

1884

1885
def _get_rgi_dir_unlocked(version=None, reset=False):
9✔
1886

1887
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
1888
    if version is None:
1✔
1889
        version = cfg.PARAMS['rgi_version']
×
1890

1891
    if len(version) == 1:
1✔
1892
        version += '0'
1✔
1893

1894
    # Be sure the user gave a sensible path to the RGI dir
1895
    if not rgi_dir:
1✔
1896
        raise InvalidParamsError('The RGI data directory has to be '
×
1897
                                 'specified explicitly.')
1898
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
1899
    rgi_dir = os.path.join(rgi_dir, 'RGIV' + version)
1✔
1900
    mkdir(rgi_dir, reset=reset)
1✔
1901

1902
    if version == '50':
1✔
1903
        dfile = 'http://www.glims.org/RGI/rgi50_files/rgi50.zip'
1✔
1904
    elif version == '60':
1✔
1905
        dfile = 'http://www.glims.org/RGI/rgi60_files/00_rgi60.zip'
1✔
1906
    elif version == '61':
×
1907
        dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/rgi_61.zip'
×
1908
    elif version == '62':
×
1909
        dfile = 'https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62.zip'
×
1910

1911
    test_file = os.path.join(rgi_dir,
1✔
1912
                             '*_rgi*{}_manifest.txt'.format(version))
1913

1914
    if len(glob.glob(test_file)) == 0:
1✔
1915
        # if not there download it
1916
        ofile = file_downloader(dfile, reset=reset)
1✔
1917
        # Extract root
1918
        with zipfile.ZipFile(ofile) as zf:
1✔
1919
            zf.extractall(rgi_dir)
1✔
1920
        # Extract subdirs
1921
        pattern = '*_rgi{}_*.zip'.format(version)
1✔
1922
        for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
1✔
1923
            for filename in fnmatch.filter(files, pattern):
1✔
1924
                zfile = os.path.join(root, filename)
1✔
1925
                with zipfile.ZipFile(zfile) as zf:
1✔
1926
                    ex_root = zfile.replace('.zip', '')
1✔
1927
                    mkdir(ex_root)
1✔
1928
                    zf.extractall(ex_root)
1✔
1929
                # delete the zipfile after success
1930
                os.remove(zfile)
1✔
1931
        if len(glob.glob(test_file)) == 0:
1✔
1932
            raise RuntimeError('Could not find a manifest file in the RGI '
×
1933
                               'directory: ' + rgi_dir)
1934
    return rgi_dir
1✔
1935

1936

1937
def get_rgi_region_file(region, version=None, reset=False):
9✔
1938
    """Path to the RGI region file.
1939

1940
    If the RGI files are not present, download them.
1941

1942
    Parameters
1943
    ----------
1944
    region : str
1945
        from '01' to '19'
1946
    version : str
1947
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
1948
    reset : bool
1949
        If True, deletes the RGI directory first and downloads the data
1950

1951
    Returns
1952
    -------
1953
    str
1954
        path to the RGI shapefile
1955
    """
1956

1957
    rgi_dir = get_rgi_dir(version=version, reset=reset)
1✔
1958
    f = list(glob.glob(rgi_dir + "/*/*{}_*.shp".format(region)))
1✔
1959
    assert len(f) == 1
1✔
1960
    return f[0]
1✔
1961

1962

1963
def get_rgi_glacier_entities(rgi_ids, version=None):
9✔
1964
    """Get a list of glacier outlines selected from their RGI IDs.
1965

1966
    Will download RGI data if not present.
1967

1968
    Parameters
1969
    ----------
1970
    rgi_ids : list of str
1971
        the glaciers you want the outlines for
1972
    version : str
1973
        the rgi version
1974

1975
    Returns
1976
    -------
1977
    geopandas.GeoDataFrame
1978
        containing the desired RGI glacier outlines
1979
    """
1980

1981
    regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
1982
    if version is None:
×
1983
        version = rgi_ids[0].split('-')[0][-2:]
×
1984
    selection = []
×
1985
    for reg in sorted(np.unique(regions)):
×
1986
        sh = gpd.read_file(get_rgi_region_file(reg, version=version))
×
1987
        selection.append(sh.loc[sh.RGIId.isin(rgi_ids)])
×
1988

1989
    # Make a new dataframe of those
1990
    selection = pd.concat(selection)
×
1991
    selection.crs = sh.crs  # for geolocalisation
×
1992
    if len(selection) != len(rgi_ids):
×
1993
        raise RuntimeError('Could not find all RGI ids')
×
1994

1995
    return selection
×
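# Example (illustration only, requires the RGI region files to be downloaded):
# selects the outline of RGI60-11.00897 (Hintereisferner) from RGI region 11:
#
#     >>> gdf = get_rgi_glacier_entities(['RGI60-11.00897'])
#     >>> len(gdf)
#     1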
1996

1997

1998
def get_rgi_intersects_dir(version=None, reset=False):
9✔
1999
    """Path to the RGI directory containing the intersect files.
2000

2001
    If the files are not present, download them.
2002

2003
    Parameters
2004
    ----------
2005
    version : str
2006
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
2007
    reset : bool
2008
        If True, deletes the intersects before redownloading them
2009

2010
    Returns
2011
    -------
2012
    str
2013
        path to the directory
2014
    """
2015

2016
    with get_lock():
1✔
2017
        return _get_rgi_intersects_dir_unlocked(version=version, reset=reset)
1✔
2018

2019

2020
def _get_rgi_intersects_dir_unlocked(version=None, reset=False):
9✔
2021

2022
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
2023
    if version is None:
1✔
2024
        version = cfg.PARAMS['rgi_version']
×
2025

2026
    if len(version) == 1:
1✔
2027
        version += '0'
1✔
2028

2029
    # Be sure the user gave a sensible path to the RGI dir
2030
    if not rgi_dir:
1✔
2031
        raise InvalidParamsError('The RGI data directory has to be '
×
2032
                                 'specified explicitly.')
2033

2034
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
2035
    mkdir(rgi_dir)
1✔
2036

2037
    dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/'
1✔
2038
    dfile += 'RGI_V{}_Intersects.zip'.format(version)
1✔
2039
    if version == '62':
1✔
2040
        dfile = ('https://cluster.klima.uni-bremen.de/~oggm/rgi/'
×
2041
                 'rgi62_Intersects.zip')
2042

2043
    odir = os.path.join(rgi_dir, 'RGI_V' + version + '_Intersects')
1✔
2044
    if reset and os.path.exists(odir):
1✔
2045
        shutil.rmtree(odir)
×
2046

2047
    # A lot of code for backwards compat (sigh...)
2048
    if version in ['50', '60']:
1✔
2049
        test_file = os.path.join(odir, 'Intersects_OGGM_Manifest.txt')
1✔
2050
        if not os.path.exists(test_file):
1✔
2051
            # if not there download it
2052
            ofile = file_downloader(dfile, reset=reset)
1✔
2053
            # Extract root
2054
            with zipfile.ZipFile(ofile) as zf:
1✔
2055
                zf.extractall(odir)
1✔
2056
            if not os.path.exists(test_file):
1✔
2057
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2058
                                   'directory: ' + odir)
2059
    else:
2060
        test_file = os.path.join(odir,
×
2061
                                 '*ntersect*anifest.txt')
2062
        if len(glob.glob(test_file)) == 0:
×
2063
            # if not there download it
2064
            ofile = file_downloader(dfile, reset=reset)
×
2065
            # Extract root
2066
            with zipfile.ZipFile(ofile) as zf:
×
2067
                zf.extractall(odir)
×
2068
            # Extract subdirs
2069
            pattern = '*_rgi{}_*.zip'.format(version)
×
2070
            for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
×
2071
                for filename in fnmatch.filter(files, pattern):
×
2072
                    zfile = os.path.join(root, filename)
×
2073
                    with zipfile.ZipFile(zfile) as zf:
×
2074
                        ex_root = zfile.replace('.zip', '')
×
2075
                        mkdir(ex_root)
×
2076
                        zf.extractall(ex_root)
×
2077
                    # delete the zipfile after success
2078
                    os.remove(zfile)
×
2079
            if len(glob.glob(test_file)) == 0:
×
2080
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2081
                                   'directory: ' + odir)
2082

2083
    return odir
1✔
2084

2085

2086
def get_rgi_intersects_region_file(region=None, version=None, reset=False):
9✔
2087
    """Path to the RGI regional intersect file.
2088

2089
    If the RGI files are not present, download them.
2090

2091
    Parameters
2092
    ----------
2093
    region : str
2094
        from '00' to '19', with '00' being the global file (deprecated).
2095
        From RGI version '61' onwards, please use `get_rgi_intersects_entities`
2096
        with a list of glaciers instead of relying on the global file.
2097
    version : str
2098
        '5', '6', '61'... defaults to the one specified in cfg.PARAMS
2099
    reset : bool
2100
        If True, deletes the intersect file before redownloading it
2101

2102
    Returns
2103
    -------
2104
    str
2105
        path to the RGI intersects shapefile
2106
    """
2107

2108
    if version is None:
1✔
2109
        version = cfg.PARAMS['rgi_version']
×
2110
    if len(version) == 1:
1✔
2111
        version += '0'
1✔
2112

2113
    rgi_dir = get_rgi_intersects_dir(version=version, reset=reset)
1✔
2114

2115
    if region == '00':
1✔
2116
        if version in ['50', '60']:
1✔
2117
            version = 'AllRegs'
1✔
2118
            region = '*'
1✔
2119
        else:
2120
            raise InvalidParamsError("From RGI version 61 onwards, please use "
×
2121
                                     "get_rgi_intersects_entities() instead.")
2122
    f = list(glob.glob(os.path.join(rgi_dir, "*", '*intersects*' + region +
1✔
2123
                                    '_rgi*' + version + '*.shp')))
2124
    assert len(f) == 1
1✔
2125
    return f[0]
1✔
2126

2127

2128
def get_rgi_intersects_entities(rgi_ids, version=None):
9✔
2129
    """Get a list of glacier intersects selected from their RGI IDs.
2130

2131
    Parameters
2132
    ----------
2133
    rgi_ids: list of str
2134
        list of rgi_ids you want to look for intersections for
2135
    version: str
2136
        '5', '6', '61'... defaults to the one specified in cfg.PARAMS
2137

2138
    Returns
2139
    -------
2140
    geopandas.GeoDataFrame
2141
        with the selected intersects
2142
    """
2143

2144
    if version is None:
×
2145
        version = cfg.PARAMS['rgi_version']
×
2146
    if len(version) == 1:
×
2147
        version += '0'
×
2148
    try:
×
2149
        regions = [s.split('-')[3] for s in rgi_ids]
×
2150

2151
    except IndexError:
×
2152
        # RGI V6
2153
        regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
2154
    selection = []
×
2155
    for reg in sorted(np.unique(regions)):
×
2156
        sh = gpd.read_file(get_rgi_intersects_region_file(reg,
×
2157
                                                          version=version))
2158
        selection.append(sh.loc[sh.RGIId_1.isin(rgi_ids) |
×
2159
                                sh.RGIId_2.isin(rgi_ids)])
2160

2161
    # Make a new dataframe of those
2162
    selection = pd.concat(selection)
×
2163
    selection.crs = sh.crs  # for geolocalisation
×
2164

2165
    return selection
×
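# Example (illustration only, requires the per-region intersects shapefiles):
#
#     >>> gdf = get_rgi_intersects_entities(['RGI60-11.00897'])
#
# returns all intersection geometries in which RGI60-11.00897 appears as
# either RGIId_1 or RGIId_2.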
2166

2167

2168
def is_dem_source_available(source, lon_ex, lat_ex):
9✔
2169
    """Checks if a DEM source is available for your purpose.
2170

2171
    This is only a very rough check! It doesn't mean that the data really is
2172
    available, but at least it's worth a try.
2173

2174
    Parameters
2175
    ----------
2176
    source : str, required
2177
        the source you want to check for
2178
    lon_ex : tuple or int, required
2179
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2180
    lat_ex : tuple or int, required
2181
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2182

2183
    Returns
2184
    -------
2185
    True or False
2186
    """
2187
    from oggm.utils import tolist
5✔
2188
    lon_ex = tolist(lon_ex, length=2)
5✔
2189
    lat_ex = tolist(lat_ex, length=2)
5✔
2190

2191
    def _in_grid(grid_json, lon, lat):
5✔
2192
        i, j = cfg.DATA['dem_grids'][grid_json].transform(lon, lat,
1✔
2193
                                                          maskout=True)
2194
        return np.all(~ (i.mask | j.mask))
1✔
2195

2196
    if source == 'GIMP':
5✔
2197
        return _in_grid('gimpdem_90m_v01.1.json', lon_ex, lat_ex)
1✔
2198
    elif source == 'ARCTICDEM':
5✔
2199
        return _in_grid('arcticdem_mosaic_100m_v3.0.json', lon_ex, lat_ex)
1✔
2200
    elif source == 'RAMP':
5✔
2201
        return _in_grid('AntarcticDEM_wgs84.json', lon_ex, lat_ex)
1✔
2202
    elif source == 'REMA':
5✔
2203
        return _in_grid('REMA_100m_dem.json', lon_ex, lat_ex)
1✔
2204
    elif source == 'ALASKA':
5✔
2205
        return _in_grid('Alaska_albers_V3.json', lon_ex, lat_ex)
×
2206
    elif source == 'TANDEM':
5✔
2207
        return True
1✔
2208
    elif source == 'AW3D30':
5✔
2209
        return np.min(lat_ex) > -60
1✔
2210
    elif source == 'MAPZEN':
5✔
2211
        return True
1✔
2212
    elif source == 'DEM3':
5✔
2213
        return True
1✔
2214
    elif source == 'ASTER':
5✔
2215
        return True
1✔
2216
    elif source == 'SRTM':
5✔
2217
        return np.max(np.abs(lat_ex)) < 60
1✔
2218
    elif source in ['COPDEM30', 'COPDEM90']:
5✔
2219
        return True
×
2220
    elif source == 'NASADEM':
5✔
2221
        return (np.min(lat_ex) > -56) and (np.max(lat_ex) < 60)
×
2222
    elif source == 'USER':
5✔
2223
        return True
×
2224
    elif source is None:
5✔
2225
        return True
5✔
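# Example (illustration only): the SRTM check is purely latitude based:
#
#     >>> bool(is_dem_source_available('SRTM', (10., 11.), (46., 47.)))
#     True
#     >>> bool(is_dem_source_available('SRTM', (10., 11.), (70., 71.)))
#     False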
2226

2227

2228
def default_dem_source(rgi_id):
9✔
2229
    """Current default DEM source at a given location.
2230

2231
    Parameters
2232
    ----------
2233
    rgi_id : str
2234
        the RGI id
2235

2236
    Returns
2237
    -------
2238
    the chosen DEM source
2239
    """
2240
    rgi_reg = 'RGI{}'.format(rgi_id[6:8])
1✔
2241
    rgi_id = rgi_id[:14]
1✔
2242
    if cfg.DEM_SOURCE_TABLE.get(rgi_reg) is None:
1✔
2243
        fp = get_demo_file('rgi62_dem_frac.h5')
1✔
2244
        cfg.DEM_SOURCE_TABLE[rgi_reg] = pd.read_hdf(fp)
1✔
2245

2246
    sel = cfg.DEM_SOURCE_TABLE[rgi_reg].loc[rgi_id]
1✔
2247
    for s in ['NASADEM', 'COPDEM90', 'COPDEM30', 'GIMP', 'REMA',
1✔
2248
              'RAMP', 'TANDEM', 'MAPZEN']:
2249
        if sel.loc[s] > 0.75:
1✔
2250
            return s
1✔
2251
    # If nothing works, try COPDEM again
2252
    return 'COPDEM90'
×
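# Example (illustration only): the choice is read from a per-glacier lookup
# table shipped with the sample data, so the exact result depends on that
# table; for a typical Alpine glacier one would call
#
#     >>> default_dem_source('RGI60-11.00897')
#
# and get back the first source in the priority list above that covers more
# than 75% of the glacier (falling back to 'COPDEM90' otherwise).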
2253

2254

2255
def get_topo_file(lon_ex=None, lat_ex=None, rgi_id=None, *,
9✔
2256
                  dx_meter=None, zoom=None, source=None):
2257
    """Path(s) to the DEM file(s) covering the desired extent.
2258

2259
    If the needed files for covering the extent are not present, download them.
2260

2261
    The default behavior is to try a list of DEM sources in order, and
2262
    stop once the downloaded data is covering a large enough part of the
2263
    glacier. The DEM sources are tested in the following order:
2264

2265
    'NASADEM' -> 'COPDEM' -> 'GIMP' -> 'REMA' -> 'TANDEM' -> 'MAPZEN'
2266

2267
    To force usage of a certain data source, use the ``source`` keyword argument.
2268

2269
    Parameters
2270
    ----------
2271
    lon_ex : tuple or int, required
2272
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2273
    lat_ex : tuple or int, required
2274
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2275
    rgi_id : str, required if source=None
2276
        the glacier id, used to decide on the DEM source
2277
    rgi_region : str, optional
2278
        the RGI region number (required for the GIMP DEM)
2279
    rgi_subregion : str, optional
2280
        the RGI subregion str (useful for RGI Reg 19)
2281
    dx_meter : float, required for source='MAPZEN'
2282
        the resolution of the glacier map (to decide the zoom level of mapzen)
2283
    zoom : int, optional
2284
        if you know the zoom already (for MAPZEN only)
2285
    source : str or list of str, optional
2286
        Name of specific DEM source. see utils.DEM_SOURCES for a list
2287

2288
    Returns
2289
    -------
2290
    tuple: (list with path(s) to the DEM file(s), data source str)
2291
    """
2292
    from oggm.utils import tolist
5✔
2293
    lon_ex = tolist(lon_ex, length=2)
5✔
2294
    lat_ex = tolist(lat_ex, length=2)
5✔
2295

2296
    if source is not None and not isinstance(source, str):
5✔
2297
        # check all user options
2298
        for s in source:
×
2299
            demf, source_str = get_topo_file(lon_ex=lon_ex, lat_ex=lat_ex,
×
2300
                                             rgi_id=rgi_id, source=s)
2301
            if demf[0]:
×
2302
                return demf, source_str
×
2303

2304
    # Did the user specify a specific DEM file?
2305
    if 'dem_file' in cfg.PATHS and os.path.isfile(cfg.PATHS['dem_file']):
5✔
2306
        source = 'USER' if source is None else source
5✔
2307
        if source == 'USER':
5✔
2308
            return [cfg.PATHS['dem_file']], source
5✔
2309

2310
    # Some logic to decide which source to take if unspecified
2311
    if source is None:
1✔
2312
        if rgi_id is None:
×
2313
            raise InvalidParamsError('rgi_id is needed if source=None')
×
2314
        source = default_dem_source(rgi_id)
×
2315

2316
    if source not in DEM_SOURCES:
1✔
2317
        raise InvalidParamsError('`source` must be one of '
×
2318
                                 '{}'.format(DEM_SOURCES))
2319

2320
    # OK go
2321
    files = []
1✔
2322
    if source == 'GIMP':
1✔
2323
        _file = _download_topo_file_from_cluster('gimpdem_90m_v01.1.tif')
1✔
2324
        files.append(_file)
1✔
2325

2326
    if source == 'ARCTICDEM':
1✔
2327
        zones = arcticdem_zone(lon_ex, lat_ex)
1✔
2328
        for z in zones:
1✔
2329
            with get_lock():
1✔
2330
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2331
                url += 'dem/ArcticDEM_100m_v3.0/'
1✔
2332
                url += '{}_100m_v3.0/{}_100m_v3.0_reg_dem.tif'.format(z, z)
1✔
2333
                files.append(file_downloader(url))
1✔
2334

2335
    if source == 'RAMP':
1✔
2336
        _file = _download_topo_file_from_cluster('AntarcticDEM_wgs84.tif')
1✔
2337
        files.append(_file)
1✔
2338

2339
    if source == 'ALASKA':
1✔
2340
        zones = alaska_dem_zone(lon_ex, lat_ex)
1✔
2341
        for z in zones:
1✔
2342
            with get_lock():
1✔
2343
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2344
                url += 'dem/Alaska_albers_V3/'
1✔
2345
                url += '{}_Alaska_albers_V3/'.format(z)
1✔
2346
                url += '{}_Alaska_albers_V3.tif'.format(z)
1✔
2347
                files.append(file_downloader(url))
1✔
2348

2349
    if source == 'REMA':
1✔
2350
        zones = rema_zone(lon_ex, lat_ex)
1✔
2351
        for z in zones:
1✔
2352
            with get_lock():
1✔
2353
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2354
                url += 'dem/REMA_100m_v1.1/'
1✔
2355
                url += '{}_100m_v1.1/{}_100m_v1.1_reg_dem.tif'.format(z, z)
1✔
2356
                files.append(file_downloader(url))
1✔
2357

2358
    if source == 'TANDEM':
1✔
2359
        zones = tandem_zone(lon_ex, lat_ex)
1✔
2360
        for z in zones:
1✔
2361
            files.append(_download_tandem_file(z))
1✔
2362

2363
    if source == 'AW3D30':
1✔
2364
        zones = aw3d30_zone(lon_ex, lat_ex)
1✔
2365
        for z in zones:
1✔
2366
            files.append(_download_aw3d30_file(z))
1✔
2367

2368
    if source == 'MAPZEN':
1✔
2369
        zones = mapzen_zone(lon_ex, lat_ex, dx_meter=dx_meter, zoom=zoom)
1✔
2370
        for z in zones:
1✔
2371
            files.append(_download_mapzen_file(z))
1✔
2372

2373
    if source == 'ASTER':
1✔
2374
        zones = aster_zone(lon_ex, lat_ex)
1✔
2375
        for z in zones:
1✔
2376
            files.append(_download_aster_file(z))
1✔
2377

2378
    if source == 'DEM3':
1✔
2379
        zones = dem3_viewpano_zone(lon_ex, lat_ex)
1✔
2380
        for z in zones:
1✔
2381
            files.append(_download_dem3_viewpano(z))
1✔
2382

2383
    if source == 'SRTM':
1✔
2384
        zones = srtm_zone(lon_ex, lat_ex)
1✔
2385
        for z in zones:
1✔
2386
            files.append(_download_srtm_file(z))
1✔
2387

2388
    if source in ['COPDEM30', 'COPDEM90']:
1✔
2389
        filetuple = copdem_zone(lon_ex, lat_ex, source)
1✔
2390
        for cpp, eop in filetuple:
1✔
2391
            files.append(_download_copdem_file(cpp, eop, source))
1✔
2392

2393
    if source == 'NASADEM':
1✔
2394
        zones = nasadem_zone(lon_ex, lat_ex)
1✔
2395
        for z in zones:
1✔
2396
            files.append(_download_nasadem_file(z))
1✔
2397

2398
    # filter for None (e.g. oceans)
2399
    files = [s for s in files if s]
1✔
2400
    if files:
1✔
2401
        return files, source
1✔
2402
    else:
2403
        raise InvalidDEMError('Source: {2} no topography file available for '
×
2404
                              'extent lat:{0}, lon:{1}!'.
2405
                              format(lat_ex, lon_ex, source))
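# Example (illustration only, triggers a download on first use): forcing a
# specific source returns the local file path(s) plus the source name:
#
#     >>> files, src = get_topo_file([10.1, 10.9], [46.1, 46.9], source='SRTM')
#     >>> src
#     'SRTM'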