
OGGM / oggm / 5975837719

25 Aug 2023 12:24PM UTC coverage: 85.547% (+0.05%) from 85.502%
Build 5975837719 (push, github, web-flow)
Tiny edit on index page regarding youtube (#1628)

* Update index.rst

* Update index.rst

11441 of 13374 relevant lines covered (85.55%)

3.76 hits per line

Source File

/oggm/utils/_downloads.py (77.91% of lines covered)
1
"""Automated data download and IO."""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import gzip
9✔
7
import bz2
9✔
8
import hashlib
9✔
9
import shutil
9✔
10
import zipfile
9✔
11
import sys
9✔
12
import math
9✔
13
import logging
9✔
14
from functools import partial, wraps
9✔
15
import time
9✔
16
import fnmatch
9✔
17
import urllib.request
9✔
18
import urllib.error
9✔
19
from urllib.parse import urlparse
9✔
20
import socket
9✔
21
import multiprocessing
9✔
22
from netrc import netrc
9✔
23
import ftplib
9✔
24
import ssl
9✔
25
import tarfile
9✔
26

27
# External libs
28
import pandas as pd
9✔
29
import numpy as np
9✔
30
import shapely.geometry as shpg
9✔
31
import requests
9✔
32

33
# Optional libs
34
try:
9✔
35
    import geopandas as gpd
9✔
36
except ImportError:
37
    pass
38
try:
9✔
39
    import salem
9✔
40
    from salem import wgs84
9✔
41
except ImportError:
42
    pass
43
try:
9✔
44
    import rasterio
9✔
45
    try:
8✔
46
        # rasterio V > 1.0
47
        from rasterio.merge import merge as merge_tool
8✔
48
    except ImportError:
49
        from rasterio.tools.merge import merge as merge_tool
50
except ImportError:
51
    pass
52
try:
9✔
53
    ModuleNotFoundError
9✔
54
except NameError:
×
55
    ModuleNotFoundError = ImportError
×
56

57
# Locals
58
import oggm.cfg as cfg
9✔
59
from oggm.exceptions import (InvalidParamsError, NoInternetException,
9✔
60
                             DownloadVerificationFailedException,
61
                             DownloadCredentialsMissingException,
62
                             HttpDownloadError, HttpContentTooShortError,
63
                             InvalidDEMError, FTPSDownloadError)
64

65
# Module logger
66
logger = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
67

68
# Github repository and commit hash/branch name/tag name on that repository
69
# The given commit will be downloaded from github and used as source for
70
# all sample data
71
SAMPLE_DATA_GH_REPO = 'OGGM/oggm-sample-data'
9✔
72
SAMPLE_DATA_COMMIT = '2ad86c93f9235e40edea506f13f6f489adeee805'
9✔
73

74
CHECKSUM_URL = 'https://cluster.klima.uni-bremen.de/data/downloads.sha256.hdf'
9✔
75
CHECKSUM_VALIDATION_URL = CHECKSUM_URL + '.sha256'
9✔
76
CHECKSUM_LIFETIME = 24 * 60 * 60
9✔
77

78
# Recommended url for runs
79
DEFAULT_BASE_URL = ('https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/'
9✔
80
                    'L3-L5_files/2023.3/elev_bands/W5E5_spinup')
81

82
# Web mercator proj constants
83
WEB_N_PIX = 256
9✔
84
WEB_EARTH_RADUIS = 6378137.
9✔
85

86
DEM_SOURCES = ['GIMP', 'ARCTICDEM', 'RAMP', 'TANDEM', 'AW3D30', 'MAPZEN', 'DEM3',
9✔
87
               'ASTER', 'SRTM', 'REMA', 'ALASKA', 'COPDEM30', 'COPDEM90', 'NASADEM']
88
DEM_SOURCES_PER_GLACIER = None
9✔
89

90
_RGI_METADATA = dict()
9✔
91

92
DEM3REG = {
9✔
93
    'ISL': [-25., -13., 63., 67.],  # Iceland
94
    'SVALBARD': [9., 35.99, 75., 84.],
95
    'JANMAYEN': [-10., -7., 70., 72.],
96
    'FJ': [36., 68., 79., 90.],  # Franz Josef Land
97
    'FAR': [-8., -6., 61., 63.],  # Faroer
98
    'BEAR': [18., 20., 74., 75.],  # Bear Island
99
    'SHL': [-3., 0., 60., 61.],  # Shetland
100
    # Antarctica tiles as UTM zones, large files
101
    '01-15': [-180., -91., -90, -60.],
102
    '16-30': [-91., -1., -90., -60.],
103
    '31-45': [-1., 89., -90., -60.],
104
    '46-60': [89., 189., -90., -60.],
105
    # Greenland tiles
106
    'GL-North': [-72., -11., 76., 84.],
107
    'GL-West': [-62., -42., 64., 76.],
108
    'GL-South': [-52., -40., 59., 64.],
109
    'GL-East': [-42., -17., 64., 76.]
110
}
111

112
# Function
113
tuple2int = partial(np.array, dtype=np.int64)
9✔
114

115
lock = None
9✔
116

117

118
def mkdir(path, reset=False):
9✔
119
    """Checks if directory exists and if not, create one.
120

121
    Parameters
122
    ----------
123
    reset: erase the content of the directory if it exists
124

125
    Returns
126
    -------
127
    the path
128
    """
129

130
    if reset and os.path.exists(path):
8✔
131
        shutil.rmtree(path)
×
132
    try:
8✔
133
        os.makedirs(path)
8✔
134
    except FileExistsError:
8✔
135
        pass
8✔
136
    return path
8✔
137

138

139
def del_empty_dirs(s_dir):
9✔
140
    """Delete empty directories."""
141
    b_empty = True
1✔
142
    for s_target in os.listdir(s_dir):
1✔
143
        s_path = os.path.join(s_dir, s_target)
1✔
144
        if os.path.isdir(s_path):
1✔
145
            if not del_empty_dirs(s_path):
1✔
146
                b_empty = False
×
147
        else:
148
            b_empty = False
1✔
149
    if b_empty:
1✔
150
        os.rmdir(s_dir)
1✔
151
    return b_empty
1✔
152

153

154
def findfiles(root_dir, endswith):
9✔
155
    """Finds all files with a specific ending in a directory
156

157
    Parameters
158
    ----------
159
    root_dir : str
160
       The directory to search for
161
    endswith : str
162
       The file ending (e.g. '.hgt')
163

164
    Returns
165
    -------
166
    the list of files
167
    """
168
    out = []
×
169
    for dirpath, dirnames, filenames in os.walk(root_dir):
×
170
        for filename in [f for f in filenames if f.endswith(endswith)]:
×
171
            out.append(os.path.join(dirpath, filename))
×
172
    return out
×
173

174

175
def get_lock():
9✔
176
    """Get multiprocessing lock."""
177
    global lock
178
    if lock is None:
8✔
179
        # Global Lock
180
        if cfg.PARAMS.get('use_mp_spawn', False):
8✔
181
            lock = multiprocessing.get_context('spawn').Lock()
×
182
        else:
183
            lock = multiprocessing.Lock()
8✔
184
    return lock
8✔
185

186

187
def get_dl_verify_data(section):
9✔
188
    """Returns a pandas DataFrame with all known download object hashes.
189

190
    The returned DataFrame maps str cache_obj_name (without section)
191
    to a tuple of int(size) and bytes(sha256).
192
    """
193

194
    verify_key = 'dl_verify_data_' + section
8✔
195
    if verify_key in cfg.DATA:
8✔
196
        return cfg.DATA[verify_key]
3✔
197

198
    verify_file_path = os.path.join(cfg.CACHE_DIR, 'downloads.sha256.hdf')
8✔
199

200
    def verify_file(force=False):
8✔
201
        """Check the hash file's own hash"""
202
        if not cfg.PARAMS['has_internet']:
8✔
203
            return
×
204

205
        if not force and os.path.isfile(verify_file_path) and \
8✔
206
           os.path.getmtime(verify_file_path) + CHECKSUM_LIFETIME > time.time():
207
            return
7✔
208

209
        logger.info('Checking the download verification file checksum...')
8✔
210
        try:
8✔
211
            with requests.get(CHECKSUM_VALIDATION_URL) as req:
8✔
212
                req.raise_for_status()
8✔
213
                verify_file_sha256 = req.text.split(maxsplit=1)[0]
8✔
214
                verify_file_sha256 = bytearray.fromhex(verify_file_sha256)
8✔
215
        except Exception as e:
×
216
            verify_file_sha256 = None
×
217
            logger.warning('Failed getting verification checksum: ' + repr(e))
×
218

219
        if os.path.isfile(verify_file_path) and verify_file_sha256:
8✔
220
            sha256 = hashlib.sha256()
8✔
221
            with open(verify_file_path, 'rb') as f:
8✔
222
                for b in iter(lambda: f.read(0xFFFF), b''):
8✔
223
                    sha256.update(b)
8✔
224
            if sha256.digest() != verify_file_sha256:
8✔
225
                logger.warning('%s changed or invalid, deleting.'
×
226
                               % (verify_file_path))
227
                os.remove(verify_file_path)
×
228
            else:
229
                os.utime(verify_file_path)
8✔
230

231
    if not np.any(['dl_verify_data_' in k for k in cfg.DATA.keys()]):
8✔
232
        # We check the hash file only once per session
233
        # no need to do it at each call
234
        verify_file()
8✔
235

236
    if not os.path.isfile(verify_file_path):
8✔
237
        if not cfg.PARAMS['has_internet']:
8✔
238
            return pd.DataFrame()
×
239

240
        logger.info('Downloading %s to %s...'
8✔
241
                    % (CHECKSUM_URL, verify_file_path))
242

243
        with requests.get(CHECKSUM_URL, stream=True) as req:
8✔
244
            if req.status_code == 200:
8✔
245
                mkdir(os.path.dirname(verify_file_path))
8✔
246
                with open(verify_file_path, 'wb') as f:
8✔
247
                    for b in req.iter_content(chunk_size=0xFFFF):
8✔
248
                        if b:
8✔
249
                            f.write(b)
8✔
250

251
        logger.info('Done downloading.')
8✔
252

253
        verify_file(force=True)
8✔
254

255
    if not os.path.isfile(verify_file_path):
8✔
256
        logger.warning('Downloading and verifying checksums failed.')
×
257
        return pd.DataFrame()
×
258

259
    try:
8✔
260
        data = pd.read_hdf(verify_file_path, key=section)
8✔
261
    except KeyError:
8✔
262
        data = pd.DataFrame()
8✔
263

264
    cfg.DATA[verify_key] = data
8✔
265

266
    return data
8✔
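
A minimal usage sketch of this verification table (assuming an initialized OGGM config; the section name below is illustrative, sections are the host part of the cache object name):

import oggm.cfg as cfg
from oggm.utils._downloads import get_dl_verify_data

cfg.initialize()
df = get_dl_verify_data('cluster.klima.uni-bremen.de')
# index: cache object names (without section), columns: 'size' and 'sha256'
print(df.head())
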
267

268

269
def _call_dl_func(dl_func, cache_path):
9✔
270
    """Helper so the actual call to downloads can be overridden
271
    """
272
    return dl_func(cache_path)
8✔
273

274

275
def _cached_download_helper(cache_obj_name, dl_func, reset=False):
9✔
276
    """Helper function for downloads.
277

278
    Takes care of checking if the file is already cached.
279
    Only calls the actual download function when no cached version exists.
280
    """
281
    cache_dir = cfg.PATHS['dl_cache_dir']
8✔
282
    cache_ro = cfg.PARAMS['dl_cache_readonly']
8✔
283

284
    # A lot of logic below could be simplified but it's also not too important
285
    wd = cfg.PATHS.get('working_dir')
8✔
286
    if wd:
8✔
287
        # this is for real runs
288
        fb_cache_dir = os.path.join(wd, 'cache')
7✔
289
        check_fb_dir = False
7✔
290
    else:
291
        # Nothing has been set up yet, this is bad - find a place to write
292
        # This should happen on read-only cluster only but still
293
        wd = os.environ.get('OGGM_WORKDIR')
8✔
294
        if wd is not None and os.path.isdir(wd):
8✔
295
            fb_cache_dir = os.path.join(wd, 'cache')
×
296
        else:
297
            fb_cache_dir = os.path.join(cfg.CACHE_DIR, 'cache')
8✔
298
        check_fb_dir = True
8✔
299

300
    if not cache_dir:
8✔
301
        # Defaults to working directory: it must be set!
302
        if not cfg.PATHS['working_dir']:
×
303
            raise InvalidParamsError("Need a valid PATHS['working_dir']!")
×
304
        cache_dir = fb_cache_dir
×
305
        cache_ro = False
×
306

307
    fb_path = os.path.join(fb_cache_dir, cache_obj_name)
8✔
308
    if not reset and os.path.isfile(fb_path):
8✔
309
        return fb_path
×
310

311
    cache_path = os.path.join(cache_dir, cache_obj_name)
8✔
312
    if not reset and os.path.isfile(cache_path):
8✔
313
        return cache_path
6✔
314

315
    if cache_ro:
8✔
316
        if check_fb_dir:
×
317
            # Add a manual check that we are caching sample data download
318
            if 'oggm-sample-data' not in fb_path:
×
319
                raise InvalidParamsError('Attempting to download something '
×
320
                                         'with invalid global settings.')
321
        cache_path = fb_path
×
322

323
    if not cfg.PARAMS['has_internet']:
8✔
324
        raise NoInternetException("Download required, but "
1✔
325
                                  "`has_internet` is False.")
326

327
    mkdir(os.path.dirname(cache_path))
8✔
328

329
    try:
8✔
330
        cache_path = _call_dl_func(dl_func, cache_path)
8✔
331
    except BaseException:
×
332
        if os.path.exists(cache_path):
×
333
            os.remove(cache_path)
×
334
        raise
×
335

336
    return cache_path
8✔
337

338

339
def _verified_download_helper(cache_obj_name, dl_func, reset=False):
9✔
340
    """Helper function for downloads.
341

342
    Verifies the size and hash of the downloaded file against the included
343
    list of known static files.
344
    Uses _cached_download_helper to perform the actual download.
345
    """
346
    path = _cached_download_helper(cache_obj_name, dl_func, reset)
8✔
347

348
    dl_verify = cfg.PARAMS.get('dl_verify', False)
8✔
349

350
    if dl_verify and path and cache_obj_name not in cfg.DL_VERIFIED:
8✔
351
        cache_section, cache_path = cache_obj_name.split('/', 1)
1✔
352
        data = get_dl_verify_data(cache_section)
1✔
353
        if cache_path not in data.index:
1✔
354
            logger.info('No known hash for %s' % cache_obj_name)
×
355
            cfg.DL_VERIFIED[cache_obj_name] = True
×
356
        else:
357
            # compute the hash
358
            sha256 = hashlib.sha256()
1✔
359
            with open(path, 'rb') as f:
1✔
360
                for b in iter(lambda: f.read(0xFFFF), b''):
1✔
361
                    sha256.update(b)
1✔
362
            sha256 = sha256.digest()
1✔
363
            size = os.path.getsize(path)
1✔
364

365
            # check
366
            data = data.loc[cache_path]
1✔
367
            if data['size'] != size or bytes(data['sha256']) != sha256:
1✔
368
                err = '%s failed to verify!\nis: %s %s\nexpected: %s %s' % (
1✔
369
                    path, size, sha256.hex(), data[0], data[1].hex())
370
                raise DownloadVerificationFailedException(msg=err, path=path)
1✔
371
            logger.info('%s verified successfully.' % path)
1✔
372
            cfg.DL_VERIFIED[cache_obj_name] = True
1✔
373

374
    return path
8✔
375

376

377
def _requests_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
378
    """Implements the required features of urlretrieve on top of requests
379
    """
380

381
    chunk_size = 128 * 1024
8✔
382
    chunk_count = 0
8✔
383

384
    with requests.get(url, stream=True, auth=auth, timeout=timeout) as r:
8✔
385
        if r.status_code != 200:
8✔
386
            raise HttpDownloadError(r.status_code, url)
×
387
        r.raise_for_status()
8✔
388

389
        size = r.headers.get('content-length') or -1
8✔
390
        size = int(size)
8✔
391

392
        if reporthook:
8✔
393
            reporthook(chunk_count, chunk_size, size)
8✔
394

395
        with open(path, 'wb') as f:
8✔
396
            for chunk in r.iter_content(chunk_size=chunk_size):
8✔
397
                if not chunk:
8✔
398
                    continue
×
399
                f.write(chunk)
8✔
400
                chunk_count += 1
8✔
401
                if reporthook:
8✔
402
                    reporthook(chunk_count, chunk_size, size)
8✔
403

404
        if chunk_count * chunk_size < size:
8✔
405
            raise HttpContentTooShortError()
×
406

407

408
def _classic_urlretrieve(url, path, reporthook, auth=None, timeout=None):
9✔
409
    """Thin wrapper around pythons urllib urlretrieve
410
    """
411

412
    ourl = url
×
413
    if auth:
×
414
        u = urlparse(url)
×
415
        if '@' not in u.netloc:
×
416
            netloc = auth[0] + ':' + auth[1] + '@' + u.netloc
×
417
            url = u._replace(netloc=netloc).geturl()
×
418

419
    old_def_timeout = socket.getdefaulttimeout()
×
420
    if timeout is not None:
×
421
        socket.setdefaulttimeout(timeout)
×
422

423
    try:
×
424
        urllib.request.urlretrieve(url, path, reporthook)
×
425
    except urllib.error.HTTPError as e:
×
426
        raise HttpDownloadError(e.code, ourl)
×
427
    except urllib.error.ContentTooShortError as e:
×
428
        raise HttpContentTooShortError()
×
429
    finally:
430
        socket.setdefaulttimeout(old_def_timeout)
×
431

432

433
class ImplicitFTPTLS(ftplib.FTP_TLS):
9✔
434
    """ FTP_TLS subclass that automatically wraps sockets in SSL to support
435
        implicit FTPS.
436

437
        Taken from https://stackoverflow.com/a/36049814
438
    """
439

440
    def __init__(self, *args, **kwargs):
9✔
441
        super().__init__(*args, **kwargs)
×
442
        self._sock = None
×
443

444
    @property
9✔
445
    def sock(self):
9✔
446
        """Return the socket."""
447
        return self._sock
×
448

449
    @sock.setter
9✔
450
    def sock(self, value):
9✔
451
        """When modifying the socket, ensure that it is ssl wrapped."""
452
        if value is not None and not isinstance(value, ssl.SSLSocket):
×
453
            value = self.context.wrap_socket(value)
×
454
        self._sock = value
×
455

456

457
def url_exists(url):
9✔
458
    """Checks if a given a URL exists or not."""
459
    request = requests.get(url)
2✔
460
    return request.status_code < 400
2✔
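
For example, using the checksum file URL defined above:

from oggm.utils._downloads import url_exists, CHECKSUM_URL

if url_exists(CHECKSUM_URL):
    print('Checksum file is reachable')
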
461

462

463
def _ftps_retrieve(url, path, reporthook, auth=None, timeout=None):
9✔
464
    """ Wrapper around ftplib to download from FTPS server
465
    """
466

467
    if not auth:
×
468
        raise DownloadCredentialsMissingException('No authentication '
×
469
                                                  'credentials given!')
470

471
    upar = urlparse(url)
×
472

473
    # Decide if Implicit or Explicit FTPS is used based on the port in url
474
    if upar.port == 990:
×
475
        ftps = ImplicitFTPTLS()
×
476
    elif upar.port == 21:
×
477
        ftps = ftplib.FTP_TLS()
×
478

479
    try:
×
480
        # establish ssl connection
481
        ftps.connect(host=upar.hostname, port=upar.port, timeout=timeout)
×
482
        ftps.login(user=auth[0], passwd=auth[1])
×
483
        ftps.prot_p()
×
484

485
        logger.info('Established connection %s' % upar.hostname)
×
486

487
        # meta for progress bar size
488
        count = 0
×
489
        total = ftps.size(upar.path)
×
490
        bs = 12*1024
×
491

492
        def _ftps_progress(data):
×
493
            outfile.write(data)
×
494
            nonlocal count
495
            count += 1
×
496
            reporthook(count, count*bs, total)
×
497

498
        with open(path, 'wb') as outfile:
×
499
            ftps.retrbinary('RETR ' + upar.path, _ftps_progress, blocksize=bs)
×
500

501
    except (ftplib.error_perm, socket.timeout, socket.gaierror) as err:
×
502
        raise FTPSDownloadError(err)
×
503
    finally:
504
        ftps.close()
×
505

506

507
def _get_url_cache_name(url):
9✔
508
    """Returns the cache name for any given url.
509
    """
510

511
    res = urlparse(url)
8✔
512
    return res.netloc.split(':', 1)[0] + res.path
8✔
513

514

515
def oggm_urlretrieve(url, cache_obj_name=None, reset=False,
9✔
516
                     reporthook=None, auth=None, timeout=None):
517
    """Wrapper around urlretrieve, to implement our caching logic.
518

519
    Instead of accepting a destination path, it decides where to store the file
520
    and returns the local path.
521

522
    auth is expected to be either a tuple of ('username', 'password') or None.
523
    """
524

525
    if cache_obj_name is None:
8✔
526
        cache_obj_name = _get_url_cache_name(url)
8✔
527

528
    def _dlf(cache_path):
8✔
529
        logger.info("Downloading %s to %s..." % (url, cache_path))
8✔
530
        try:
8✔
531
            _requests_urlretrieve(url, cache_path, reporthook, auth, timeout)
8✔
532
        except requests.exceptions.InvalidSchema:
×
533
            if 'ftps://' in url:
×
534
                _ftps_retrieve(url, cache_path, reporthook, auth, timeout)
×
535
            else:
536
                _classic_urlretrieve(url, cache_path, reporthook, auth,
×
537
                                     timeout)
538
        return cache_path
8✔
539

540
    return _verified_download_helper(cache_obj_name, _dlf, reset)
8✔
541

542

543
def _progress_urlretrieve(url, cache_name=None, reset=False,
9✔
544
                          auth=None, timeout=None):
545
    """Downloads a file, returns its local path, and shows a progressbar."""
546

547
    try:
8✔
548
        from progressbar import DataTransferBar, UnknownLength
8✔
549
        pbar = None
8✔
550

551
        def _upd(count, size, total):
8✔
552
            nonlocal pbar
553
            if pbar is None:
8✔
554
                pbar = DataTransferBar()
8✔
555
                if not pbar.is_terminal:
8✔
556
                    pbar.min_poll_interval = 15
8✔
557
            if pbar.max_value is None:
8✔
558
                if total > 0:
8✔
559
                    pbar.start(total)
8✔
560
                else:
561
                    pbar.start(UnknownLength)
1✔
562
            pbar.update(min(count * size, total))
8✔
563
            sys.stdout.flush()
8✔
564
        res = oggm_urlretrieve(url, cache_obj_name=cache_name, reset=reset,
8✔
565
                               reporthook=_upd, auth=auth, timeout=timeout)
566
        try:
8✔
567
            pbar.finish()
8✔
568
        except BaseException:
6✔
569
            pass
6✔
570
        return res
8✔
571
    except (ImportError, ModuleNotFoundError):
1✔
572
        return oggm_urlretrieve(url, cache_obj_name=cache_name,
×
573
                                reset=reset, auth=auth, timeout=timeout)
574

575

576
def aws_file_download(aws_path, cache_name=None, reset=False):
9✔
577
    with get_lock():
×
578
        return _aws_file_download_unlocked(aws_path, cache_name, reset)
×
579

580

581
def _aws_file_download_unlocked(aws_path, cache_name=None, reset=False):
9✔
582
    """Download a file from the AWS drive s3://astgtmv2/
583

584
    **Note:** you need AWS credentials for this to work.
585

586
    Parameters
587
    ----------
588
    aws_path: path relative to s3://astgtmv2/
589
    """
590

591
    while aws_path.startswith('/'):
×
592
        aws_path = aws_path[1:]
×
593

594
    if cache_name is not None:
×
595
        cache_obj_name = cache_name
×
596
    else:
597
        cache_obj_name = 'astgtmv2/' + aws_path
×
598

599
    def _dlf(cache_path):
×
600
        raise NotImplementedError("Downloads from AWS are no longer supported")
601

602
    return _verified_download_helper(cache_obj_name, _dlf, reset)
×
603

604

605
def file_downloader(www_path, retry_max=3, sleep_on_retry=5,
9✔
606
                    cache_name=None, reset=False, auth=None,
607
                    timeout=None):
608
    """A slightly better downloader: it tries more than once."""
609

610
    local_path = None
8✔
611
    retry_counter = 0
8✔
612
    while retry_counter < retry_max:
8✔
613
        # Try to download
614
        try:
8✔
615
            retry_counter += 1
8✔
616
            local_path = _progress_urlretrieve(www_path, cache_name=cache_name,
8✔
617
                                               reset=reset, auth=auth,
618
                                               timeout=timeout)
619
            # if no error, exit
620
            break
8✔
621
        except HttpDownloadError as err:
1✔
622
            # This works well for py3
623
            if err.code == 404 or err.code == 300:
×
624
                # Ok so this *should* be an ocean tile
625
                return None
×
626
            elif err.code >= 500 and err.code < 600:
×
627
                logger.info(f"Downloading {www_path} failed with "
×
628
                            f"HTTP error {err.code}, "
629
                            f"retrying in {sleep_on_retry} seconds... "
630
                            f"{retry_counter}/{retry_max}")
631
                time.sleep(sleep_on_retry)
×
632
                continue
×
633
            else:
634
                raise
×
635
        except HttpContentTooShortError as err:
1✔
636
            logger.info("Downloading %s failed with ContentTooShortError"
×
637
                        " error %s, retrying in %s seconds... %s/%s" %
638
                        (www_path, err.code, sleep_on_retry, retry_counter, retry_max))
639
            time.sleep(sleep_on_retry)
×
640
            continue
×
641
        except DownloadVerificationFailedException as err:
1✔
642
            if (cfg.PATHS['dl_cache_dir'] and
1✔
643
                  err.path.startswith(cfg.PATHS['dl_cache_dir']) and
644
                  cfg.PARAMS['dl_cache_readonly']):
645
                if not cache_name:
×
646
                    cache_name = _get_url_cache_name(www_path)
×
647
                cache_name = "GLOBAL_CACHE_INVALID/" + cache_name
×
648
                retry_counter -= 1
×
649
                logger.info("Global cache for %s is invalid!")
×
650
            else:
651
                try:
1✔
652
                    os.remove(err.path)
1✔
653
                except FileNotFoundError:
×
654
                    pass
×
655
                logger.info("Downloading %s failed with "
1✔
656
                            "DownloadVerificationFailedException\n %s\n"
657
                            "The file might have changed or is corrupted. "
658
                            "File deleted. Re-downloading... %s/%s" %
659
                            (www_path, err.msg, retry_counter, retry_max))
660
            continue
1✔
661
        except requests.ConnectionError as err:
1✔
662
            if err.args[0].__class__.__name__ == 'MaxRetryError':
×
663
                # if request tried often enough we don't have to do this
664
                # this error does happen for not existing ASTERv3 files
665
                return None
×
666
            else:
667
                # in other cases: try again
668
                logger.info("Downloading %s failed with ConnectionError, "
×
669
                            "retrying in %s seconds... %s/%s" %
670
                            (www_path, sleep_on_retry, retry_counter, retry_max))
671
                time.sleep(sleep_on_retry)
×
672
                continue
×
673
        except FTPSDownloadError as err:
1✔
674
            logger.info("Downloading %s failed with FTPSDownloadError"
×
675
                        " error: '%s', retrying in %s seconds... %s/%s" %
676
                        (www_path, err.orgerr, sleep_on_retry, retry_counter, retry_max))
677
            time.sleep(sleep_on_retry)
×
678
            continue
×
679

680
    # See if we managed (fail is allowed)
681
    if not local_path or not os.path.exists(local_path):
8✔
682
        logger.warning('Downloading %s failed.' % www_path)
×
683

684
    return local_path
8✔
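
A minimal sketch of how this retrying downloader is typically called (assuming an initialized OGGM config; the working directory below is illustrative, and the URL is the temperature bias file referenced further down in this module):

import oggm.cfg as cfg
from oggm.utils._downloads import file_downloader

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # illustrative path
url = ('https://cluster.klima.uni-bremen.de/~oggm/ref_mb_params/oggm_v1.6/'
       'w5e5_temp_bias_v2023.4.csv')
local_path = file_downloader(url)
if local_path is None:
    print('Download failed (failure is allowed, a warning is logged)')
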
685

686

687
def locked_func(func):
9✔
688
    """To decorate a function that needs to be locked for multiprocessing"""
689
    @wraps(func)
9✔
690
    def wrapper(*args, **kwargs):
9✔
691
        with get_lock():
6✔
692
            return func(*args, **kwargs)
6✔
693
    return wrapper
9✔
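
A short sketch of the decorator in use (the decorated helper below is hypothetical):

from oggm.utils._downloads import locked_func

@locked_func
def _touch_shared_cache(path):
    # hypothetical helper: runs while holding the module-wide
    # multiprocessing lock returned by get_lock()
    with open(path, 'a'):
        pass
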
694

695

696
def file_extractor(file_path):
9✔
697
    """For archives with only one file inside extract the file to tmpdir."""
698

699
    filename, file_extension = os.path.splitext(file_path)
6✔
700
    # Second one for tar.gz files
701
    f2, ex2 = os.path.splitext(filename)
6✔
702
    if ex2 == '.tar':
6✔
703
        filename, file_extension = f2, '.tar.gz'
1✔
704
    bname = os.path.basename(file_path)
6✔
705

706
    # This is to give a unique name to the tmp file
707
    hid = hashlib.md5(file_path.encode()).hexdigest()[:7] + '_'
6✔
708

709
    # extract directory
710
    tmpdir = cfg.PATHS['tmp_dir']
6✔
711
    mkdir(tmpdir)
6✔
712

713
    # Check output extension
714
    def _check_ext(f):
6✔
715
        _, of_ext = os.path.splitext(f)
6✔
716
        if of_ext not in ['.nc', '.tif']:
6✔
717
            raise InvalidParamsError('Extracted file extension not recognized'
×
718
                                     ': {}'.format(of_ext))
719
        return of_ext
6✔
720

721
    if file_extension == '.zip':
6✔
722
        with zipfile.ZipFile(file_path) as zf:
6✔
723
            members = zf.namelist()
6✔
724
            if len(members) != 1:
6✔
725
                raise RuntimeError('Cannot extract multiple files')
×
726
            o_name = hid + members[0]
6✔
727
            o_path = os.path.join(tmpdir, o_name)
6✔
728
            of_ext = _check_ext(o_path)
6✔
729
            if not os.path.exists(o_path):
6✔
730
                logger.info('Extracting {} to {}...'.format(bname, o_path))
6✔
731
                with open(o_path, 'wb') as f:
6✔
732
                    f.write(zf.read(members[0]))
6✔
733
    elif file_extension == '.gz':
4✔
734
        # Gzip files cannot be inspected. It's always only one file
735
        # Decide on its name
736
        o_name = hid + os.path.basename(filename)
2✔
737
        o_path = os.path.join(tmpdir, o_name)
2✔
738
        of_ext = _check_ext(o_path)
2✔
739
        if not os.path.exists(o_path):
2✔
740
            logger.info('Extracting {} to {}...'.format(bname, o_path))
2✔
741
            with gzip.GzipFile(file_path) as zf:
2✔
742
                with open(o_path, 'wb') as outfile:
2✔
743
                    for line in zf:
2✔
744
                        outfile.write(line)
2✔
745
    elif file_extension == '.bz2':
4✔
746
        # bzip2 files cannot be inspected. It's always only one file
747
        # Decide on its name
748
        o_name = hid + os.path.basename(filename)
4✔
749
        o_path = os.path.join(tmpdir, o_name)
4✔
750
        of_ext = _check_ext(o_path)
4✔
751
        if not os.path.exists(o_path):
4✔
752
            logger.info('Extracting {} to {}...'.format(bname, o_path))
4✔
753
            with bz2.open(file_path) as zf:
4✔
754
                with open(o_path, 'wb') as outfile:
4✔
755
                    for line in zf:
4✔
756
                        outfile.write(line)
4✔
757
    elif file_extension in ['.tar.gz', '.tar']:
1✔
758
        with tarfile.open(file_path) as zf:
1✔
759
            members = zf.getmembers()
1✔
760
            if len(members) != 1:
1✔
761
                raise RuntimeError('Cannot extract multiple files')
×
762
            o_name = hid + members[0].name
1✔
763
            o_path = os.path.join(tmpdir, o_name)
1✔
764
            of_ext = _check_ext(o_path)
1✔
765
            if not os.path.exists(o_path):
1✔
766
                logger.info('Extracting {} to {}...'.format(bname, o_path))
1✔
767
                with open(o_path, 'wb') as f:
1✔
768
                    f.write(zf.extractfile(members[0]).read())
1✔
769
    else:
770
        raise InvalidParamsError('Extension not recognized: '
1✔
771
                                 '{}'.format(file_extension))
772

773
    # Be sure we don't overfill the folder
774
    cfg.get_lru_handler(tmpdir, ending=of_ext).append(o_path)
6✔
775

776
    return o_path
6✔
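
A usage sketch (the archive path is hypothetical; only single-file .zip/.gz/.bz2/.tar.gz archives containing a .nc or .tif are accepted):

import oggm.cfg as cfg
from oggm.utils._downloads import file_extractor

cfg.initialize()
archive = '/tmp/some_dem.tif.zip'  # hypothetical single-file archive
tif_path = file_extractor(archive)
# the extracted file lives in cfg.PATHS['tmp_dir'] and is LRU-managed
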
777

778

779
def download_with_authentication(wwwfile, key):
9✔
780
    """ Uses credentials from a local .netrc file to download files
781

782
    This function is currently used for TanDEM-X and ASTER
783

784
    Parameters
785
    ----------
786
    wwwfile : str
787
        path to the file to download
788
    key : str
789
        the machine to look at in the .netrc file
790

791
    Returns
792
    -------
793

794
    """
795
    # Check the cache first. Use a dummy download function to ensure nothing
796
    # is downloaded without credentials:
797

798
    def _always_none(foo):
×
799
        return None
×
800

801
    cache_obj_name = _get_url_cache_name(wwwfile)
×
802
    dest_file = _verified_download_helper(cache_obj_name, _always_none)
×
803

804
    # Grab auth parameters
805
    if not dest_file:
×
806
        authfile = os.path.expanduser('~/.netrc')
×
807

808
        if not os.path.isfile(authfile):
×
809
            raise DownloadCredentialsMissingException(
×
810
                (authfile, ' does not exist. Add necessary credentials for ',
811
                 key, ' with `oggm_netrc_credentials`. You may have to ',
812
                 'register at the respective service first.'))
813

814
        try:
×
815
            netrc(authfile).authenticators(key)[0]
×
816
        except TypeError:
×
817
            raise DownloadCredentialsMissingException(
×
818
                ('Credentials for ', key, ' are not in ', authfile, '. Add ',
819
                 'credentials for it with `oggm_netrc_credentials`.'))
820

821
        dest_file = file_downloader(
×
822
            wwwfile, auth=(netrc(authfile).authenticators(key)[0],
823
                           netrc(authfile).authenticators(key)[2]))
824

825
    return dest_file
×
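
A sketch of an authenticated download (assuming an initialized OGGM config with a working directory, and a ~/.netrc entry for the given machine created e.g. with `oggm_netrc_credentials`; the ASTER zone name is illustrative):

import oggm.cfg as cfg
from oggm.utils._downloads import download_with_authentication

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # illustrative path
# ~/.netrc must contain a line like:
# machine urs.earthdata.nasa.gov login <user> password <password>
wwwfile = ('https://e4ftl01.cr.usgs.gov/ASTT/ASTGTM.003/2000.03.01/'
           'ASTGTMV003_N46E010.zip')  # illustrative ASTER zone
local = download_with_authentication(wwwfile, 'urs.earthdata.nasa.gov')
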
826

827

828
def download_oggm_files():
9✔
829
    with get_lock():
8✔
830
        return _download_oggm_files_unlocked()
8✔
831

832

833
def _download_oggm_files_unlocked():
9✔
834
    """Checks if the demo data is already on the cache and downloads it."""
835

836
    zip_url = 'https://github.com/%s/archive/%s.zip' % \
8✔
837
              (SAMPLE_DATA_GH_REPO, SAMPLE_DATA_COMMIT)
838
    odir = os.path.join(cfg.CACHE_DIR)
8✔
839
    sdir = os.path.join(cfg.CACHE_DIR,
8✔
840
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT)
841

842
    # download only if necessary
843
    if not os.path.exists(sdir):
8✔
844
        ofile = file_downloader(zip_url)
8✔
845
        with zipfile.ZipFile(ofile) as zf:
8✔
846
            zf.extractall(odir)
8✔
847
        assert os.path.isdir(sdir)
8✔
848

849
    # list of files for output
850
    out = dict()
8✔
851
    for root, directories, filenames in os.walk(sdir):
8✔
852
        for filename in filenames:
8✔
853
            if filename in out:
8✔
854
                # This was a stupid thing, and should not happen
855
                # TODO: duplicates in sample data...
856
                k = os.path.join(os.path.basename(root), filename)
8✔
857
                assert k not in out
8✔
858
                out[k] = os.path.join(root, filename)
8✔
859
            else:
860
                out[filename] = os.path.join(root, filename)
8✔
861

862
    return out
8✔
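
A minimal sketch of fetching the sample data through the locked wrapper above (the working directory and the file name looked up below are illustrative):

import oggm.cfg as cfg
from oggm.utils._downloads import download_oggm_files

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # illustrative path
files = download_oggm_files()  # dict: file name -> local path
demo_shp = files.get('Hintereisferner_RGI5.shp')  # illustrative entry
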
863

864

865
def _download_srtm_file(zone):
9✔
866
    with get_lock():
1✔
867
        return _download_srtm_file_unlocked(zone)
1✔
868

869

870
def _download_srtm_file_unlocked(zone):
9✔
871
    """Checks if the srtm data is in the directory and if not, download it.
872
    """
873

874
    # extract directory
875
    tmpdir = cfg.PATHS['tmp_dir']
1✔
876
    mkdir(tmpdir)
1✔
877
    outpath = os.path.join(tmpdir, 'srtm_' + zone + '.tif')
1✔
878

879
    # check if extracted file exists already
880
    if os.path.exists(outpath):
1✔
881
        return outpath
×
882

883
    # Did we download it yet?
884
    wwwfile = ('http://srtm.csi.cgiar.org/wp-content/uploads/files/srtm_5x5/'
1✔
885
               'TIFF/srtm_' + zone + '.zip')
886
    dest_file = file_downloader(wwwfile)
1✔
887

888
    # None means we tried hard but we couldn't find it
889
    if not dest_file:
1✔
890
        return None
×
891

892
    # ok we have to extract it
893
    if not os.path.exists(outpath):
1✔
894
        with zipfile.ZipFile(dest_file) as zf:
1✔
895
            zf.extractall(tmpdir)
1✔
896

897
    # See if we're good, don't overfill the tmp directory
898
    assert os.path.exists(outpath)
1✔
899
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
900
    return outpath
1✔
901

902

903
def _download_nasadem_file(zone):
9✔
904
    with get_lock():
1✔
905
        return _download_nasadem_file_unlocked(zone)
1✔
906

907

908
def _download_nasadem_file_unlocked(zone):
9✔
909
    """Checks if the NASADEM data is in the directory and if not, download it.
910
    """
911

912
    # extract directory
913
    tmpdir = cfg.PATHS['tmp_dir']
1✔
914
    mkdir(tmpdir)
1✔
915
    wwwfile = ('https://e4ftl01.cr.usgs.gov/MEASURES/NASADEM_HGT.001/'
1✔
916
               '2000.02.11/NASADEM_HGT_{}.zip'.format(zone))
917
    demfile = '{}.hgt'.format(zone)
1✔
918
    outpath = os.path.join(tmpdir, demfile)
1✔
919

920
    # check if extracted file exists already
921
    if os.path.exists(outpath):
1✔
922
        return outpath
×
923

924
    # Did we download it yet?
925
    dest_file = file_downloader(wwwfile)
1✔
926

927
    # None means we tried hard but we couldn't find it
928
    if not dest_file:
1✔
929
        return None
×
930

931
    # ok we have to extract it
932
    if not os.path.exists(outpath):
1✔
933
        with zipfile.ZipFile(dest_file) as zf:
1✔
934
            zf.extract(demfile, path=tmpdir)
1✔
935

936
    # See if we're good, don't overfill the tmp directory
937
    assert os.path.exists(outpath)
1✔
938
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
939
    return outpath
1✔
940

941

942
def _download_tandem_file(zone):
9✔
943
    with get_lock():
1✔
944
        return _download_tandem_file_unlocked(zone)
1✔
945

946

947
def _download_tandem_file_unlocked(zone):
9✔
948
    """Checks if the tandem data is in the directory and if not, download it.
949
    """
950

951
    # extract directory
952
    tmpdir = cfg.PATHS['tmp_dir']
1✔
953
    mkdir(tmpdir)
1✔
954
    bname = zone.split('/')[-1] + '_DEM.tif'
1✔
955
    wwwfile = ('https://download.geoservice.dlr.de/TDM90/files/DEM/'
1✔
956
               '{}.zip'.format(zone))
957
    outpath = os.path.join(tmpdir, bname)
1✔
958

959
    # check if extracted file exists already
960
    if os.path.exists(outpath):
1✔
961
        return outpath
×
962

963
    dest_file = download_with_authentication(wwwfile, 'geoservice.dlr.de')
1✔
964

965
    # That means we tried hard but we couldn't find it
966
    if not dest_file:
1✔
967
        return None
×
968
    elif not zipfile.is_zipfile(dest_file):
1✔
969
        # If the TanDEM-X tile does not exist, an invalid file is created.
970
        # See https://github.com/OGGM/oggm/issues/893 for more details
971
        return None
1✔
972

973
    # ok we have to extract it
974
    if not os.path.exists(outpath):
1✔
975
        with zipfile.ZipFile(dest_file) as zf:
1✔
976
            for fn in zf.namelist():
1✔
977
                if 'DEM/' + bname in fn:
1✔
978
                    break
×
979
            with open(outpath, 'wb') as fo:
1✔
980
                fo.write(zf.read(fn))
1✔
981

982
    # See if we're good, don't overfill the tmp directory
983
    assert os.path.exists(outpath)
1✔
984
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
985
    return outpath
1✔
986

987

988
def _download_dem3_viewpano(zone):
9✔
989
    with get_lock():
1✔
990
        return _download_dem3_viewpano_unlocked(zone)
1✔
991

992

993
def _download_dem3_viewpano_unlocked(zone):
9✔
994
    """Checks if the DEM3 data is in the directory and if not, download it.
995
    """
996

997
    # extract directory
998
    tmpdir = cfg.PATHS['tmp_dir']
1✔
999
    mkdir(tmpdir)
1✔
1000
    outpath = os.path.join(tmpdir, zone + '.tif')
1✔
1001
    extract_dir = os.path.join(tmpdir, 'tmp_' + zone)
1✔
1002
    mkdir(extract_dir, reset=True)
1✔
1003

1004
    # check if extracted file exists already
1005
    if os.path.exists(outpath):
1✔
1006
        return outpath
×
1007

1008
    # OK, so see if downloaded already
1009
    # some files have a newer version 'v2'
1010
    if zone in ['R33', 'R34', 'R35', 'R36', 'R37', 'R38', 'Q32', 'Q33', 'Q34',
1✔
1011
                'Q35', 'Q36', 'Q37', 'Q38', 'Q39', 'Q40', 'P31', 'P32', 'P33',
1012
                'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40']:
1013
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + 'v2.zip'
×
1014
    elif zone in DEM3REG.keys():
1✔
1015
        # We prepared these files as tif already
1016
        ifile = ('https://cluster.klima.uni-bremen.de/~oggm/dem/'
1✔
1017
                 'DEM3_MERGED/{}.tif'.format(zone))
1018
        return file_downloader(ifile)
1✔
1019
    else:
1020
        ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + '.zip'
1✔
1021

1022
    dfile = file_downloader(ifile)
1✔
1023

1024
    # None means we tried hard but we couldn't find it
1025
    if not dfile:
1✔
1026
        return None
×
1027

1028
    # ok we have to extract it
1029
    with zipfile.ZipFile(dfile) as zf:
1✔
1030
        zf.extractall(extract_dir)
1✔
1031

1032
    # Serious issue: sometimes, if a southern hemisphere URL is queried for
1033
    # download and there is none, a NH zip file is downloaded.
1034
    # Example: http://viewfinderpanoramas.org/dem3/SN29.zip yields N29!
1035
    # BUT: There are southern hemisphere files that download properly. However,
1036
    # the unzipped folder has the file name of
1037
    # the northern hemisphere file. Check that the correct file exists:
1038
    if len(zone) == 4 and zone.startswith('S'):
1✔
1039
        zonedir = os.path.join(extract_dir, zone[1:])
×
1040
    else:
1041
        zonedir = os.path.join(extract_dir, zone)
1✔
1042
    globlist = glob.glob(os.path.join(zonedir, '*.hgt'))
1✔
1043

1044
    # take care of the special file naming cases
1045
    if zone in DEM3REG.keys():
1✔
1046
        globlist = glob.glob(os.path.join(extract_dir, '*', '*.hgt'))
×
1047

1048
    if not globlist:
1✔
1049
        # Final resort
1050
        globlist = (findfiles(extract_dir, '.hgt') or
×
1051
                    findfiles(extract_dir, '.HGT'))
1052
        if not globlist:
×
1053
            raise RuntimeError("We should have some files here, but we don't")
×
1054

1055
    # merge the single HGT files (can be a bit inefficient, because not every
1056
    # single file might be exactly within extent...)
1057
    rfiles = [rasterio.open(s) for s in globlist]
1✔
1058
    dest, output_transform = merge_tool(rfiles)
1✔
1059
    profile = rfiles[0].profile
1✔
1060
    if 'affine' in profile:
1✔
1061
        profile.pop('affine')
×
1062
    profile['transform'] = output_transform
1✔
1063
    profile['height'] = dest.shape[1]
1✔
1064
    profile['width'] = dest.shape[2]
1✔
1065
    profile['driver'] = 'GTiff'
1✔
1066
    with rasterio.open(outpath, 'w', **profile) as dst:
1✔
1067
        dst.write(dest)
1✔
1068
    for rf in rfiles:
1✔
1069
        rf.close()
1✔
1070

1071
    # delete original files to spare disk space
1072
    for s in globlist:
1✔
1073
        os.remove(s)
1✔
1074
    del_empty_dirs(tmpdir)
1✔
1075

1076
    # See if we're good, don't overfill the tmp directory
1077
    assert os.path.exists(outpath)
1✔
1078
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1079
    return outpath
1✔
1080

1081

1082
def _download_aster_file(zone):
9✔
1083
    with get_lock():
1✔
1084
        return _download_aster_file_unlocked(zone)
1✔
1085

1086

1087
def _download_aster_file_unlocked(zone):
9✔
1088
    """Checks if the ASTER data is in the directory and if not, download it.
1089
    """
1090

1091
    # extract directory
1092
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1093
    mkdir(tmpdir)
1✔
1094
    wwwfile = ('https://e4ftl01.cr.usgs.gov/ASTT/ASTGTM.003/'
1✔
1095
               '2000.03.01/{}.zip'.format(zone))
1096
    outpath = os.path.join(tmpdir, zone + '_dem.tif')
1✔
1097

1098
    # check if extracted file exists already
1099
    if os.path.exists(outpath):
1✔
1100
        return outpath
×
1101

1102
    # download from NASA Earthdata with credentials
1103
    dest_file = download_with_authentication(wwwfile, 'urs.earthdata.nasa.gov')
1✔
1104

1105
    # That means we tried hard but we couldn't find it
1106
    if not dest_file:
1✔
1107
        return None
×
1108

1109
    # ok we have to extract it
1110
    if not os.path.exists(outpath):
1✔
1111
        with zipfile.ZipFile(dest_file) as zf:
1✔
1112
            zf.extractall(tmpdir)
1✔
1113

1114
    # See if we're good, don't overfill the tmp directory
1115
    assert os.path.exists(outpath)
1✔
1116
    cfg.get_lru_handler(tmpdir).append(outpath)
1✔
1117
    return outpath
1✔
1118

1119

1120
def _download_topo_file_from_cluster(fname):
9✔
1121
    with get_lock():
×
1122
        return _download_topo_file_from_cluster_unlocked(fname)
×
1123

1124

1125
def _download_topo_file_from_cluster_unlocked(fname):
9✔
1126
    """Checks if the special topo data is in the directory and if not,
1127
    download it from the cluster.
1128
    """
1129

1130
    # extract directory
1131
    tmpdir = cfg.PATHS['tmp_dir']
×
1132
    mkdir(tmpdir)
×
1133
    outpath = os.path.join(tmpdir, fname)
×
1134

1135
    url = 'https://cluster.klima.uni-bremen.de/data/dems/'
×
1136
    url += fname + '.zip'
×
1137
    dfile = file_downloader(url)
×
1138

1139
    if not os.path.exists(outpath):
×
1140
        logger.info('Extracting ' + fname + '.zip to ' + outpath + '...')
×
1141
        with zipfile.ZipFile(dfile) as zf:
×
1142
            zf.extractall(tmpdir)
×
1143

1144
    # See if we're good, don't overfill the tmp directory
1145
    assert os.path.exists(outpath)
×
1146
    cfg.get_lru_handler(tmpdir).append(outpath)
×
1147
    return outpath
×
1148

1149

1150
def _download_copdem_file(cppfile, tilename, source):
9✔
1151
    with get_lock():
1✔
1152
        return _download_copdem_file_unlocked(cppfile, tilename, source)
1✔
1153

1154

1155
def _download_copdem_file_unlocked(cppfile, tilename, source):
9✔
1156
    """Checks if Copernicus DEM file is in the directory, if not download it.
1157

1158
    cppfile : name of the tarfile to download
1159
    tilename : name of folder and tif file within the cppfile
1160
    source : either 'COPDEM90' or 'COPDEM30'
1161

1162
    """
1163

1164
    # extract directory
1165
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1166
    mkdir(tmpdir)
1✔
1167

1168
    # tarfiles are extracted in directories per each tile
1169
    fpath = '{0}_DEM.tif'.format(tilename)
1✔
1170
    demfile = os.path.join(tmpdir, fpath)
1✔
1171

1172
    # check if extracted file exists already
1173
    if os.path.exists(demfile):
1✔
1174
        return demfile
×
1175

1176
    # Did we download it yet?
1177
    ftpfile = ('ftps://cdsdata.copernicus.eu:990/' +
1✔
1178
               'datasets/COP-DEM_GLO-{}-DGED/2022_1/'.format(source[-2:]) +
1179
               cppfile)
1180

1181
    dest_file = download_with_authentication(ftpfile,
1✔
1182
                                             'spacedata.copernicus.eu')
1183

1184
    # None means we tried hard but we couldn't find it
1185
    if not dest_file:
1✔
1186
        return None
×
1187

1188
    # ok we have to extract it
1189
    if not os.path.exists(demfile):
1✔
1190
        tiffile = os.path.join(tilename, 'DEM', fpath)
1✔
1191
        with tarfile.open(dest_file) as tf:
1✔
1192
            tmember = tf.getmember(tiffile)
1✔
1193
            # do not extract the full path of the file
1194
            tmember.name = os.path.basename(tf.getmember(tiffile).name)
1✔
1195
            tf.extract(tmember, tmpdir)
1✔
1196

1197
    # See if we're good, don't overfill the tmp directory
1198
    assert os.path.exists(demfile)
1✔
1199
    cfg.get_lru_handler(tmpdir).append(demfile)
1✔
1200

1201
    return demfile
1✔
1202

1203

1204
def _download_aw3d30_file(zone):
9✔
1205
    with get_lock():
1✔
1206
        return _download_aw3d30_file_unlocked(zone)
1✔
1207

1208

1209
def _download_aw3d30_file_unlocked(fullzone):
9✔
1210
    """Checks if the AW3D30 data is in the directory and if not, download it.
1211
    """
1212

1213
    # extract directory
1214
    tmpdir = cfg.PATHS['tmp_dir']
1✔
1215
    mkdir(tmpdir)
1✔
1216

1217
    # tarfiles are extracted in directories per each tile
1218
    tile = fullzone.split('/')[1]
1✔
1219
    demfile = os.path.join(tmpdir, tile, tile + '_AVE_DSM.tif')
1✔
1220

1221
    # check if extracted file exists already
1222
    if os.path.exists(demfile):
1✔
1223
        return demfile
×
1224

1225
    # Did we download it yet?
1226
    ftpfile = ('ftp://ftp.eorc.jaxa.jp/pub/ALOS/ext1/AW3D30/release_v1804/'
1✔
1227
               + fullzone + '.tar.gz')
1228
    try:
1✔
1229
        dest_file = file_downloader(ftpfile, timeout=180)
1✔
1230
    except urllib.error.URLError:
×
1231
        # This error is raised if file is not available, could be water
1232
        return None
×
1233

1234
    # None means we tried hard but we couldn't find it
1235
    if not dest_file:
1✔
1236
        return None
×
1237

1238
    # ok we have to extract it
1239
    if not os.path.exists(demfile):
1✔
1240
        from oggm.utils import robust_tar_extract
1✔
1241
        dempath = os.path.dirname(demfile)
1✔
1242
        robust_tar_extract(dest_file, dempath)
1✔
1243

1244
    # See if we're good, don't overfill the tmp directory
1245
    assert os.path.exists(demfile)
1✔
1246
    # this tarfile contains several files
1247
    for file in os.listdir(dempath):
1✔
1248
        cfg.get_lru_handler(tmpdir).append(os.path.join(dempath, file))
1✔
1249
    return demfile
1✔
1250

1251

1252
def _download_mapzen_file(zone):
9✔
1253
    with get_lock():
1✔
1254
        return _download_mapzen_file_unlocked(zone)
1✔
1255

1256

1257
def _download_mapzen_file_unlocked(zone):
9✔
1258
    """Checks if the mapzen data is in the directory and if not, download it.
1259
    """
1260
    bucket = 'elevation-tiles-prod'
1✔
1261
    prefix = 'geotiff'
1✔
1262
    url = 'http://s3.amazonaws.com/%s/%s/%s' % (bucket, prefix, zone)
1✔
1263

1264
    # That's all
1265
    return file_downloader(url, timeout=180)
1✔
1266

1267

1268
def get_prepro_gdir(rgi_version, rgi_id, border, prepro_level, base_url=None):
9✔
1269
    with get_lock():
2✔
1270
        return _get_prepro_gdir_unlocked(rgi_version, rgi_id, border,
2✔
1271
                                         prepro_level, base_url=base_url)
1272

1273

1274
def get_prepro_base_url(base_url=None, rgi_version=None, border=None,
9✔
1275
                        prepro_level=None):
1276
    """Extended base url where to find the desired gdirs."""
1277

1278
    if base_url is None:
2✔
1279
        raise InvalidParamsError('Starting with v1.6, users now have to '
×
1280
                                 'explicitly indicate the url they want '
1281
                                 'to start from.')
1282

1283
    if not base_url.endswith('/'):
2✔
1284
        base_url += '/'
×
1285

1286
    if rgi_version is None:
2✔
1287
        rgi_version = cfg.PARAMS['rgi_version']
1✔
1288

1289
    if border is None:
2✔
1290
        border = cfg.PARAMS['border']
1✔
1291

1292
    url = base_url
2✔
1293
    url += 'RGI{}/'.format(rgi_version)
2✔
1294
    url += 'b_{:03d}/'.format(int(border))
2✔
1295
    url += 'L{:d}/'.format(prepro_level)
2✔
1296
    return url
2✔
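
For example, building the directory URL for level-3 pre-processed directories (the RGI version and border are illustrative values):

from oggm.utils._downloads import get_prepro_base_url, DEFAULT_BASE_URL

url = get_prepro_base_url(base_url=DEFAULT_BASE_URL, rgi_version='62',
                          border=80, prepro_level=3)
# -> '<DEFAULT_BASE_URL>/RGI62/b_080/L3/'
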
1297

1298

1299
def _get_prepro_gdir_unlocked(rgi_version, rgi_id, border, prepro_level,
9✔
1300
                              base_url=None):
1301

1302
    url = get_prepro_base_url(rgi_version=rgi_version, border=border,
2✔
1303
                              prepro_level=prepro_level, base_url=base_url)
1304
    if len(rgi_id) == 23:
2✔
1305
        # RGI7
1306
        url += '{}/{}.tar'.format(rgi_id[:17], rgi_id[:20])
×
1307
    else:
1308
        url += '{}/{}.tar'.format(rgi_id[:8], rgi_id[:11])
2✔
1309
    tar_base = file_downloader(url)
2✔
1310
    if tar_base is None:
2✔
1311
        raise RuntimeError('Could not find file at ' + url)
×
1312

1313
    return tar_base
2✔
1314

1315

1316
def get_geodetic_mb_dataframe(file_path=None):
9✔
1317
    """Fetches the reference geodetic dataframe for calibration.
1318

1319
    Currently that's the data from Hugonnet et al. (2021), corrected for
1320
    outliers and with void filled. The data preparation script is
1321
    available at
1322
    https://nbviewer.jupyter.org/urls/cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/convert.ipynb
1323

1324
    Parameters
1325
    ----------
1326
    file_path : str
1327
        in case you have your own file to parse (check the format first!)
1328

1329
    Returns
1330
    -------
1331
    a DataFrame with the data.
1332
    """
1333

1334
    # fetch the file online or read custom file
1335
    if file_path is None:
7✔
1336
        base_url = 'https://cluster.klima.uni-bremen.de/~oggm/geodetic_ref_mb/'
7✔
1337
        file_name = 'hugonnet_2021_ds_rgi60_pergla_rates_10_20_worldwide_filled.hdf'
7✔
1338
        file_path = file_downloader(base_url + file_name)
7✔
1339

1340
    # Did we open it yet?
1341
    if file_path in cfg.DATA:
7✔
1342
        return cfg.DATA[file_path]
4✔
1343

1344
    # If not let's go
1345
    extension = os.path.splitext(file_path)[1]
7✔
1346
    if extension == '.csv':
7✔
1347
        df = pd.read_csv(file_path, index_col=0)
×
1348
    elif extension == '.hdf':
7✔
1349
        df = pd.read_hdf(file_path)
7✔
1350

1351
    # Check for missing data (old files)
1352
    if len(df.loc[df['dmdtda'].isnull()]) > 0:
7✔
1353
        raise InvalidParamsError('The reference file you are using has missing '
×
1354
                                 'data and is probably outdated (sorry for '
1355
                                 'that). Delete the file at '
1356
                                 f'{file_path} and start again.')
1357
    cfg.DATA[file_path] = df
7✔
1358
    return df
7✔
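
A short usage sketch (assuming an initialized config with a working directory; the RGI id and period label are illustrative, and the 'period' column is assumed from the standard OGGM file format):

import oggm.cfg as cfg
from oggm.utils._downloads import get_geodetic_mb_dataframe

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # illustrative path
df = get_geodetic_mb_dataframe()
sel = df.loc['RGI60-11.00897']  # illustrative glacier
sel = sel.loc[sel['period'] == '2000-01-01_2020-01-01']  # assumed period label
print(sel['dmdtda'])
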
1359

1360

1361
def get_temp_bias_dataframe(dataset='w5e5'):
9✔
1362
    """Fetches the reference geodetic dataframe for calibration.
1363

1364
    Currently the only available dataset is the W5E5 temperature bias file
1365
    ('w5e5_temp_bias_v2023.4.csv'), which is downloaded from the OGGM
1366
    cluster at
1367
    https://cluster.klima.uni-bremen.de/~oggm/ref_mb_params/oggm_v1.6/
1368

1369
    Parameters
1370
    ----------
1371
    dataset : str
1372
        name of the dataset to fetch (currently only 'w5e5' is available)
1373

1374
    Returns
1375
    -------
1376
    a DataFrame with the data.
1377
    """
1378

1379
    if dataset != 'w5e5':
1✔
1380
        raise NotImplementedError(f'No such dataset available yet: {dataset}')
1381

1382
    # fetch the file online
1383
    base_url = ('https://cluster.klima.uni-bremen.de/~oggm/ref_mb_params/oggm_v1.6/'
1✔
1384
                'w5e5_temp_bias_v2023.4.csv')
1385

1386

1387
    file_path = file_downloader(base_url)
1✔
1388

1389
    # Did we open it yet?
1390
    if file_path in cfg.DATA:
1✔
1391
        return cfg.DATA[file_path]
1✔
1392

1393
    # If not let's go
1394
    extension = os.path.splitext(file_path)[1]
1✔
1395
    if extension == '.csv':
1✔
1396
        df = pd.read_csv(file_path, index_col=0)
1✔
1397
    elif extension == '.hdf':
×
1398
        df = pd.read_hdf(file_path)
×
1399

1400
    cfg.DATA[file_path] = df
1✔
1401
    return df
1✔
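
A minimal sketch (assuming an initialized config with a working directory; only the 'w5e5' dataset is available at the time of writing):

import oggm.cfg as cfg
from oggm.utils._downloads import get_temp_bias_dataframe

cfg.initialize()
cfg.PATHS['working_dir'] = '/tmp/oggm_wd'  # illustrative path
df = get_temp_bias_dataframe('w5e5')
print(df.head())
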
1402

1403

1404
def srtm_zone(lon_ex, lat_ex):
9✔
1405
    """Returns a list of SRTM zones covering the desired extent.
1406
    """
1407

1408
    # SRTM are sorted in tiles of 5 degrees
1409
    srtm_x0 = -180.
1✔
1410
    srtm_y0 = 60.
1✔
1411
    srtm_dx = 5.
1✔
1412
    srtm_dy = -5.
1✔
1413

1414
    # quick n dirty solution to be sure that we will cover the whole range
1415
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1416
    # int() to avoid Deprec warning:
1417
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1418
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1419
    # int() to avoid Deprec warning
1420
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) + 3)))
1✔
1421

1422
    zones = []
1✔
1423
    for lon in lon_ex:
1✔
1424
        for lat in lat_ex:
1✔
1425
            dx = lon - srtm_x0
1✔
1426
            dy = lat - srtm_y0
1✔
1427
            if dy > 0:
1✔
1428
                continue
×
1429
            zx = np.ceil(dx / srtm_dx)
1✔
1430
            zy = np.ceil(dy / srtm_dy)
1✔
1431
            zones.append('{:02.0f}_{:02.0f}'.format(zx, zy))
1✔
1432
    return list(sorted(set(zones)))
1✔
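
For example, for a small extent in the Alps (the coordinates are illustrative; tiles are indexed in 5x5 degree steps):

from oggm.utils._downloads import srtm_zone

zones = srtm_zone(lon_ex=[10.0, 10.5], lat_ex=[46.0, 46.5])
# e.g. something like ['38_03', '39_03'] for this extent
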
1433

1434

1435
def _tandem_path(lon_tile, lat_tile):
9✔
1436

1437
    # OK we have a proper tile now
1438
    # This changed in December 2022
1439

1440
    # First folder level is sorted from S to N
1441
    level_0 = 'S' if lat_tile < 0 else 'N'
1✔
1442
    level_0 += '{:02d}'.format(abs(lat_tile))
1✔
1443

1444
    # Second folder level is sorted from W to E, but in 10 steps
1445
    level_1 = 'W' if lon_tile < 0 else 'E'
1✔
1446
    level_1 += '{:03d}'.format(divmod(abs(lon_tile), 10)[0] * 10)
1✔
1447

1448
    # Level 2 is formatting, but depends on lat
1449
    level_2 = 'W' if lon_tile < 0 else 'E'
1✔
1450
    if abs(lat_tile) <= 60:
1✔
1451
        level_2 += '{:03d}'.format(abs(lon_tile))
1✔
1452
    elif abs(lat_tile) <= 80:
1✔
1453
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 2)[0] * 2)
1✔
1454
    else:
1455
        level_2 += '{:03d}'.format(divmod(abs(lon_tile), 4)[0] * 4)
1✔
1456

1457
    # Final path
1458
    out = (level_0 + '/' + level_1 + '/' +
1✔
1459
           'TDM1_DEM__30_{}{}'.format(level_0, level_2))
1460
    return out
1✔
1461

1462

1463
def tandem_zone(lon_ex, lat_ex):
9✔
1464
    """Returns a list of TanDEM-X zones covering the desired extent.
1465
    """
1466

1467
    # Files are one by one tiles, so lets loop over them
1468
    # For higher lats they are stored in steps of 2 and 4. My code below
1469
    # is probably giving more files than needed but better safe than sorry
1470
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1471
                          dtype=int)
1472
    zones = []
1✔
1473
    for lat in lat_tiles:
1✔
1474
        if abs(lat) < 60:
1✔
1475
            l0 = np.floor(lon_ex[0])
1✔
1476
            l1 = np.floor(lon_ex[1])
1✔
1477
        elif abs(lat) < 80:
1✔
1478
            l0 = divmod(lon_ex[0], 2)[0] * 2
1✔
1479
            l1 = divmod(lon_ex[1], 2)[0] * 2
1✔
1480
        elif abs(lat) < 90:
1✔
1481
            l0 = divmod(lon_ex[0], 4)[0] * 4
1✔
1482
            l1 = divmod(lon_ex[1], 4)[0] * 4
1✔
1483
        lon_tiles = np.arange(l0, l1+1, dtype=int)
1✔
1484
        for lon in lon_tiles:
1✔
1485
            zones.append(_tandem_path(lon, lat))
1✔
1486
    return list(sorted(set(zones)))
1✔
1487

1488

1489
def _aw3d30_path(lon_tile, lat_tile):
9✔
1490

1491
    # OK we have a proper tile now
1492

1493
    # Folders are sorted with N E S W in 5 degree steps
1494
    # But in N and E the lower boundary is indicated
1495
    # e.g. N060 contains N060 - N064
1496
    # e.g. E000 contains E000 - E004
1497
    # but S and W indicate the upper boundary:
1498
    # e.g. S010 contains S006 - S010
1499
    # e.g. W095 contains W091 - W095
1500

1501
    # get letters
1502
    ns = 'S' if lat_tile < 0 else 'N'
1✔
1503
    ew = 'W' if lon_tile < 0 else 'E'
1✔
1504

1505
    # get lat/lon
1506
    lon = abs(5 * np.floor(lon_tile/5))
1✔
1507
    lat = abs(5 * np.floor(lat_tile/5))
1✔
1508

1509
    folder = '%s%.3d%s%.3d' % (ns, lat, ew, lon)
1✔
1510
    filename = '%s%.3d%s%.3d' % (ns, abs(lat_tile), ew, abs(lon_tile))
1✔
1511

1512
    # Final path
1513
    out = folder + '/' + filename
1✔
1514
    return out
1✔
1515

1516

1517
def aw3d30_zone(lon_ex, lat_ex):
9✔
1518
    """Returns a list of AW3D30 zones covering the desired extent.
1519
    """
1520

1521
    # Files are 1x1 degree tiles, so let's loop over them
1522
    lon_tiles = np.arange(np.floor(lon_ex[0]), np.ceil(lon_ex[1]+1e-9),
1✔
1523
                          dtype=int)
1524
    lat_tiles = np.arange(np.floor(lat_ex[0]), np.ceil(lat_ex[1]+1e-9),
1✔
1525
                          dtype=int)
1526
    zones = []
1✔
1527
    for lon in lon_tiles:
1✔
1528
        for lat in lat_tiles:
1✔
1529
            zones.append(_aw3d30_path(lon, lat))
1✔
1530
    return list(sorted(set(zones)))
1✔
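
# Illustrative example: for northern/eastern tiles the AW3D30 folder is named
# after the lower boundary of a 5-degree block, the file after the 1x1 degree
# tile itself, so 10E / 46N resolves as below.
def _example_aw3d30_path():
    path = _aw3d30_path(10, 46)
    assert path == 'N045E010/N046E010'
    return path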
1531

1532

1533
def _extent_to_polygon(lon_ex, lat_ex, to_crs=None):
9✔
1534

1535
    if lon_ex[0] == lon_ex[1] and lat_ex[0] == lat_ex[1]:
1✔
1536
        out = shpg.Point(lon_ex[0], lat_ex[0])
1✔
1537
    else:
1538
        x = [lon_ex[0], lon_ex[1], lon_ex[1], lon_ex[0], lon_ex[0]]
1✔
1539
        y = [lat_ex[0], lat_ex[0], lat_ex[1], lat_ex[1], lat_ex[0]]
1✔
1540
        out = shpg.Polygon(np.array((x, y)).T)
1✔
1541
    if to_crs is not None:
1✔
1542
        out = salem.transform_geometry(out, to_crs=to_crs)
1✔
1543
    return out
1✔
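
# Illustrative example: without a target CRS the helper returns a plain
# shapely geometry in lon/lat -- a Point for a degenerate extent, a closed
# rectangle otherwise.
def _example_extent_to_polygon():
    poly = _extent_to_polygon([10., 11.], [46., 47.])
    assert poly.bounds == (10., 46., 11., 47.)
    point = _extent_to_polygon([10., 10.], [46., 46.])
    assert point.geom_type == 'Point'
    return poly, point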
1544

1545

1546
def arcticdem_zone(lon_ex, lat_ex):
9✔
1547
    """Returns a list of Arctic-DEM zones covering the desired extent.
1548
    """
1549

1550
    gdf = gpd.read_file(get_demo_file('ArcticDEM_Tile_Index_Rel7_by_tile.shp'))
1✔
1551
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1552
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1553
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1554

1555

1556
def rema_zone(lon_ex, lat_ex):
9✔
1557
    """Returns a list of REMA-DEM zones covering the desired extent.
1558
    """
1559

1560
    gdf = gpd.read_file(get_demo_file('REMA_Tile_Index_Rel1.1.shp'))
1✔
1561
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1562
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1563
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1564

1565

1566
def alaska_dem_zone(lon_ex, lat_ex):
9✔
1567
    """Returns a list of Alaska-DEM zones covering the desired extent.
1568
    """
1569

1570
    gdf = gpd.read_file(get_demo_file('Alaska_albers_V3_tiles.shp'))
1✔
1571
    p = _extent_to_polygon(lon_ex, lat_ex, to_crs=gdf.crs)
1✔
1572
    gdf = gdf.loc[gdf.intersects(p)]
1✔
1573
    return gdf.tile.values if len(gdf) > 0 else []
1✔
1574

1575

1576
def copdem_zone(lon_ex, lat_ex, source):
9✔
1577
    """Returns a list of Copernicus DEM tarfile and tilename tuples
1578
    """
1579

1580
    # because we use both meters and arc secs in our filenames...
1581
    if source[-2:] == '90':
1✔
1582
        asec = '30'
1✔
1583
    elif source[-2:] == '30':
1✔
1584
        asec = '10'
1✔
1585
    else:
1586
        raise InvalidDEMError('COPDEM Version not valid.')
×
1587

1588
    # either reuse or load lookup table
1589
    if source in cfg.DATA:
1✔
1590
        df = cfg.DATA[source]
1✔
1591
    else:
1592
        df = pd.read_csv(get_demo_file('{}_2022_1.csv'.format(source.lower())))
1✔
1593
        cfg.DATA[source] = df
1✔
1594

1595
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1596
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1597
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1598

1599
    flist = []
1✔
1600
    for lat in lats:
1✔
1601
        # north or south?
1602
        ns = 'S' if lat < 0 else 'N'
1✔
1603
        for lon in lons:
1✔
1604
            # east or west?
1605
            ew = 'W' if lon < 0 else 'E'
1✔
1606
            lat_str = '{}{:02.0f}'.format(ns, abs(lat))
1✔
1607
            lon_str = '{}{:03.0f}'.format(ew, abs(lon))
1✔
1608
            try:
1✔
1609
                filename = df.loc[(df['Long'] == lon_str) &
1✔
1610
                                  (df['Lat'] == lat_str)]['CPP filename'].iloc[0]
1611
                flist.append((filename,
1✔
1612
                              'Copernicus_DSM_{}_{}_00_{}_00'.format(asec, lat_str, lon_str)))
1613
            except IndexError:
×
1614
                # COPDEM is global, if we miss tiles it is probably in the ocean
1615
                pass
×
1616
    return flist
1✔
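
# Illustrative sketch of the tile naming reproduced above: copdem_zone()
# returns (tarfile, tilename) tuples, where the tile name follows the
# Copernicus convention below ('10' for COPDEM30, '30' for COPDEM90). The
# matching tarfile comes from the lookup csv and is not reproduced here.
def _example_copdem_tilename(lon=10, lat=46, asec='10'):
    lat_str = '{}{:02.0f}'.format('S' if lat < 0 else 'N', abs(lat))
    lon_str = '{}{:03.0f}'.format('W' if lon < 0 else 'E', abs(lon))
    # e.g. 'Copernicus_DSM_10_N46_00_E010_00' for the defaults above
    return 'Copernicus_DSM_{}_{}_00_{}_00'.format(asec, lat_str, lon_str)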
1617

1618

1619
def dem3_viewpano_zone(lon_ex, lat_ex):
9✔
1620
    """Returns a list of DEM3 zones covering the desired extent.
1621

1622
    http://viewfinderpanoramas.org/Coverage%20map%20viewfinderpanoramas_org3.htm
1623
    """
1624

1625
    for _f in DEM3REG.keys():
1✔
1626

1627
        if (np.min(lon_ex) >= DEM3REG[_f][0]) and \
1✔
1628
           (np.max(lon_ex) <= DEM3REG[_f][1]) and \
1629
           (np.min(lat_ex) >= DEM3REG[_f][2]) and \
1630
           (np.max(lat_ex) <= DEM3REG[_f][3]):
1631

1632
            # test some weird inset files in Antarctica
1633
            if (np.min(lon_ex) >= -91.) and (np.max(lon_ex) <= -90.) and \
1✔
1634
               (np.min(lat_ex) >= -72.) and (np.max(lat_ex) <= -68.):
1635
                return ['SR15']
×
1636

1637
            elif (np.min(lon_ex) >= -47.) and (np.max(lon_ex) <= -43.) and \
1✔
1638
                 (np.min(lat_ex) >= -61.) and (np.max(lat_ex) <= -60.):
1639
                return ['SP23']
×
1640

1641
            elif (np.min(lon_ex) >= 162.) and (np.max(lon_ex) <= 165.) and \
1✔
1642
                 (np.min(lat_ex) >= -68.) and (np.max(lat_ex) <= -66.):
1643
                return ['SQ58']
×
1644

1645
            # test some rogue Greenland tiles as well
1646
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1647
                 (np.min(lat_ex) >= 76.) and (np.max(lat_ex) <= 80.):
1648
                return ['T19']
×
1649

1650
            elif (np.min(lon_ex) >= -72.) and (np.max(lon_ex) <= -66.) and \
1✔
1651
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1652
                return ['U19']
×
1653

1654
            elif (np.min(lon_ex) >= -66.) and (np.max(lon_ex) <= -60.) and \
1✔
1655
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1656
                return ['U20']
×
1657

1658
            elif (np.min(lon_ex) >= -60.) and (np.max(lon_ex) <= -54.) and \
1✔
1659
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1660
                return ['U21']
×
1661

1662
            elif (np.min(lon_ex) >= -54.) and (np.max(lon_ex) <= -48.) and \
1✔
1663
                 (np.min(lat_ex) >= 80.) and (np.max(lat_ex) <= 83.):
1664
                return ['U22']
×
1665

1666
            elif (np.min(lon_ex) >= -25.) and (np.max(lon_ex) <= -13.) and \
1✔
1667
                 (np.min(lat_ex) >= 63.) and (np.max(lat_ex) <= 67.):
1668
                return ['ISL']
1✔
1669

1670
            else:
1671
                return [_f]
1✔
1672

1673
    # if the tile doesn't have a special name, its name can be found like this:
1674
    # corrected SRTMs are sorted in tiles of 6 deg longitude and 4 deg latitude
1675
    srtm_x0 = -180.
1✔
1676
    srtm_y0 = 0.
1✔
1677
    srtm_dx = 6.
1✔
1678
    srtm_dy = 4.
1✔
1679

1680
    # quick n dirty solution to be sure that we will cover the whole range
1681
    mi, ma = np.min(lon_ex), np.max(lon_ex)
1✔
1682
    # TODO: Fabien, find out what Johannes wanted with this +3
1683
    # +3 is just for the number to become still a bit larger
1684
    # int() to avoid Deprec warning
1685
    lon_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dy) + 3))
1✔
1686
    mi, ma = np.min(lat_ex), np.max(lat_ex)
1✔
1687
    # int() to avoid Deprec warning
1688
    lat_ex = np.linspace(mi, ma, int(np.ceil((ma - mi) / srtm_dx) + 3))
1✔
1689

1690
    zones = []
1✔
1691
    for lon in lon_ex:
1✔
1692
        for lat in lat_ex:
1✔
1693
            dx = lon - srtm_x0
1✔
1694
            dy = lat - srtm_y0
1✔
1695
            zx = np.ceil(dx / srtm_dx)
1✔
1696
            # convert number to letter
1697
            zy = chr(int(abs(dy / srtm_dy)) + ord('A'))
1✔
1698
            if lat >= 0:
1✔
1699
                zones.append('%s%02.0f' % (zy, zx))
1✔
1700
            else:
1701
                zones.append('S%s%02.0f' % (zy, zx))
×
1702
    return list(sorted(set(zones)))
1✔
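
# Illustrative usage (a sketch, assuming an extent in the Alps): outside the
# special named regions the DEM3 tiles follow a letter/number scheme, where
# the letter encodes the 4-degree latitude band and the number the 6-degree
# longitude column.
def _example_dem3_zone_usage():
    zones = dem3_viewpano_zone(lon_ex=[10.2, 11.0], lat_ex=[46.6, 47.0])
    # For this extent the single tile 'L32' is returned
    return zones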
1703

1704

1705
def aster_zone(lon_ex, lat_ex):
9✔
1706
    """Returns a list of ASTGTMV3 zones covering the desired extent.
1707

1708
    ASTER v3 tiles are 1 degree x 1 degree
1709
    N50 contains 50 to 50.9
1710
    E10 contains 10 to 10.9
1711
    S70 contains -69.99 to -69.0
1712
    W20 contains -19.99 to -19.0
1713
    """
1714

1715
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1716
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1717
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1718

1719
    zones = []
1✔
1720
    for lat in lats:
1✔
1721
        # north or south?
1722
        ns = 'S' if lat < 0 else 'N'
1✔
1723
        for lon in lons:
1✔
1724
            # east or west?
1725
            ew = 'W' if lon < 0 else 'E'
1✔
1726
            filename = 'ASTGTMV003_{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat),
1✔
1727
                                                                ew, abs(lon))
1728
            zones.append(filename)
1✔
1729
    return list(sorted(set(zones)))
1✔
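
# Illustrative example: ASTER GDEM v3 granules are named after the lower-left
# corner of their 1x1 degree cell, so a small extent around 10.5E / 46.5N
# resolves to a single granule.
def _example_aster_zone_usage():
    zones = aster_zone(lon_ex=[10.4, 10.6], lat_ex=[46.4, 46.6])
    assert zones == ['ASTGTMV003_N46E010']
    return zones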
1730

1731

1732
def nasadem_zone(lon_ex, lat_ex):
9✔
1733
    """Returns a list of NASADEM zones covering the desired extent.
1734

1735
    NASADEM tiles are 1 degree x 1 degree
1736
    N50 contains 50 to 50.9
1737
    E10 contains 10 to 10.9
1738
    S70 contains -69.99 to -69.0
1739
    W20 contains -19.99 to -19.0
1740
    """
1741

1742
    # adding small buffer for unlikely case where one lon/lat_ex == xx.0
1743
    lons = np.arange(np.floor(lon_ex[0]-1e-9), np.ceil(lon_ex[1]+1e-9))
1✔
1744
    lats = np.arange(np.floor(lat_ex[0]-1e-9), np.ceil(lat_ex[1]+1e-9))
1✔
1745

1746
    zones = []
1✔
1747
    for lat in lats:
1✔
1748
        # north or south?
1749
        ns = 's' if lat < 0 else 'n'
1✔
1750
        for lon in lons:
1✔
1751
            # east or west?
1752
            ew = 'w' if lon < 0 else 'e'
1✔
1753
            filename = '{}{:02.0f}{}{:03.0f}'.format(ns, abs(lat), ew,
1✔
1754
                                                     abs(lon))
1755
            zones.append(filename)
1✔
1756
    return list(sorted(set(zones)))
1✔
1757

1758

1759
def mapzen_zone(lon_ex, lat_ex, dx_meter=None, zoom=None):
9✔
1760
    """Returns a list of AWS mapzen zones covering the desired extent.
1761

1762
    For mapzen one has to specify the level of detail (zoom) one wants. The
1763
    best way in OGGM is to specify dx_meter of the underlying map and OGGM
1764
    will decide which zoom level works best.
1765
    """
1766

1767
    if dx_meter is None and zoom is None:
1✔
1768
        raise InvalidParamsError('Need either zoom level or dx_meter.')
×
1769

1770
    bottom, top = lat_ex
1✔
1771
    left, right = lon_ex
1✔
1772
    ybound = 85.0511
1✔
1773
    if bottom <= -ybound:
1✔
1774
        bottom = -ybound
1✔
1775
    if top <= -ybound:
1✔
1776
        top = -ybound
1✔
1777
    if bottom > ybound:
1✔
1778
        bottom = ybound
1✔
1779
    if top > ybound:
1✔
1780
        top = ybound
1✔
1781
    if right >= 180:
1✔
1782
        right = 179.999
1✔
1783
    if left >= 180:
1✔
1784
        left = 179.999
1✔
1785

1786
    if dx_meter:
1✔
1787
        # Find out the zoom so that we are close to the desired accuracy
1788
        lat = np.max(np.abs([bottom, top]))
1✔
1789
        zoom = int(np.ceil(math.log2((math.cos(lat * math.pi / 180) *
1✔
1790
                                      2 * math.pi * WEB_EARTH_RADUIS) /
1791
                                     (WEB_N_PIX * dx_meter))))
1792

1793
        # According to this we should just always stay above 10 (sorry)
1794
        # https://github.com/tilezen/joerd/blob/master/docs/data-sources.md
1795
        zoom = 10 if zoom < 10 else zoom
1✔
1796

1797
    # Code from planetutils
1798
    size = 2 ** zoom
1✔
1799
    xt = lambda x: int((x + 180.0) / 360.0 * size)
1✔
1800
    yt = lambda y: int((1.0 - math.log(math.tan(math.radians(y)) +
1✔
1801
                                       (1 / math.cos(math.radians(y))))
1802
                        / math.pi) / 2.0 * size)
1803
    tiles = []
1✔
1804
    for x in range(xt(left), xt(right) + 1):
1✔
1805
        for y in range(yt(top), yt(bottom) + 1):
1✔
1806
            tiles.append('/'.join(map(str, [zoom, x, str(y) + '.tif'])))
1✔
1807
    return tiles
1✔
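
# Illustrative sketch of the zoom choice above: when dx_meter is given, the
# zoom level is picked so that one 256-pixel web-mercator tile roughly matches
# the requested map resolution at that latitude (with a floor at zoom 10).
# The numbers below are assumptions for the example only.
def _example_mapzen_zoom(dx_meter=100., lat=46.):
    zoom = int(np.ceil(math.log2((math.cos(lat * math.pi / 180) *
                                  2 * math.pi * WEB_EARTH_RADUIS) /
                                 (WEB_N_PIX * dx_meter))))
    return max(zoom, 10)  # about 11 for 100 m at mid-latitudes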
1808

1809

1810
def get_demo_file(fname):
9✔
1811
    """Returns the path to the desired OGGM-sample-file.
1812

1813
    If the sample data is not cached, it will be downloaded from
1814
    https://github.com/OGGM/oggm-sample-data
1815

1816
    Parameters
1817
    ----------
1818
    fname : str
1819
        Filename of the desired OGGM-sample-file
1820

1821
    Returns
1822
    -------
1823
    str
1824
        Absolute path to the desired file.
1825
    """
1826

1827
    d = download_oggm_files()
7✔
1828
    if fname in d:
7✔
1829
        return d[fname]
7✔
1830
    else:
1831
        return None
1✔
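
# Illustrative usage: the sample files referenced throughout this module are
# fetched lazily through get_demo_file(). The file name below is one that is
# used above in arcticdem_zone(); unknown names return None.
def _example_get_demo_file():
    fp = get_demo_file('ArcticDEM_Tile_Index_Rel7_by_tile.shp')
    return fp  # absolute path in the sample-data cache, or None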
1832

1833

1834
def get_wgms_files():
9✔
1835
    """Get the path to the default WGMS-RGI link file and the data dir.
1836

1837
    Returns
1838
    -------
1839
    (file, dir) : paths to the files
1840
    """
1841

1842
    download_oggm_files()
4✔
1843
    sdir = os.path.join(cfg.CACHE_DIR,
4✔
1844
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1845
                        'wgms')
1846
    datadir = os.path.join(sdir, 'mbdata')
4✔
1847
    assert os.path.exists(datadir)
4✔
1848

1849
    outf = os.path.join(sdir, 'rgi_wgms_links_20220112.csv')
4✔
1850
    outf = pd.read_csv(outf, dtype={'RGI_REG': object})
4✔
1851

1852
    return outf, datadir
4✔
1853

1854

1855
def get_glathida_file():
9✔
1856
    """Get the path to the default GlaThiDa-RGI link file.
1857

1858
    Returns
1859
    -------
1860
    file : path to the file
1861
    """
1862

1863
    # Roll our own
1864
    download_oggm_files()
1✔
1865
    sdir = os.path.join(cfg.CACHE_DIR,
1✔
1866
                        'oggm-sample-data-%s' % SAMPLE_DATA_COMMIT,
1867
                        'glathida')
1868
    outf = os.path.join(sdir, 'rgi_glathida_links.csv')
1✔
1869
    assert os.path.exists(outf)
1✔
1870
    return outf
1✔
1871

1872

1873
def get_rgi_dir(version=None, reset=False):
9✔
1874
    """Path to the RGI directory.
1875

1876
    If the RGI files are not present, download them.
1877

1878
    Parameters
1879
    ----------
1880
    version : str
1881
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
1882
    reset : bool
1883
        If True, deletes the RGI directory first and downloads the data
1884

1885
    Returns
1886
    -------
1887
    str
1888
        path to the RGI directory
1889
    """
1890

1891
    with get_lock():
1✔
1892
        return _get_rgi_dir_unlocked(version=version, reset=reset)
1✔
1893

1894

1895
def _get_rgi_dir_unlocked(version=None, reset=False):
9✔
1896

1897
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
1898
    if version is None:
1✔
1899
        version = cfg.PARAMS['rgi_version']
×
1900

1901
    if len(version) == 1:
1✔
1902
        version += '0'
1✔
1903

1904
    # Be sure the user gave a sensible path to the RGI dir
1905
    if not rgi_dir:
1✔
1906
        raise InvalidParamsError('The RGI data directory has to be '
×
1907
                                 'specified explicitly.')
1908
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
1909
    rgi_dir = os.path.join(rgi_dir, 'RGIV' + version)
1✔
1910
    mkdir(rgi_dir, reset=reset)
1✔
1911

1912
    if version == '50':
1✔
1913
        dfile = 'http://www.glims.org/RGI/rgi50_files/rgi50.zip'
1✔
1914
    elif version == '60':
1✔
1915
        dfile = 'http://www.glims.org/RGI/rgi60_files/00_rgi60.zip'
1✔
1916
    elif version == '61':
×
1917
        dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/rgi_61.zip'
×
1918
    elif version == '62':
×
1919
        dfile = 'https://cluster.klima.uni-bremen.de/~oggm/rgi/rgi62.zip'
×
1920

1921
    test_file = os.path.join(rgi_dir,
1✔
1922
                             '*_rgi*{}_manifest.txt'.format(version))
1923

1924
    if len(glob.glob(test_file)) == 0:
1✔
1925
        # if not there download it
1926
        ofile = file_downloader(dfile, reset=reset)
1✔
1927
        # Extract root
1928
        with zipfile.ZipFile(ofile) as zf:
1✔
1929
            zf.extractall(rgi_dir)
1✔
1930
        # Extract subdirs
1931
        pattern = '*_rgi{}_*.zip'.format(version)
1✔
1932
        for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
1✔
1933
            for filename in fnmatch.filter(files, pattern):
1✔
1934
                zfile = os.path.join(root, filename)
1✔
1935
                with zipfile.ZipFile(zfile) as zf:
1✔
1936
                    ex_root = zfile.replace('.zip', '')
1✔
1937
                    mkdir(ex_root)
1✔
1938
                    zf.extractall(ex_root)
1✔
1939
                # delete the zipfile after success
1940
                os.remove(zfile)
1✔
1941
        if len(glob.glob(test_file)) == 0:
1✔
1942
            raise RuntimeError('Could not find a manifest file in the RGI '
×
1943
                               'directory: ' + rgi_dir)
1944
    return rgi_dir
1✔
1945

1946

1947
def get_rgi_region_file(region, version=None, reset=False):
9✔
1948
    """Path to the RGI region file.
1949

1950
    If the RGI files are not present, download them.
1951

1952
    Parameters
1953
    ----------
1954
    region : str
1955
        from '01' to '19'
1956
    version : str
1957
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
1958
    reset : bool
1959
        If True, deletes the RGI directory first and downloads the data
1960

1961
    Returns
1962
    -------
1963
    str
1964
        path to the RGI shapefile
1965
    """
1966

1967
    rgi_dir = get_rgi_dir(version=version, reset=reset)
1✔
1968
    f = list(glob.glob(rgi_dir + "/*/*{}_*.shp".format(region)))
1✔
1969
    assert len(f) == 1
1✔
1970
    return f[0]
1✔
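
# Illustrative usage (a sketch only, as this triggers a download of the RGI
# archive on first use): fetch the RGI v6 shapefile for region 11 (Central
# Europe) and open it with geopandas.
def _example_rgi_region_file():
    fp = get_rgi_region_file('11', version='6')
    return gpd.read_file(fp)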
1971

1972

1973
def get_rgi_glacier_entities(rgi_ids, version=None):
9✔
1974
    """Get a list of glacier outlines selected from their RGI IDs.
1975

1976
    Will download RGI data if not present.
1977

1978
    Parameters
1979
    ----------
1980
    rgi_ids : list of str
1981
        the glaciers you want the outlines for
1982
    version : str
1983
        the rgi version
1984

1985
    Returns
1986
    -------
1987
    geopandas.GeoDataFrame
1988
        containing the desired RGI glacier outlines
1989
    """
1990

1991
    regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
1992
    if version is None:
×
1993
        version = rgi_ids[0].split('-')[0][-2:]
×
1994
    selection = []
×
1995
    for reg in sorted(np.unique(regions)):
×
1996
        sh = gpd.read_file(get_rgi_region_file(reg, version=version))
×
1997
        selection.append(sh.loc[sh.RGIId.isin(rgi_ids)])
×
1998

1999
    # Make a new dataframe of those
2000
    selection = pd.concat(selection)
×
2001
    selection.crs = sh.crs  # for geolocalisation
×
2002
    if len(selection) != len(rgi_ids):
×
2003
        raise RuntimeError('Could not find all RGI ids')
×
2004

2005
    return selection
×
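
# Illustrative usage (a sketch only): select single outlines by RGI id. The id
# below (Hintereisferner, a standard OGGM test glacier) is an assumption for
# the example; the RGI region file is downloaded if needed.
def _example_rgi_glacier_entities():
    return get_rgi_glacier_entities(['RGI60-11.00897'])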
2006

2007

2008
def get_rgi_intersects_dir(version=None, reset=False):
9✔
2009
    """Path to the RGI directory containing the intersect files.
2010

2011
    If the files are not present, download them.
2012

2013
    Parameters
2014
    ----------
2015
    version : str
2016
        '5', '6', defaults to None (linking to the one specified in cfg.PARAMS)
2017
    reset : bool
2018
        If True, deletes the intersects before redownloading them
2019

2020
    Returns
2021
    -------
2022
    str
2023
        path to the directory
2024
    """
2025

2026
    with get_lock():
1✔
2027
        return _get_rgi_intersects_dir_unlocked(version=version, reset=reset)
1✔
2028

2029

2030
def _get_rgi_intersects_dir_unlocked(version=None, reset=False):
9✔
2031

2032
    rgi_dir = cfg.PATHS['rgi_dir']
1✔
2033
    if version is None:
1✔
2034
        version = cfg.PARAMS['rgi_version']
×
2035

2036
    if len(version) == 1:
1✔
2037
        version += '0'
1✔
2038

2039
    # Be sure the user gave a sensible path to the RGI dir
2040
    if not rgi_dir:
1✔
2041
        raise InvalidParamsError('The RGI data directory has to be '
×
2042
                                 'specified explicitly.')
2043

2044
    rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir))
1✔
2045
    mkdir(rgi_dir)
1✔
2046

2047
    dfile = 'https://cluster.klima.uni-bremen.de/data/rgi/'
1✔
2048
    dfile += 'RGI_V{}_Intersects.zip'.format(version)
1✔
2049
    if version == '62':
1✔
2050
        dfile = ('https://cluster.klima.uni-bremen.de/~oggm/rgi/'
×
2051
                 'rgi62_Intersects.zip')
2052

2053
    odir = os.path.join(rgi_dir, 'RGI_V' + version + '_Intersects')
1✔
2054
    if reset and os.path.exists(odir):
1✔
2055
        shutil.rmtree(odir)
×
2056

2057
    # A lot of code for backwards compat (sigh...)
2058
    if version in ['50', '60']:
1✔
2059
        test_file = os.path.join(odir, 'Intersects_OGGM_Manifest.txt')
1✔
2060
        if not os.path.exists(test_file):
1✔
2061
            # if not there download it
2062
            ofile = file_downloader(dfile, reset=reset)
1✔
2063
            # Extract root
2064
            with zipfile.ZipFile(ofile) as zf:
1✔
2065
                zf.extractall(odir)
1✔
2066
            if not os.path.exists(test_file):
1✔
2067
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2068
                                   'directory: ' + odir)
2069
    else:
2070
        test_file = os.path.join(odir,
×
2071
                                 '*ntersect*anifest.txt')
2072
        if len(glob.glob(test_file)) == 0:
×
2073
            # if not there download it
2074
            ofile = file_downloader(dfile, reset=reset)
×
2075
            # Extract root
2076
            with zipfile.ZipFile(ofile) as zf:
×
2077
                zf.extractall(odir)
×
2078
            # Extract subdirs
2079
            pattern = '*_rgi{}_*.zip'.format(version)
×
2080
            for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']):
×
2081
                for filename in fnmatch.filter(files, pattern):
×
2082
                    zfile = os.path.join(root, filename)
×
2083
                    with zipfile.ZipFile(zfile) as zf:
×
2084
                        ex_root = zfile.replace('.zip', '')
×
2085
                        mkdir(ex_root)
×
2086
                        zf.extractall(ex_root)
×
2087
                    # delete the zipfile after success
2088
                    os.remove(zfile)
×
2089
            if len(glob.glob(test_file)) == 0:
×
2090
                raise RuntimeError('Could not find a manifest file in the RGI '
×
2091
                                   'directory: ' + odir)
2092

2093
    return odir
1✔
2094

2095

2096
def get_rgi_intersects_region_file(region=None, version=None, reset=False):
9✔
2097
    """Path to the RGI regional intersect file.
2098

2099
    If the RGI files are not present, download them.
2100

2101
    Parameters
2102
    ----------
2103
    region : str
2104
        from '00' to '19', with '00' being the global file (deprecated).
2105
        From RGI version '61' onwards, please use `get_rgi_intersects_entities`
2106
        with a list of glaciers instead of relying on the global file.
2107
    version : str
2108
        '5', '6', '61'... defaults to the one specified in cfg.PARAMS
2109
    reset : bool
2110
        If True, deletes the intersect file before redownloading it
2111

2112
    Returns
2113
    -------
2114
    str
2115
        path to the RGI intersects shapefile
2116
    """
2117

2118
    if version is None:
1✔
2119
        version = cfg.PARAMS['rgi_version']
×
2120
    if len(version) == 1:
1✔
2121
        version += '0'
1✔
2122

2123
    rgi_dir = get_rgi_intersects_dir(version=version, reset=reset)
1✔
2124

2125
    if region == '00':
1✔
2126
        if version in ['50', '60']:
1✔
2127
            version = 'AllRegs'
1✔
2128
            region = '*'
1✔
2129
        else:
2130
            raise InvalidParamsError("From RGI version 61 onwards, please use "
×
2131
                                     "get_rgi_intersects_entities() instead.")
2132
    f = list(glob.glob(os.path.join(rgi_dir, "*", '*intersects*' + region +
1✔
2133
                                    '_rgi*' + version + '*.shp')))
2134
    assert len(f) == 1
1✔
2135
    return f[0]
1✔
2136

2137

2138
def get_rgi_intersects_entities(rgi_ids, version=None):
9✔
2139
    """Get a list of glacier intersects selected from their RGI IDs.
2140

2141
    Parameters
2142
    ----------
2143
    rgi_ids: list of str
2144
        the rgi_ids you want the intersections for
2145
    version: str
2146
        '5', '6', '61'... defaults to the one specified in cfg.PARAMS
2147

2148
    Returns
2149
    -------
2150
    geopandas.GeoDataFrame
2151
        with the selected intersects
2152
    """
2153

2154
    if version is None:
×
2155
        version = cfg.PARAMS['rgi_version']
×
2156
    if len(version) == 1:
×
2157
        version += '0'
×
2158
    try:
×
2159
        regions = [s.split('-')[3] for s in rgi_ids]
×
2160

2161
    except IndexError:
×
2162
        # RGI V6
2163
        regions = [s.split('-')[1].split('.')[0] for s in rgi_ids]
×
2164
    selection = []
×
2165
    for reg in sorted(np.unique(regions)):
×
2166
        sh = gpd.read_file(get_rgi_intersects_region_file(reg,
×
2167
                                                          version=version))
2168
        selection.append(sh.loc[sh.RGIId_1.isin(rgi_ids) |
×
2169
                                sh.RGIId_2.isin(rgi_ids)])
2170

2171
    # Make a new dataframe of those
2172
    selection = pd.concat(selection)
×
2173
    selection.crs = sh.crs  # for geolocalisation
×
2174

2175
    return selection
×
2176

2177

2178
def is_dem_source_available(source, lon_ex, lat_ex):
9✔
2179
    """Checks if a DEM source is available for your purpose.
2180

2181
    This is only a very rough check! It doesn't mean that the data really is
2182
    available, but at least it's worth a try.
2183

2184
    Parameters
2185
    ----------
2186
    source : str, required
2187
        the source you want to check for
2188
    lon_ex : tuple or int, required
2189
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2190
    lat_ex : tuple or int, required
2191
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2192

2193
    Returns
2194
    -------
2195
    True or False
2196
    """
2197
    from oggm.utils import tolist
7✔
2198
    lon_ex = tolist(lon_ex, length=2)
7✔
2199
    lat_ex = tolist(lat_ex, length=2)
7✔
2200

2201
    def _in_grid(grid_json, lon, lat):
7✔
2202
        i, j = cfg.DATA['dem_grids'][grid_json].transform(lon, lat,
1✔
2203
                                                          maskout=True)
2204
        return np.all(~ (i.mask | j.mask))
1✔
2205

2206
    if source == 'GIMP':
7✔
2207
        return _in_grid('gimpdem_90m_v01.1.json', lon_ex, lat_ex)
1✔
2208
    elif source == 'ARCTICDEM':
7✔
2209
        return _in_grid('arcticdem_mosaic_100m_v3.0.json', lon_ex, lat_ex)
1✔
2210
    elif source == 'RAMP':
7✔
2211
        return _in_grid('AntarcticDEM_wgs84.json', lon_ex, lat_ex)
1✔
2212
    elif source == 'REMA':
7✔
2213
        return _in_grid('REMA_100m_dem.json', lon_ex, lat_ex)
1✔
2214
    elif source == 'ALASKA':
7✔
2215
        return _in_grid('Alaska_albers_V3.json', lon_ex, lat_ex)
×
2216
    elif source == 'TANDEM':
7✔
2217
        return True
1✔
2218
    elif source == 'AW3D30':
7✔
2219
        return np.min(lat_ex) > -60
1✔
2220
    elif source == 'MAPZEN':
7✔
2221
        return True
1✔
2222
    elif source == 'DEM3':
7✔
2223
        return True
1✔
2224
    elif source == 'ASTER':
7✔
2225
        return True
1✔
2226
    elif source == 'SRTM':
7✔
2227
        return np.max(np.abs(lat_ex)) < 60
1✔
2228
    elif source in ['COPDEM30', 'COPDEM90']:
7✔
2229
        return True
×
2230
    elif source == 'NASADEM':
7✔
2231
        return (np.min(lat_ex) > -56) and (np.max(lat_ex) < 60)
×
2232
    elif source == 'USER':
7✔
2233
        return True
×
2234
    elif source is None:
7✔
2235
        return True
7✔
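
# Illustrative usage: the SRTM check above is purely latitude based, so an
# extent in the Alps passes while one in Svalbard does not. Both extents are
# assumptions for the example.
def _example_dem_availability():
    assert is_dem_source_available('SRTM', (10., 11.), (46., 47.))
    assert not is_dem_source_available('SRTM', (15., 16.), (78., 79.))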
2236

2237

2238
def default_dem_source(rgi_id):
9✔
2239
    """Current default DEM source at a given location.
2240

2241
    Parameters
2242
    ----------
2243
    rgi_id : str
2244
        the RGI id
2245

2246
    Returns
2247
    -------
2248
    the chosen DEM source
2249
    """
2250
    rgi_reg = 'RGI{}'.format(rgi_id[6:8])
1✔
2251
    rgi_id = rgi_id[:14]
1✔
2252
    if cfg.DEM_SOURCE_TABLE.get(rgi_reg) is None:
1✔
2253
        fp = get_demo_file('rgi62_dem_frac.h5')
1✔
2254
        cfg.DEM_SOURCE_TABLE[rgi_reg] = pd.read_hdf(fp)
1✔
2255

2256
    sel = cfg.DEM_SOURCE_TABLE[rgi_reg].loc[rgi_id]
1✔
2257
    for s in ['NASADEM', 'COPDEM90', 'COPDEM30', 'GIMP', 'REMA',
1✔
2258
              'RAMP', 'TANDEM', 'MAPZEN']:
2259
        if sel.loc[s] > 0.75:
1✔
2260
            return s
1✔
2261
    # If nothing works, try COPDEM again
2262
    return 'COPDEM90'
×
2263

2264

2265
def get_topo_file(lon_ex=None, lat_ex=None, rgi_id=None, *,
9✔
2266
                  dx_meter=None, zoom=None, source=None):
2267
    """Path(s) to the DEM file(s) covering the desired extent.
2268

2269
    If the needed files for covering the extent are not present, download them.
2270

2271
    The default behavior is to pick, for each glacier, the first DEM source
2272
    whose precomputed coverage fraction is large enough (see
2273
    default_dem_source). The candidate sources are tested in the following order:
2274

2275
    'NASADEM' -> 'COPDEM90' -> 'COPDEM30' -> 'GIMP' -> 'REMA' -> 'RAMP' -> 'TANDEM' -> 'MAPZEN'
2276

2277
    To force usage of a certain data source, use the ``source`` kwarg argument.
2278

2279
    Parameters
2280
    ----------
2281
    lon_ex : tuple or int, required
2282
        a (min_lon, max_lon) tuple delimiting the requested area longitudes
2283
    lat_ex : tuple or int, required
2284
        a (min_lat, max_lat) tuple delimiting the requested area latitudes
2285
    rgi_id : str, required if source=None
2286
        the glacier id, used to decide on the DEM source
2287
    rgi_region : str, optional
2288
        the RGI region number (required for the GIMP DEM)
2289
    rgi_subregion : str, optional
2290
        the RGI subregion str (useful for RGI Reg 19)
2291
    dx_meter : float, required for source='MAPZEN'
2292
        the resolution of the glacier map (to decide the zoom level of mapzen)
2293
    zoom : int, optional
2294
        if you know the zoom already (for MAPZEN only)
2295
    source : str or list of str, optional
2296
        Name of specific DEM source. see utils.DEM_SOURCES for a list
2297

2298
    Returns
2299
    -------
2300
    tuple: (list with path(s) to the DEM file(s), data source str)
2301
    """
2302
    from oggm.utils import tolist
7✔
2303
    lon_ex = tolist(lon_ex, length=2)
7✔
2304
    lat_ex = tolist(lat_ex, length=2)
7✔
2305

2306
    if source is not None and not isinstance(source, str):
7✔
2307
        # check all user options
2308
        for s in source:
×
2309
            demf, source_str = get_topo_file(lon_ex=lon_ex, lat_ex=lat_ex,
×
2310
                                             rgi_id=rgi_id, source=s)
2311
            if demf[0]:
×
2312
                return demf, source_str
×
2313

2314
    # Did the user specify a specific DEM file?
2315
    if 'dem_file' in cfg.PATHS and os.path.isfile(cfg.PATHS['dem_file']):
7✔
2316
        source = 'USER' if source is None else source
7✔
2317
        if source == 'USER':
7✔
2318
            return [cfg.PATHS['dem_file']], source
7✔
2319

2320
    # Some logic to decide which source to take if unspecified
2321
    if source is None:
1✔
2322
        if rgi_id is None:
×
2323
            raise InvalidParamsError('rgi_id is needed if source=None')
×
2324
        source = default_dem_source(rgi_id)
×
2325

2326
    if source not in DEM_SOURCES:
1✔
2327
        raise InvalidParamsError('`source` must be one of '
×
2328
                                 '{}'.format(DEM_SOURCES))
2329

2330
    # OK go
2331
    files = []
1✔
2332
    if source == 'GIMP':
1✔
2333
        _file = _download_topo_file_from_cluster('gimpdem_90m_v01.1.tif')
1✔
2334
        files.append(_file)
1✔
2335

2336
    if source == 'ARCTICDEM':
1✔
2337
        zones = arcticdem_zone(lon_ex, lat_ex)
1✔
2338
        for z in zones:
1✔
2339
            with get_lock():
1✔
2340
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2341
                url += 'dem/ArcticDEM_100m_v3.0/'
1✔
2342
                url += '{}_100m_v3.0/{}_100m_v3.0_reg_dem.tif'.format(z, z)
1✔
2343
                files.append(file_downloader(url))
1✔
2344

2345
    if source == 'RAMP':
1✔
2346
        _file = _download_topo_file_from_cluster('AntarcticDEM_wgs84.tif')
1✔
2347
        files.append(_file)
1✔
2348

2349
    if source == 'ALASKA':
1✔
2350
        zones = alaska_dem_zone(lon_ex, lat_ex)
1✔
2351
        for z in zones:
1✔
2352
            with get_lock():
1✔
2353
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2354
                url += 'dem/Alaska_albers_V3/'
1✔
2355
                url += '{}_Alaska_albers_V3/'.format(z)
1✔
2356
                url += '{}_Alaska_albers_V3.tif'.format(z)
1✔
2357
                files.append(file_downloader(url))
1✔
2358

2359
    if source == 'REMA':
1✔
2360
        zones = rema_zone(lon_ex, lat_ex)
1✔
2361
        for z in zones:
1✔
2362
            with get_lock():
1✔
2363
                url = 'https://cluster.klima.uni-bremen.de/~oggm/'
1✔
2364
                url += 'dem/REMA_100m_v1.1/'
1✔
2365
                url += '{}_100m_v1.1/{}_100m_v1.1_reg_dem.tif'.format(z, z)
1✔
2366
                files.append(file_downloader(url))
1✔
2367

2368
    if source == 'TANDEM':
1✔
2369
        zones = tandem_zone(lon_ex, lat_ex)
1✔
2370
        for z in zones:
1✔
2371
            files.append(_download_tandem_file(z))
1✔
2372

2373
    if source == 'AW3D30':
1✔
2374
        zones = aw3d30_zone(lon_ex, lat_ex)
1✔
2375
        for z in zones:
1✔
2376
            files.append(_download_aw3d30_file(z))
1✔
2377

2378
    if source == 'MAPZEN':
1✔
2379
        zones = mapzen_zone(lon_ex, lat_ex, dx_meter=dx_meter, zoom=zoom)
1✔
2380
        for z in zones:
1✔
2381
            files.append(_download_mapzen_file(z))
1✔
2382

2383
    if source == 'ASTER':
1✔
2384
        zones = aster_zone(lon_ex, lat_ex)
1✔
2385
        for z in zones:
1✔
2386
            files.append(_download_aster_file(z))
1✔
2387

2388
    if source == 'DEM3':
1✔
2389
        zones = dem3_viewpano_zone(lon_ex, lat_ex)
1✔
2390
        for z in zones:
1✔
2391
            files.append(_download_dem3_viewpano(z))
1✔
2392

2393
    if source == 'SRTM':
1✔
2394
        zones = srtm_zone(lon_ex, lat_ex)
1✔
2395
        for z in zones:
1✔
2396
            files.append(_download_srtm_file(z))
1✔
2397

2398
    if source in ['COPDEM30', 'COPDEM90']:
1✔
2399
        filetuple = copdem_zone(lon_ex, lat_ex, source)
1✔
2400
        for cpp, eop in filetuple:
1✔
2401
            files.append(_download_copdem_file(cpp, eop, source))
1✔
2402

2403
    if source == 'NASADEM':
1✔
2404
        zones = nasadem_zone(lon_ex, lat_ex)
1✔
2405
        for z in zones:
1✔
2406
            files.append(_download_nasadem_file(z))
1✔
2407

2408
    # filter for None (e.g. oceans)
2409
    files = [s for s in files if s]
1✔
2410
    if files:
1✔
2411
        return files, source
1✔
2412
    else:
2413
        raise InvalidDEMError('Source {2}: no topography file available for '
×
2414
                              'extent lat:{0}, lon:{1}!'.
2415
                              format(lat_ex, lon_ex, source))
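

# Illustrative usage (a sketch only, as this downloads the corresponding
# tiles): ask explicitly for SRTM over a small extent in the Alps. The extent
# is an assumption for the example; with source=None an rgi_id is required so
# that default_dem_source() can pick a source from the lookup table.
def _example_get_topo_file():
    files, source = get_topo_file(lon_ex=[10.2, 10.4], lat_ex=[46.6, 46.8],
                                  source='SRTM')
    return files, source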