OGGM / oggm, build 4510759150 (pending completion)
Pull Request #1549 (GitHub): Update test image. Merge f8c800840 into 71e58edf5.
11167 of 13069 relevant lines covered (85.45%), 3.56 hits per line.

Source file: /oggm/utils/_workflow.py (84.71% of lines covered)
The listing below shows, for each source line, the line number, the code, and the coverage marker: a count with '✔' for covered lines, '×' for uncovered lines.
1
"""Classes and functions used by the OGGM workflow"""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import tempfile
9✔
7
import gzip
9✔
8
import json
9✔
9
import time
9✔
10
import random
9✔
11
import shutil
9✔
12
import tarfile
9✔
13
import sys
9✔
14
import signal
9✔
15
import datetime
9✔
16
import logging
9✔
17
import pickle
9✔
18
import warnings
9✔
19
import itertools
9✔
20
from collections import OrderedDict
9✔
21
from functools import partial, wraps
9✔
22
from time import gmtime, strftime
9✔
23
import fnmatch
9✔
24
import platform
9✔
25
import struct
9✔
26
import importlib
9✔
27
import re as regexp
9✔
28

29
# External libs
30
import pandas as pd
9✔
31
import numpy as np
9✔
32
from scipy import stats
9✔
33
import xarray as xr
9✔
34
import shapely.geometry as shpg
9✔
35
import shapely.affinity as shpa
9✔
36
from shapely.ops import transform as shp_trafo
9✔
37
import netCDF4
9✔
38

39
# Optional libs
40
try:
9✔
41
    import geopandas as gpd
9✔
42
except ImportError:
43
    pass
44
try:
9✔
45
    import salem
9✔
46
except ImportError:
47
    pass
48
try:
9✔
49
    from salem import wgs84
9✔
50
    from salem.gis import transform_proj
9✔
51
except ImportError:
52
    pass
53
try:
9✔
54
    import pyproj
9✔
55
except ImportError:
56
    pass
57

58

59
# Locals
60
from oggm import __version__
9✔
61
from oggm.utils._funcs import (calendardate_to_hydrodate, date_to_floatyear,
9✔
62
                               tolist, filter_rgi_name, parse_rgi_meta,
63
                               haversine, multipolygon_to_polygon, clip_scalar)
64
from oggm.utils._downloads import (get_demo_file, get_wgms_files,
9✔
65
                                   get_rgi_glacier_entities)
66
from oggm import cfg
9✔
67
from oggm.exceptions import InvalidParamsError, InvalidWorkflowError
9✔
68

69

70
# Default RGI date (median per region in RGI6)
71
RGI_DATE = {'01': 2009,
9✔
72
            '02': 2004,
73
            '03': 1999,
74
            '04': 2001,
75
            '05': 2001,
76
            '06': 2000,
77
            '07': 2008,
78
            '08': 2002,
79
            '09': 2001,
80
            '10': 2011,
81
            '11': 2003,
82
            '12': 2001,
83
            '13': 2006,
84
            '14': 2001,
85
            '15': 2001,
86
            '16': 2000,
87
            '17': 2000,
88
            '18': 1978,
89
            '19': 1989,
90
            }
91

92
# Module logger
93
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
94

95

96
def empty_cache():
9✔
97
    """Empty oggm's cache directory."""
98

99
    if os.path.exists(cfg.CACHE_DIR):
×
100
        shutil.rmtree(cfg.CACHE_DIR)
×
101
    os.makedirs(cfg.CACHE_DIR)
×
102

103

104
def expand_path(p):
9✔
105
    """Helper function for os.path.expanduser and os.path.expandvars"""
106

107
    return os.path.expandvars(os.path.expanduser(p))
×
108

109

110
def gettempdir(dirname='', reset=False, home=False):
9✔
111
    """Get a temporary directory.
112

113
    The default is to locate it in the system's temporary directory, as
114
    given by Python's `tempfile.gettempdir()`, in an `OGGM` subfolder. You can
115
    set `home=True` for a directory in the user's `home/tmp` folder instead
116
    (this isn't really a temporary folder, but it is convenient).
117

118
    Parameters
119
    ----------
120
    dirname : str
121
        if you want to give it a name
122
    reset : bool
123
        if it has to be emptied first.
124
    home : bool
125
        if True, returns `HOME/tmp/OGGM` instead
126

127
    Returns
128
    -------
129
    the path to the temporary directory
130
    """
131

132
    basedir = (os.path.join(os.path.expanduser('~'), 'tmp') if home
1✔
133
               else tempfile.gettempdir())
134
    return mkdir(os.path.join(basedir, 'OGGM', dirname), reset=reset)
1✔
135

136

137
# alias
138
get_temp_dir = gettempdir
9✔
139

140

141
def get_sys_info():
9✔
142
    """Returns system information as a list of tuples"""
143

144
    blob = []
8✔
145
    try:
8✔
146
        (sysname, nodename, release,
8✔
147
         version, machine, processor) = platform.uname()
148
        blob.extend([
8✔
149
            ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]),
150
            ("python-bits", struct.calcsize("P") * 8),
151
            ("OS", "%s" % (sysname)),
152
            ("OS-release", "%s" % (release)),
153
            ("machine", "%s" % (machine)),
154
            ("processor", "%s" % (processor)),
155
        ])
156
    except BaseException:
×
157
        pass
×
158

159
    return blob
8✔
160

161

162
def get_env_info():
9✔
163
    """Returns env information as a list of tuples"""
164

165
    deps = [
8✔
166
        # (MODULE_NAME, f(mod) -> mod version)
167
        ("oggm", lambda mod: mod.__version__),
168
        ("numpy", lambda mod: mod.__version__),
169
        ("scipy", lambda mod: mod.__version__),
170
        ("pandas", lambda mod: mod.__version__),
171
        ("geopandas", lambda mod: mod.__version__),
172
        ("netCDF4", lambda mod: mod.__version__),
173
        ("matplotlib", lambda mod: mod.__version__),
174
        ("rasterio", lambda mod: mod.__version__),
175
        ("fiona", lambda mod: mod.__version__),
176
        ("pyproj", lambda mod: mod.__version__),
177
        ("shapely", lambda mod: mod.__version__),
178
        ("xarray", lambda mod: mod.__version__),
179
        ("dask", lambda mod: mod.__version__),
180
        ("salem", lambda mod: mod.__version__),
181
    ]
182

183
    deps_blob = list()
8✔
184
    for (modname, ver_f) in deps:
8✔
185
        try:
8✔
186
            if modname in sys.modules:
8✔
187
                mod = sys.modules[modname]
8✔
188
            else:
189
                mod = importlib.import_module(modname)
×
190
            ver = ver_f(mod)
8✔
191
            deps_blob.append((modname, ver))
8✔
192
        except BaseException:
×
193
            deps_blob.append((modname, None))
×
194

195
    return deps_blob
8✔
196

197

198
def get_git_ident():
9✔
199
    ident_str = '$Id$'
8✔
200
    if ":" not in ident_str:
8✔
201
        return 'no_git_id'
×
202
    return ident_str.replace("$", "").replace("Id:", "").replace(" ", "")
8✔
203

204

205
def show_versions(logger=None):
9✔
206
    """Prints the OGGM version and other system information.
207

208
    Parameters
209
    ----------
210
    logger : optional
211
        the logger you want to send the printouts to. If None, the report is
        only returned as a string
212

213
    Returns
214
    -------
215
    the output string
216
    """
217

218
    sys_info = get_sys_info()
1✔
219
    deps_blob = get_env_info()
1✔
220

221
    out = ['# OGGM environment: ']
1✔
222
    out.append("## System info:")
1✔
223
    for k, stat in sys_info:
1✔
224
        out.append("    %s: %s" % (k, stat))
1✔
225
    out.append("## Packages info:")
1✔
226
    for k, stat in deps_blob:
1✔
227
        out.append("    %s: %s" % (k, stat))
1✔
228
    out.append("    OGGM git identifier: " + get_git_ident())
1✔
229

230
    if logger is not None:
1✔
231
        logger.workflow('\n'.join(out))
1✔
232

233
    return '\n'.join(out)
1✔
234

235
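For illustration (not part of the source file), the report above can either be returned as a string or sent to a logger that provides OGGM's 'workflow' level:

    print(show_versions())        # print the environment report to stdout
    show_versions(logger=log)     # or log it at the 'workflow' level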

236
class SuperclassMeta(type):
9✔
237
    """Metaclass for abstract base classes.
238

239
    http://stackoverflow.com/questions/40508492/python-sphinx-inherit-
240
    method-documentation-from-superclass
241
    """
242
    def __new__(mcls, classname, bases, cls_dict):
9✔
243
        cls = super().__new__(mcls, classname, bases, cls_dict)
9✔
244
        for name, member in cls_dict.items():
9✔
245
            if not getattr(member, '__doc__'):
9✔
246
                try:
9✔
247
                    member.__doc__ = getattr(bases[-1], name).__doc__
9✔
248
                except AttributeError:
9✔
249
                    pass
9✔
250
        return cls
9✔
251

252

253
class LRUFileCache():
9✔
254
    """A least recently used cache for temporary files.
255

256
    The files which are no longer used are deleted from the disk.
257
    """
258

259
    def __init__(self, l0=None, maxsize=None):
9✔
260
        """Instantiate.
261

262
        Parameters
263
        ----------
264
        l0 : list
265
            a list of file paths
266
        maxsize : int
267
            the max number of files to keep
268
        """
269
        self.files = [] if l0 is None else l0
6✔
270
        # if no maxsize is specified, use value from configuration
271
        maxsize = cfg.PARAMS['lru_maxsize'] if maxsize is None else maxsize
6✔
272
        self.maxsize = maxsize
6✔
273
        self.purge()
6✔
274

275
    def purge(self):
9✔
276
        """Remove expired entries."""
277
        if len(self.files) > self.maxsize:
6✔
278
            fpath = self.files.pop(0)
1✔
279
            if os.path.exists(fpath):
1✔
280
                os.remove(fpath)
1✔
281

282
    def append(self, fpath):
9✔
283
        """Append a file to the list."""
284
        if fpath not in self.files:
6✔
285
            self.files.append(fpath)
2✔
286
        self.purge()
6✔
287

288
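As an illustrative sketch (the paths and the maxsize value are arbitrary, not from this file), the cache keeps at most `maxsize` paths and deletes the oldest file from disk once that limit is exceeded:

    cache = LRUFileCache(maxsize=2)
    cache.append('/tmp/a.nc')   # kept
    cache.append('/tmp/b.nc')   # kept
    cache.append('/tmp/c.nc')   # '/tmp/a.nc' is removed from disk if it exists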

289
def lazy_property(fn):
9✔
290
    """Decorator that makes a property lazy-evaluated."""
291

292
    attr_name = '_lazy_' + fn.__name__
9✔
293

294
    @property
9✔
295
    @wraps(fn)
9✔
296
    def _lazy_property(self):
9✔
297
        if not hasattr(self, attr_name):
8✔
298
            setattr(self, attr_name, fn(self))
8✔
299
        return getattr(self, attr_name)
8✔
300

301
    return _lazy_property
9✔
302

303
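A minimal sketch of the decorator above (the class and values are hypothetical): the wrapped method runs once per instance and its result is cached under the attribute `_lazy_<name>`:

    class Demo:
        @lazy_property
        def heavy_value(self):
            print('computing...')   # only printed on the first access
            return 42.0

    d = Demo()
    d.heavy_value   # prints 'computing...' and stores 42.0 in d._lazy_heavy_value
    d.heavy_value   # returns the cached value without recomputing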

304
def mkdir(path, reset=False):
9✔
305
    """Checks if directory exists and if not, create one.
306

307
    Parameters
308
    ----------
309
    reset: erase the content of the directory if it exists
310

311
    Returns
312
    -------
313
    the path
314
    """
315

316
    if reset and os.path.exists(path):
8✔
317
        shutil.rmtree(path)
2✔
318
        # deleting stuff takes time
319
        while os.path.exists(path):  # check if it still exists
2✔
320
            pass
×
321
    try:
8✔
322
        os.makedirs(path)
8✔
323
    except FileExistsError:
6✔
324
        pass
6✔
325
    return path
8✔
326

327

328
def include_patterns(*patterns):
9✔
329
    """Factory function that can be used with copytree() ignore parameter.
330

331
    Arguments define a sequence of glob-style patterns
332
    that are used to specify what files to NOT ignore.
333
    Creates and returns a function that determines this for each directory
334
    in the file hierarchy rooted at the source directory when used with
335
    shutil.copytree().
336

337
    https://stackoverflow.com/questions/35155382/copying-specific-files-to-a-
338
    new-folder-while-maintaining-the-original-subdirect
339
    """
340

341
    def _ignore_patterns(path, names):
2✔
342
        # This is our cuisine
343
        bname = os.path.basename(path)
2✔
344
        if 'divide' in bname or 'log' in bname:
2✔
345
            keep = []
×
346
        else:
347
            keep = set(name for pattern in patterns
2✔
348
                       for name in fnmatch.filter(names, pattern))
349
        ignore = set(name for name in names
2✔
350
                     if name not in keep and not
351
                     os.path.isdir(os.path.join(path, name)))
352
        return ignore
2✔
353

354
    return _ignore_patterns
2✔
355

356
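A hedged usage sketch following the docstring above (the source and destination paths are placeholders): the returned callable is meant for `shutil.copytree`'s `ignore` argument, so that only files matching the patterns are copied while the directory tree is preserved:

    # shutil is imported at the top of this module
    shutil.copytree('gdir_src', 'gdir_dst',
                    ignore=include_patterns('*.shp', '*.nc'))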

357
class ncDataset(netCDF4.Dataset):
9✔
358
    """Wrapper around netCDF4 setting auto_mask to False"""
359

360
    def __init__(self, *args, **kwargs):
9✔
361
        super(ncDataset, self).__init__(*args, **kwargs)
7✔
362
        self.set_auto_mask(False)
7✔
363

364

365
def pipe_log(gdir, task_func_name, err=None):
9✔
366
    """Log the error in a specific directory."""
367

368
    time_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
6✔
369

370
    # Defaults to working directory: it must be set!
371
    if not cfg.PATHS['working_dir']:
6✔
372
        warnings.warn("Cannot log to file without a valid "
×
373
                      "cfg.PATHS['working_dir']!", RuntimeWarning)
374
        return
×
375

376
    fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
6✔
377
    mkdir(fpath)
6✔
378

379
    fpath = os.path.join(fpath, gdir.rgi_id)
6✔
380

381
    sep = '; '
6✔
382

383
    if err is not None:
6✔
384
        fpath += '.ERROR'
6✔
385
    else:
386
        return  # for now
×
387
        fpath += '.SUCCESS'
388

389
    with open(fpath, 'a') as f:
6✔
390
        f.write(time_str + sep + task_func_name + sep)
6✔
391
        if err is not None:
6✔
392
            f.write(err.__class__.__name__ + sep + '{}\n'.format(err))
6✔
393
        else:
394
            f.write(sep + '\n')
×
395

396

397
class DisableLogger():
9✔
398
    """Context manager to temporarily disable all loggers."""
399

400
    def __enter__(self):
9✔
401
        logging.disable(logging.CRITICAL)
7✔
402

403
    def __exit__(self, a, b, c):
9✔
404
        logging.disable(logging.NOTSET)
7✔
405

406

407
def _timeout_handler(signum, frame):
9✔
408
    raise TimeoutError('This task was killed because of timeout')
×
409

410

411
class entity_task(object):
9✔
412
    """Decorator for common job-controlling logic.
413

414
    All tasks share common operations. This decorator is here to handle them:
415
    exceptions, logging, and (some day) a database for job control.
416
    """
417

418
    def __init__(self, log, writes=[], fallback=None):
9✔
419
        """Decorator syntax: ``@entity_task(log, writes=['dem', 'outlines'])``
420

421
        Parameters
422
        ----------
423
        log: logger
424
            module logger
425
        writes: list
426
            list of files that the task will write down to disk (must be
427
            available in ``cfg.BASENAMES``)
428
        fallback: python function
429
            will be executed on gdir if entity_task fails
430
        return_value: bool
431
            whether the return value from the task should be passed over
432
            to the caller or not. In general you will always want this to
433
            be true, but sometimes the task returns things which are not
434
            useful in production and may use a lot of memory, etc.
435
        """
436
        self.log = log
9✔
437
        self.writes = writes
9✔
438
        self.fallback = fallback
9✔
439

440
        cnt = ['    Notes']
9✔
441
        cnt += ['    -----']
9✔
442
        cnt += ['    Files written to the glacier directory:']
9✔
443

444
        for k in sorted(writes):
9✔
445
            cnt += [cfg.BASENAMES.doc_str(k)]
9✔
446
        self.iodoc = '\n'.join(cnt)
9✔
447

448
    def __call__(self, task_func):
9✔
449
        """Decorate."""
450

451
        # Add to the original docstring
452
        if task_func.__doc__ is None:
9✔
453
            raise RuntimeError('Entity tasks should have a docstring!')
×
454

455
        task_func.__doc__ = '\n'.join((task_func.__doc__, self.iodoc))
9✔
456

457
        @wraps(task_func)
9✔
458
        def _entity_task(gdir, *, reset=None, print_log=True,
9✔
459
                         return_value=True, continue_on_error=None,
460
                         add_to_log_file=True, **kwargs):
461

462
            if reset is None:
7✔
463
                reset = not cfg.PARAMS['auto_skip_task']
7✔
464

465
            if continue_on_error is None:
7✔
466
                continue_on_error = cfg.PARAMS['continue_on_error']
7✔
467

468
            task_name = task_func.__name__
7✔
469

470
            # Filesuffixes are typically used to differentiate tasks
471
            fsuffix = (kwargs.get('filesuffix', False) or
7✔
472
                       kwargs.get('output_filesuffix', False))
473
            if fsuffix:
7✔
474
                task_name += fsuffix
4✔
475

476
            # Do we need to run this task?
477
            s = gdir.get_task_status(task_name)
7✔
478
            if not reset and s and ('SUCCESS' in s):
7✔
479
                return
1✔
480

481
            # Log what we are doing
482
            if print_log:
7✔
483
                self.log.info('(%s) %s', gdir.rgi_id, task_name)
7✔
484

485
            # Run the task
486
            try:
7✔
487
                if cfg.PARAMS['task_timeout'] > 0:
7✔
488
                    signal.signal(signal.SIGALRM, _timeout_handler)
×
489
                    signal.alarm(cfg.PARAMS['task_timeout'])
×
490
                ex_t = time.time()
7✔
491
                out = task_func(gdir, **kwargs)
7✔
492
                ex_t = time.time() - ex_t
7✔
493
                if cfg.PARAMS['task_timeout'] > 0:
7✔
494
                    signal.alarm(0)
×
495
                if task_name != 'gdir_to_tar':
7✔
496
                    if add_to_log_file:
7✔
497
                        gdir.log(task_name, task_time=ex_t)
7✔
498
            except Exception as err:
6✔
499
                # Something happened
500
                out = None
6✔
501
                if add_to_log_file:
6✔
502
                    gdir.log(task_name, err=err)
6✔
503
                    pipe_log(gdir, task_name, err=err)
6✔
504
                if print_log:
6✔
505
                    self.log.error('%s occurred during task %s on %s: %s',
6✔
506
                                   type(err).__name__, task_name,
507
                                   gdir.rgi_id, str(err))
508
                if not continue_on_error:
6✔
509
                    raise
6✔
510

511
                if self.fallback is not None:
1✔
512
                    self.fallback(gdir)
×
513
            if return_value:
7✔
514
                return out
7✔
515

516
        _entity_task.__dict__['is_entity_task'] = True
9✔
517
        return _entity_task
9✔
518

519
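An illustrative sketch of how the decorator above is typically used (the task body and the 'gridded_data' basename are placeholders): the wrapped function receives a single glacier directory, its status is logged to the directory, and errors are re-raised unless `continue_on_error` is set:

    @entity_task(log, writes=['gridded_data'])
    def my_task(gdir, some_option=1):
        """A minimal entity task operating on one glacier directory."""
        # ... do the work and write the results to gdir ...
        return some_option

    my_task(gdir, some_option=2, continue_on_error=True)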

520
class global_task(object):
9✔
521
    """Decorator for common job-controlling logic.
522

523
    Indicates that this task expects a list of all GlacierDirs as parameter
524
    instead of being called once per dir.
525
    """
526

527
    def __init__(self, log):
9✔
528
        """Decorator syntax: ``@global_task(log)``
529

530
        Parameters
531
        ----------
532
        log: logger
533
            module logger
534
        """
535
        self.log = log
9✔
536

537
    def __call__(self, task_func):
9✔
538
        """Decorate."""
539

540
        @wraps(task_func)
9✔
541
        def _global_task(gdirs, **kwargs):
9✔
542

543
            # Should be iterable
544
            gdirs = tolist(gdirs)
7✔
545

546
            self.log.workflow('Applying global task %s on %s glaciers',
7✔
547
                              task_func.__name__, len(gdirs))
548

549
            # Run the task
550
            return task_func(gdirs, **kwargs)
7✔
551

552
        _global_task.__dict__['is_global_task'] = True
9✔
553
        return _global_task
9✔
554

555

556
def get_ref_mb_glaciers_candidates(rgi_version=None):
9✔
557
    """Reads in the WGMS list of glaciers with available MB data.
558

559
    Can be found afterwards (and extended) in cfg.DATA['RGIXX_ref_ids'].
560
    """
561

562
    if rgi_version is None:
1✔
563
        rgi_version = cfg.PARAMS['rgi_version']
1✔
564

565
    if len(rgi_version) == 2:
1✔
566
        # We might change this one day
567
        rgi_version = rgi_version[:1]
1✔
568

569
    key = 'RGI{}0_ref_ids'.format(rgi_version)
1✔
570

571
    if key not in cfg.DATA:
1✔
572
        flink, _ = get_wgms_files()
1✔
573
        cfg.DATA[key] = flink['RGI{}0_ID'.format(rgi_version)].tolist()
1✔
574

575
    return cfg.DATA[key]
1✔
576

577

578
@global_task(log)
9✔
579
def get_ref_mb_glaciers(gdirs, y0=None, y1=None):
9✔
580
    """Get the list of glaciers we have valid mass balance measurements for.
581

582
    To be valid, glaciers must have at least 5 years of measurements and
583
    be land terminating. Therefore, the list depends on the time period of the
584
    baseline climate data and this method selects them out of a list
585
    of potential candidates (`gdirs` arg).
586

587
    Parameters
588
    ----------
589
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
590
        list of glaciers to check for valid reference mass balance data
591
    y0 : int
592
        override the default behavior which is to check the available
593
        climate data (or PARAMS['ref_mb_valid_window']) and decide
594
    y1 : int
595
        override the default behavior which is to check the available
596
        climate data (or PARAMS['ref_mb_valid_window']) and decide
597

598
    Returns
599
    -------
600
    ref_gdirs : list of :py:class:`oggm.GlacierDirectory` objects
601
        list of those glaciers with valid reference mass balance data
602

603
    See Also
604
    --------
605
    get_ref_mb_glaciers_candidates
606
    """
607

608
    # Get the links
609
    ref_ids = get_ref_mb_glaciers_candidates(gdirs[0].rgi_version)
1✔
610

611
    # We remove tidewater glaciers and glaciers with < 5 years
612
    ref_gdirs = []
1✔
613
    for g in gdirs:
1✔
614
        if g.rgi_id not in ref_ids or g.is_tidewater:
1✔
615
            continue
1✔
616
        try:
1✔
617
            mbdf = g.get_ref_mb_data(y0=y0, y1=y1)
1✔
618
            if len(mbdf) >= 5:
1✔
619
                ref_gdirs.append(g)
1✔
620
        except RuntimeError as e:
1✔
621
            if 'Please process some climate data before call' in str(e):
×
622
                raise
×
623
    return ref_gdirs
1✔
624

625

626
def _chaikins_corner_cutting(line, refinements=5):
9✔
627
    """Chaikin's corner-cutting algorithm to smooth a line.
628

629
    https://stackoverflow.com/questions/47068504/where-to-find-python-
630
    implementation-of-chaikins-corner-cutting-algorithm
631
    """
632
    coords = np.array(line.coords)
×
633

634
    for _ in range(refinements):
×
635
        L = coords.repeat(2, axis=0)
×
636
        R = np.empty_like(L)
×
637
        R[0] = L[0]
×
638
        R[2::2] = L[1:-1:2]
×
639
        R[1:-1:2] = L[2::2]
×
640
        R[-1] = L[-1]
×
641
        coords = L * 0.75 + R * 0.25
×
642

643
    return shpg.LineString(coords)
×
644

645
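A small worked example of the refinement step above (computed by hand, for illustration): with one refinement, each interior segment is replaced by points at one quarter and three quarters of its length, which rounds off the corner while keeping the endpoints:

    line = shpg.LineString([(0, 0), (1, 0), (1, 1)])
    out = _chaikins_corner_cutting(line, refinements=1)
    # list(out.coords) -> [(0.0, 0.0), (0.25, 0.0), (0.75, 0.0),
    #                      (1.0, 0.25), (1.0, 0.75), (1.0, 1.0)]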

646
@entity_task(log)
9✔
647
def get_centerline_lonlat(gdir,
9✔
648
                          keep_main_only=False,
649
                          flowlines_output=False,
650
                          ensure_exterior_match=False,
651
                          geometrical_widths_output=False,
652
                          corrected_widths_output=False,
653
                          to_crs='wgs84',
654
                          simplify_line=0,
655
                          corner_cutting=0):
656
    """Helper task to convert the centerlines to a shapefile
657

658
    Parameters
659
    ----------
660
    gdir : the glacier directory
661
    flowlines_output : create a shapefile for the flowlines
662
    ensure_exterior_match : per design, OGGM centerlines match the underlying
663
    DEM grid. This may imply that they do not "touch" the exterior outlines
664
    of the glacier in vector space. Set this to True to correct for that.
665
    geometrical_widths_output : for the geometrical widths
666
    corrected_widths_output : for the corrected widths
667

668
    Returns
669
    -------
670
    a list of dicts (one per centerline segment) with the geometries and
    attributes to be written to a shapefile
671
    """
672
    if flowlines_output or geometrical_widths_output or corrected_widths_output:
1✔
673
        cls = gdir.read_pickle('inversion_flowlines')
×
674
    else:
675
        cls = gdir.read_pickle('centerlines')
1✔
676

677
    exterior = None
1✔
678
    if ensure_exterior_match:
1✔
679
        exterior = gdir.read_shapefile('outlines')
×
680
        # Transform to grid
681
        tra_func = partial(gdir.grid.transform, crs=exterior.crs)
×
682
        exterior = shpg.Polygon(shp_trafo(tra_func, exterior.geometry[0].exterior))
×
683

684
    tra_func = partial(gdir.grid.ij_to_crs, crs=to_crs)
1✔
685

686
    olist = []
1✔
687
    for j, cl in enumerate(cls):
1✔
688
        mm = 1 if j == (len(cls)-1) else 0
1✔
689
        if keep_main_only and mm == 0:
1✔
690
            continue
×
691
        if corrected_widths_output:
1✔
692
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
×
693
            for wi, cur, (n1, n2), wi_m in zip(cl.widths, cl.line.coords,
×
694
                                               cl.normals, cl.widths_m):
695
                _l = shpg.LineString([shpg.Point(cur + wi / 2. * n1),
×
696
                                      shpg.Point(cur + wi / 2. * n2)])
697
                gs = dict()
×
698
                gs['RGIID'] = gdir.rgi_id
×
699
                gs['SEGMENT_ID'] = j
×
700
                gs['LE_SEGMENT'] = le_segment
×
701
                gs['MAIN'] = mm
×
702
                gs['WIDTH_m'] = wi_m
×
703
                gs['geometry'] = shp_trafo(tra_func, _l)
×
704
                olist.append(gs)
×
705
        elif geometrical_widths_output:
1✔
706
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
×
707
            for _l, wi_m in zip(cl.geometrical_widths, cl.widths_m):
×
708
                gs = dict()
×
709
                gs['RGIID'] = gdir.rgi_id
×
710
                gs['SEGMENT_ID'] = j
×
711
                gs['LE_SEGMENT'] = le_segment
×
712
                gs['MAIN'] = mm
×
713
                gs['WIDTH_m'] = wi_m
×
714
                gs['geometry'] = shp_trafo(tra_func, _l)
×
715
                olist.append(gs)
×
716
        else:
717
            gs = dict()
1✔
718
            gs['RGIID'] = gdir.rgi_id
1✔
719
            gs['SEGMENT_ID'] = j
1✔
720
            gs['LE_SEGMENT'] = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
1✔
721
            gs['MAIN'] = mm
1✔
722
            line = cl.line
1✔
723
            if ensure_exterior_match:
1✔
724
                # Extend the line at the start so it can reach the exterior
725
                fs = shpg.LineString(line.coords[:2])
×
726
                # First check if this is necessary - this segment should
727
                # be within the geometry or it's already good to go
728
                if fs.within(exterior):
×
729
                    fs = shpa.scale(fs, xfact=3, yfact=3, origin=fs.boundary.geoms[1])
×
730
                    line = shpg.LineString([*fs.coords, *line.coords[2:]])
×
731
                # If last also extend at the end
732
                if mm == 1:
×
733
                    ls = shpg.LineString(line.coords[-2:])
×
734
                    if ls.within(exterior):
×
735
                        ls = shpa.scale(ls, xfact=3, yfact=3, origin=ls.boundary.geoms[0])
×
736
                        line = shpg.LineString([*line.coords[:-2], *ls.coords])
×
737

738
                # Simplify and smooth?
739
                if simplify_line:
×
740
                    line = line.simplify(simplify_line)
×
741
                if corner_cutting:
×
742
                    line = _chaikins_corner_cutting(line, corner_cutting)
×
743

744
                # Intersect with exterior geom
745
                line = line.intersection(exterior)
×
746
                if line.geom_type in ['MultiLineString', 'GeometryCollection']:
×
747
                    # Take the longest
748
                    lens = [il.length for il in line.geoms]
×
749
                    line = line.geoms[np.argmax(lens)]
×
750

751
                # Recompute length
752
                gs['LE_SEGMENT'] = np.rint(line.length * gdir.grid.dx)
×
753
            gs['geometry'] = shp_trafo(tra_func, line)
1✔
754
            olist.append(gs)
1✔
755

756
    return olist
1✔
757

758

759
def _write_shape_to_disk(gdf, fpath, to_tar=False):
9✔
760
    """Write a shapefile to disk with optional compression
761

762
    Parameters
763
    ----------
764
    gdf : gpd.GeoDataFrame
765
        the data to write
766
    fpath : str
767
        where to write the file - should end in .shp
768
    to_tar : bool
769
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
770
        also compress to .gz
771
    """
772

773
    if '.shp' not in fpath:
7✔
774
        raise ValueError('File ending should be .shp')
×
775

776
    gdf.to_file(fpath)
7✔
777

778
    if not to_tar:
7✔
779
        # Done here
780
        return
3✔
781

782
    # Write them in tar
783
    fpath = fpath.replace('.shp', '.tar')
7✔
784
    mode = 'w'
7✔
785
    if cfg.PARAMS['use_compression']:
7✔
786
        fpath += '.gz'
7✔
787
        mode += ':gz'
7✔
788
    if os.path.exists(fpath):
7✔
789
        os.remove(fpath)
4✔
790

791
    # List all files that were written as shape
792
    fs = glob.glob(fpath.replace('.gz', '').replace('.tar', '.*'))
7✔
793
    # Add them to tar
794
    with tarfile.open(fpath, mode=mode) as tf:
7✔
795
        for ff in fs:
7✔
796
            tf.add(ff, arcname=os.path.basename(ff))
7✔
797

798
    # Delete the old ones
799
    for ff in fs:
7✔
800
        os.remove(ff)
7✔
801

802

803
@global_task(log)
9✔
804
def write_centerlines_to_shape(gdirs, *, path=True, to_tar=False,
9✔
805
                               to_crs='EPSG:4326',
806
                               filesuffix='', flowlines_output=False,
807
                               ensure_exterior_match=False,
808
                               geometrical_widths_output=False,
809
                               corrected_widths_output=False,
810
                               keep_main_only=False,
811
                               simplify_line=0,
812
                               corner_cutting=0):
813
    """Write the centerlines to a shapefile.
814

815
    Parameters
816
    ----------
817
    gdirs:
818
        the list of GlacierDir to process.
819
    path: str or bool
820
        Set to "True" in order to store the shape in the working directory
821
        Set to a str path to store the file to your chosen location
822
    to_tar : bool
823
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
824
        also compress to .gz
825
    filesuffix : str
826
        add a suffix to the output file
827
    flowlines_output : bool
828
        output the OGGM flowlines instead of the centerlines
829
    geometrical_widths_output : bool
830
        output the geometrical widths instead of the centerlines
831
    corrected_widths_output : bool
832
        output the corrected widths instead of the centerlines
833
    ensure_exterior_match : bool
834
        per design, the centerlines will match the underlying DEM grid.
835
        This may imply that they do not "touch" the exterior outlines of the
836
        glacier in vector space. Set this to True to correct for that.
837
    to_crs : str
838
        write the shape to another coordinate reference system (CRS)
839
    keep_main_only : bool
840
        write only the main flowlines to the output files
841
    simplify_line : float
842
        apply shapely's `simplify` method to the line before writing. It is
843
        a purely cosmetic option, although glacier length will be affected.
844
        All points in the simplified object will be within the tolerance
845
        distance of the original geometry (units: grid points). A good
846
        value to test first is 0.5
847
    corner_cutting : int
848
        apply Chaikin's corner-cutting algorithm to the geometry before
849
        writing. The integer represents the number of refinements to apply.
850
        A good first value to test is 5.
851
    """
852
    from oggm.workflow import execute_entity_task
3✔
853

854
    if path is True:
3✔
855
        path = os.path.join(cfg.PATHS['working_dir'],
2✔
856
                            'glacier_centerlines' + filesuffix + '.shp')
857

858
    _to_crs = salem.check_crs(to_crs)
3✔
859
    if not _to_crs:
3✔
860
        raise InvalidParamsError(f'CRS not understood: {to_crs}')
×
861

862
    log.workflow('write_centerlines_to_shape on {} ...'.format(path))
3✔
863

864
    olist = execute_entity_task(get_centerline_lonlat, gdirs,
3✔
865
                                flowlines_output=flowlines_output,
866
                                ensure_exterior_match=ensure_exterior_match,
867
                                geometrical_widths_output=geometrical_widths_output,
868
                                corrected_widths_output=corrected_widths_output,
869
                                keep_main_only=keep_main_only,
870
                                simplify_line=simplify_line,
871
                                corner_cutting=corner_cutting,
872
                                to_crs=_to_crs)
873
    # filter for none
874
    olist = [o for o in olist if o is not None]
3✔
875
    odf = gpd.GeoDataFrame(itertools.chain.from_iterable(olist))
3✔
876
    odf = odf.sort_values(by='RGIID')
3✔
877
    odf.crs = to_crs
3✔
878
    # Sanity checks to avoid bad surprises
879
    gtype = np.array([g.geom_type for g in odf.geometry])
3✔
880
    if 'GeometryCollection' in gtype:
3✔
881
        errdf = odf.loc[gtype == 'GeometryCollection']
×
882
        with warnings.catch_warnings():
×
883
            # errdf.length warns because of use of wgs84
884
            warnings.filterwarnings("ignore", category=UserWarning)
×
885
            if not np.all(errdf.length) == 0:
×
886
                errdf = errdf.loc[errdf.length > 0]
×
887
                raise RuntimeError('Some geometries are non-empty GeometryCollection '
×
888
                                   f'at RGI Ids: {errdf.RGIID.values}')
889
    _write_shape_to_disk(odf, path, to_tar=to_tar)
3✔
890

891
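For instance, a hedged call (the output path is a placeholder) writing smoothed main centerlines to a custom location, using the tolerance and refinement values suggested in the docstring:

    write_centerlines_to_shape(gdirs, path='/path/to/centerlines.shp',
                               ensure_exterior_match=True, keep_main_only=True,
                               simplify_line=0.5, corner_cutting=5)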

892
def demo_glacier_id(key):
9✔
893
    """Get the RGI id of a glacier by name or key: None if not found."""
894

895
    df = cfg.DATA['demo_glaciers']
1✔
896

897
    # Is the name in key?
898
    s = df.loc[df.Key.str.lower() == key.lower()]
1✔
899
    if len(s) == 1:
1✔
900
        return s.index[0]
1✔
901

902
    # Is the name in name?
903
    s = df.loc[df.Name.str.lower() == key.lower()]
1✔
904
    if len(s) == 1:
1✔
905
        return s.index[0]
1✔
906

907
    # Is the name in Ids?
908
    try:
1✔
909
        s = df.loc[[key]]
1✔
910
        if len(s) == 1:
1✔
911
            return s.index[0]
1✔
912
    except KeyError:
1✔
913
        pass
1✔
914

915
    return None
1✔
916

917

918
class compile_to_netcdf(object):
9✔
919
    """Decorator for common compiling NetCDF files logic.
920

921
    All compile_* tasks can be optimized the same way, by using temporary
922
    files and merging them afterwards.
923
    """
924

925
    def __init__(self, log):
9✔
926
        """Decorator syntax: ``@compile_to_netcdf(log)``
927

928
        Parameters
929
        ----------
930
        log: logger
931
            module logger
932
        tmp_file_size: int
933
            number of glacier directories per temporary file
934
        """
935
        self.log = log
9✔
936

937
    def __call__(self, task_func):
9✔
938
        """Decorate."""
939

940
        @wraps(task_func)
9✔
941
        def _compile_to_netcdf(gdirs, input_filesuffix='',
9✔
942
                               output_filesuffix='',
943
                               path=True,
944
                               tmp_file_size=1000,
945
                               **kwargs):
946

947
            if not output_filesuffix:
5✔
948
                output_filesuffix = input_filesuffix
5✔
949

950
            gdirs = tolist(gdirs)
5✔
951
            task_name = task_func.__name__
5✔
952
            output_base = task_name.replace('compile_', '')
5✔
953

954
            if path is True:
5✔
955
                path = os.path.join(cfg.PATHS['working_dir'],
5✔
956
                                    output_base + output_filesuffix + '.nc')
957

958
            self.log.workflow('Applying %s on %d gdirs.',
5✔
959
                              task_name, len(gdirs))
960

961
            # Run the task
962
            # If small gdir size, no need for temporary files
963
            if len(gdirs) < tmp_file_size or not path:
5✔
964
                return task_func(gdirs, input_filesuffix=input_filesuffix,
5✔
965
                                 path=path, **kwargs)
966

967
            # Otherwise, divide and conquer
968
            sub_gdirs = [gdirs[i: i + tmp_file_size] for i in
1✔
969
                         range(0, len(gdirs), tmp_file_size)]
970

971
            tmp_paths = [os.path.join(cfg.PATHS['working_dir'],
1✔
972
                                      'compile_tmp_{:06d}.nc'.format(i))
973
                         for i in range(len(sub_gdirs))]
974

975
            try:
1✔
976
                for spath, sgdirs in zip(tmp_paths, sub_gdirs):
1✔
977
                    task_func(sgdirs, input_filesuffix=input_filesuffix,
1✔
978
                              path=spath, **kwargs)
979
            except BaseException:
×
980
                # If something wrong, delete the tmp files
981
                for f in tmp_paths:
×
982
                    try:
×
983
                        os.remove(f)
×
984
                    except FileNotFoundError:
×
985
                        pass
×
986
                raise
×
987

988
            # Ok, now merge and return
989
            try:
1✔
990
                with xr.open_mfdataset(tmp_paths, combine='nested',
1✔
991
                                       concat_dim='rgi_id') as ds:
992
                    # the .load() is actually quite uncool here, but it solves
993
                    # an unbelievable stalling problem in multiproc
994
                    ds.load().to_netcdf(path)
1✔
995
            except TypeError:
×
996
                # xr < v 0.13
997
                with xr.open_mfdataset(tmp_paths, concat_dim='rgi_id') as ds:
×
998
                    # the .load() is actually quite uncool here, but it solves
999
                    # an unbelievable stalling problem in multiproc
1000
                    ds.load().to_netcdf(path)
×
1001

1002
            # We can't return the dataset without loading it, so we don't
1003
            return None
1✔
1004

1005
        return _compile_to_netcdf
9✔
1006

1007
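In practice (an illustrative call; the filesuffix and chunk size are examples, not values from this file), a compile task decorated this way, such as compile_run_output defined below, processes the glacier directories in chunks of `tmp_file_size`, writes temporary NetCDF files, and merges them into a single file in the working directory:

    compile_run_output(gdirs, input_filesuffix='_historical',
                       tmp_file_size=500)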

1008
@entity_task(log)
9✔
1009
def merge_consecutive_run_outputs(gdir,
9✔
1010
                                  input_filesuffix_1=None,
1011
                                  input_filesuffix_2=None,
1012
                                  output_filesuffix=None,
1013
                                  delete_input=False):
1014
    """Merges the output of two model_diagnostics files into one.
1015

1016
    It assumes that the last time of file1 is equal to the first time of file2.
1017

1018
    Parameters
1019
    ----------
1020
    gdir : the glacier directory
1021
    input_filesuffix_1 : str
1022
        how to recognize the first file
1023
    input_filesuffix_2 : str
1024
        how to recognize the second file
1025
    output_filesuffix : str
1026
        where to write the output (default: no suffix)
1027

1028
    Returns
1029
    -------
1030
    The merged dataset
1031
    """
1032

1033
    # Read in the input files and check
1034
    fp1 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_1)
1✔
1035
    with xr.open_dataset(fp1) as ds:
1✔
1036
        ds1 = ds.load()
1✔
1037
    fp2 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_2)
1✔
1038
    with xr.open_dataset(fp2) as ds:
1✔
1039
        ds2 = ds.load()
1✔
1040
    if ds1.time[-1] != ds2.time[0]:
1✔
1041
        raise InvalidWorkflowError('The two files are incompatible by time')
×
1042

1043
    # Sanity check for all variables as well
1044
    for v in ds1:
1✔
1045
        if not np.all(np.isfinite(ds1[v].data[-1])):
1✔
1046
            # This is the last year of hydro output - we will discard anyway
1047
            continue
1✔
1048
        if np.allclose(ds1[v].data[-1], ds2[v].data[0]):
1✔
1049
            # This means that we're OK - the two match
1050
            continue
1✔
1051

1052
        # This has to be a bucket of some sort, probably snow or calving
1053
        if len(ds2[v].data.shape) == 1:
1✔
1054
            if ds2[v].data[0] != 0:
1✔
1055
                raise InvalidWorkflowError('The two files seem incompatible '
×
1056
                                           f'by data on variable : {v}')
1057
            bucket = ds1[v].data[-1]
1✔
1058
        elif len(ds2[v].data.shape) == 2:
1✔
1059
            if ds2[v].data[0, 0] != 0:
1✔
1060
                raise InvalidWorkflowError('The two files seem incompatible '
×
1061
                                           f'by data on variable : {v}')
1062
            bucket = ds1[v].data[-1, -1]
1✔
1063
        # Carry it to the rest
1064
        ds2[v] = ds2[v] + bucket
1✔
1065

1066
    # Merge by removing the last step of file 1 and delete the files if asked
1067
    out_ds = xr.concat([ds1.isel(time=slice(0, -1)), ds2], dim='time')
1✔
1068
    if delete_input:
1✔
1069
        os.remove(fp1)
×
1070
        os.remove(fp2)
×
1071
    # Write out and return
1072
    fp = gdir.get_filepath('model_diagnostics', filesuffix=output_filesuffix)
1✔
1073
    out_ds.to_netcdf(fp)
1✔
1074
    return out_ds
1✔
1075

1076

1077
@global_task(log)
9✔
1078
@compile_to_netcdf(log)
9✔
1079
def compile_run_output(gdirs, path=True, input_filesuffix='',
9✔
1080
                       use_compression=True):
1081
    """Compiles the output of the model runs of several gdirs into one file.
1082

1083
    Parameters
1084
    ----------
1085
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1086
        the glacier directories to process
1087
    path : str
1088
        where to store (default is on the working dir).
1089
        Set to `False` to disable disk storage.
1090
    input_filesuffix : str
1091
        the filesuffix of the files to be compiled
1092
    use_compression : bool
1093
        use zlib compression on the output netCDF files
1094

1095
    Returns
1096
    -------
1097
    ds : :py:class:`xarray.Dataset`
1098
        compiled output
1099
    """
1100

1101
    # Get the dimensions of all this
1102
    rgi_ids = [gd.rgi_id for gd in gdirs]
4✔
1103

1104
    # To find the longest time, we have to open all files unfortunately
1105
    time_info = {}
4✔
1106
    time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
4✔
1107
    for gd in gdirs:
4✔
1108
        fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix)
4✔
1109
        try:
4✔
1110
            with ncDataset(fp) as ds:
4✔
1111
                time = ds.variables['time'][:]
4✔
1112
                if 'time' not in time_info:
4✔
1113
                    time_info['time'] = time
4✔
1114
                    for cn in time_keys:
4✔
1115
                        time_info[cn] = ds.variables[cn][:]
4✔
1116
                else:
1117
                    # Here we may need to append or prepend time steps
1118
                    ot = time_info['time']
4✔
1119
                    if time[0] > ot[-1] or time[-1] < ot[0]:
4✔
1120
                        raise InvalidWorkflowError('Trying to compile output '
×
1121
                                                   'without overlap.')
1122
                    if time[-1] > ot[-1]:
4✔
1123
                        p = np.nonzero(time == ot[-1])[0][0] + 1
×
1124
                        time_info['time'] = np.append(ot, time[p:])
×
1125
                        for cn in time_keys:
×
1126
                            time_info[cn] = np.append(time_info[cn],
×
1127
                                                      ds.variables[cn][p:])
1128
                    if time[0] < ot[0]:
4✔
1129
                        p = np.nonzero(time == ot[0])[0][0]
×
1130
                        time_info['time'] = np.append(time[:p], ot)
×
1131
                        for cn in time_keys:
×
1132
                            time_info[cn] = np.append(ds.variables[cn][:p],
×
1133
                                                      time_info[cn])
1134

1135
            # If this worked, keep it as template
1136
            ppath = fp
4✔
1137
        except FileNotFoundError:
×
1138
            pass
×
1139

1140
    if 'time' not in time_info:
4✔
1141
        raise RuntimeError('Found no valid glaciers!')
×
1142

1143
    # OK found it, open it and prepare the output
1144
    with xr.open_dataset(ppath) as ds_diag:
4✔
1145

1146
        # Prepare output
1147
        ds = xr.Dataset()
4✔
1148

1149
        # Global attributes
1150
        ds.attrs['description'] = 'OGGM model output'
4✔
1151
        ds.attrs['oggm_version'] = __version__
4✔
1152
        ds.attrs['calendar'] = '365-day no leap'
4✔
1153
        ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
4✔
1154

1155
        # Copy coordinates
1156
        time = time_info['time']
4✔
1157
        ds.coords['time'] = ('time', time)
4✔
1158
        ds['time'].attrs['description'] = 'Floating year'
4✔
1159
        # New coord
1160
        ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
4✔
1161
        ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
4✔
1162
        # This is just taken from there
1163
        for cn in ['hydro_year', 'hydro_month',
4✔
1164
                   'calendar_year', 'calendar_month']:
1165
            ds.coords[cn] = ('time', time_info[cn])
4✔
1166
            ds[cn].attrs['description'] = ds_diag[cn].attrs['description']
4✔
1167

1168
        # We decide on the name of "3d" variables in case we have daily
1169
        # output
1170
        name_2d_dim = 'day_2d' if 'day_2d' in ds_diag.dims else 'month_2d'
4✔
1171

1172
        # Prepare the 2D variables
1173
        shape = (len(time), len(rgi_ids))
4✔
1174
        out_2d = dict()
4✔
1175
        for vn in ds_diag.data_vars:
4✔
1176
            if name_2d_dim in ds_diag[vn].dims:
4✔
1177
                continue
1✔
1178
            var = dict()
4✔
1179
            var['data'] = np.full(shape, np.nan)
4✔
1180
            var['attrs'] = ds_diag[vn].attrs
4✔
1181
            out_2d[vn] = var
4✔
1182

1183
        # 1D Variables
1184
        out_1d = dict()
4✔
1185
        for vn, attrs in [('water_level', {'description': 'Calving water level',
4✔
1186
                                           'units': 'm'}),
1187
                          ('glen_a', {'description': 'Simulation Glen A',
1188
                                      'units': ''}),
1189
                          ('fs', {'description': 'Simulation sliding parameter',
1190
                                  'units': ''}),
1191
                          ]:
1192
            var = dict()
4✔
1193
            var['data'] = np.full(len(rgi_ids), np.nan)
4✔
1194
            var['attrs'] = attrs
4✔
1195
            out_1d[vn] = var
4✔
1196

1197
        # Maybe 3D?
1198
        out_3d = dict()
4✔
1199
        if name_2d_dim in ds_diag.dims:
4✔
1200
            # We have some 3d vars
1201
            month_2d = ds_diag[name_2d_dim]
1✔
1202
            ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data)
1✔
1203
            cn = f'calendar_{name_2d_dim}'
1✔
1204
            ds.coords[cn] = (name_2d_dim, ds_diag[cn].values)
1✔
1205

1206
            shape = (len(time), len(month_2d), len(rgi_ids))
1✔
1207
            for vn in ds_diag.data_vars:
1✔
1208
                if name_2d_dim not in ds_diag[vn].dims:
1✔
1209
                    continue
1✔
1210
                var = dict()
1✔
1211
                var['data'] = np.full(shape, np.nan)
1✔
1212
                var['attrs'] = ds_diag[vn].attrs
1✔
1213
                out_3d[vn] = var
1✔
1214

1215
    # Read out
1216
    for i, gdir in enumerate(gdirs):
4✔
1217
        try:
4✔
1218
            ppath = gdir.get_filepath('model_diagnostics',
4✔
1219
                                      filesuffix=input_filesuffix)
1220
            with ncDataset(ppath) as ds_diag:
4✔
1221
                it = ds_diag.variables['time'][:]
4✔
1222
                a = np.nonzero(time == it[0])[0][0]
4✔
1223
                b = np.nonzero(time == it[-1])[0][0] + 1
4✔
1224
                for vn, var in out_2d.items():
4✔
1225
                    var['data'][a:b, i] = ds_diag.variables[vn][:]
4✔
1226
                for vn, var in out_3d.items():
4✔
1227
                    var['data'][a:b, :, i] = ds_diag.variables[vn][:]
1✔
1228
                for vn, var in out_1d.items():
4✔
1229
                    var['data'][i] = ds_diag.getncattr(vn)
4✔
1230
        except FileNotFoundError:
×
1231
            pass
×
1232

1233
    # To xarray
1234
    for vn, var in out_2d.items():
4✔
1235
        # Backwards compatibility - to remove one day...
1236
        for r in ['_m3', '_m2', '_myr', '_m']:
4✔
1237
            # Order matters
1238
            vn = regexp.sub(r + '$', '', vn)
4✔
1239
        ds[vn] = (('time', 'rgi_id'), var['data'])
4✔
1240
        ds[vn].attrs = var['attrs']
4✔
1241
    for vn, var in out_3d.items():
4✔
1242
        ds[vn] = (('time', name_2d_dim, 'rgi_id'), var['data'])
1✔
1243
        ds[vn].attrs = var['attrs']
1✔
1244
    for vn, var in out_1d.items():
4✔
1245
        ds[vn] = (('rgi_id', ), var['data'])
4✔
1246
        ds[vn].attrs = var['attrs']
4✔
1247

1248
    # To file?
1249
    if path:
4✔
1250
        enc_var = {'dtype': 'float32'}
4✔
1251
        if use_compression:
4✔
1252
            enc_var['complevel'] = 5
4✔
1253
            enc_var['zlib'] = True
4✔
1254
        encoding = {v: enc_var for v in ds.data_vars}
4✔
1255
        ds.to_netcdf(path, encoding=encoding)
4✔
1256

1257
    return ds
4✔
1258

1259

1260
@global_task(log)
9✔
1261
@compile_to_netcdf(log)
9✔
1262
def compile_climate_input(gdirs, path=True, filename='climate_historical',
9✔
1263
                          input_filesuffix='', use_compression=True):
1264
    """Merge the climate input files in the glacier directories into one file.
1265

1266
    Parameters
1267
    ----------
1268
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1269
        the glacier directories to process
1270
    path : str
1271
        where to store (default is on the working dir).
1272
        Set to `False` to disable disk storage.
1273
    filename : str
1274
        BASENAME of the climate input files
1275
    input_filesuffix : str
1276
        the filesuffix of the files to be compiled
1277
    use_compression : bool
1278
        use zlib compression on the output netCDF files
1279

1280
    Returns
1281
    -------
1282
    ds : :py:class:`xarray.Dataset`
1283
        compiled climate data
1284
    """
1285

1286
    # Get the dimensions of all this
1287
    rgi_ids = [gd.rgi_id for gd in gdirs]
1✔
1288

1289
    # The first gdir might have blown up, try some others
1290
    i = 0
1✔
1291
    while True:
1✔
1292
        if i >= len(gdirs):
1✔
1293
            raise RuntimeError('Found no valid glaciers!')
×
1294
        try:
1✔
1295
            pgdir = gdirs[i]
1✔
1296
            ppath = pgdir.get_filepath(filename=filename,
1✔
1297
                                       filesuffix=input_filesuffix)
1298
            with xr.open_dataset(ppath) as ds_clim:
1✔
1299
                ds_clim.time.values
1✔
1300
            # If this worked, we have a valid gdir
1301
            break
1✔
1302
        except BaseException:
×
1303
            i += 1
×
1304

1305
    with xr.open_dataset(ppath) as ds_clim:
1✔
1306
        cyrs = ds_clim['time.year']
1✔
1307
        cmonths = ds_clim['time.month']
1✔
1308
        sm = cfg.PARAMS['hydro_month_' + pgdir.hemisphere]
1✔
1309
        hyrs, hmonths = calendardate_to_hydrodate(cyrs, cmonths, start_month=sm)
1✔
1310
        time = date_to_floatyear(cyrs, cmonths)
1✔
1311

1312
    # Prepare output
1313
    ds = xr.Dataset()
1✔
1314

1315
    # Global attributes
1316
    ds.attrs['description'] = 'OGGM model output'
1✔
1317
    ds.attrs['oggm_version'] = __version__
1✔
1318
    ds.attrs['calendar'] = '365-day no leap'
1✔
1319
    ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
1✔
1320

1321
    # Coordinates
1322
    ds.coords['time'] = ('time', time)
1✔
1323
    ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
1✔
1324
    ds.coords['calendar_year'] = ('time', cyrs.data)
1✔
1325
    ds.coords['calendar_month'] = ('time', cmonths.data)
1✔
1326
    ds.coords['hydro_year'] = ('time', hyrs)
1✔
1327
    ds.coords['hydro_month'] = ('time', hmonths)
1✔
1328
    ds['time'].attrs['description'] = 'Floating year'
1✔
1329
    ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
1✔
1330
    ds['hydro_year'].attrs['description'] = 'Hydrological year'
1✔
1331
    ds['hydro_month'].attrs['description'] = 'Hydrological month'
1✔
1332
    ds['calendar_year'].attrs['description'] = 'Calendar year'
1✔
1333
    ds['calendar_month'].attrs['description'] = 'Calendar month'
1✔
1334

1335
    shape = (len(time), len(rgi_ids))
1✔
1336
    temp = np.zeros(shape) * np.NaN
1✔
1337
    prcp = np.zeros(shape) * np.NaN
1✔
1338

1339
    ref_hgt = np.zeros(len(rgi_ids)) * np.NaN
1✔
1340
    ref_pix_lon = np.zeros(len(rgi_ids)) * np.NaN
1✔
1341
    ref_pix_lat = np.zeros(len(rgi_ids)) * np.NaN
1✔
1342

1343
    for i, gdir in enumerate(gdirs):
1✔
1344
        try:
1✔
1345
            ppath = gdir.get_filepath(filename=filename,
1✔
1346
                                      filesuffix=input_filesuffix)
1347
            with xr.open_dataset(ppath) as ds_clim:
1✔
1348
                prcp[:, i] = ds_clim.prcp.values
1✔
1349
                temp[:, i] = ds_clim.temp.values
1✔
1350
                ref_hgt[i] = ds_clim.ref_hgt
1✔
1351
                ref_pix_lon[i] = ds_clim.ref_pix_lon
1✔
1352
                ref_pix_lat[i] = ds_clim.ref_pix_lat
1✔
1353
        except BaseException:
×
1354
            pass
×
1355

1356
    ds['temp'] = (('time', 'rgi_id'), temp)
1✔
1357
    ds['temp'].attrs['units'] = 'DegC'
1✔
1358
    ds['temp'].attrs['description'] = '2m Temperature at height ref_hgt'
1✔
1359
    ds['prcp'] = (('time', 'rgi_id'), prcp)
1✔
1360
    ds['prcp'].attrs['units'] = 'kg m-2'
1✔
1361
    ds['prcp'].attrs['description'] = 'total monthly precipitation amount'
1✔
1362
    ds['ref_hgt'] = ('rgi_id', ref_hgt)
1✔
1363
    ds['ref_hgt'].attrs['units'] = 'm'
1✔
1364
    ds['ref_hgt'].attrs['description'] = 'reference height'
1✔
1365
    ds['ref_pix_lon'] = ('rgi_id', ref_pix_lon)
1✔
1366
    ds['ref_pix_lon'].attrs['description'] = 'longitude'
1✔
1367
    ds['ref_pix_lat'] = ('rgi_id', ref_pix_lat)
1✔
1368
    ds['ref_pix_lat'].attrs['description'] = 'latitude'
1✔
1369

1370
    if path:
1✔
1371
        enc_var = {'dtype': 'float32'}
1✔
1372
        if use_compression:
1✔
1373
            enc_var['complevel'] = 5
1✔
1374
            enc_var['zlib'] = True
1✔
1375
        vars = ['temp', 'prcp']
1✔
1376
        encoding = {v: enc_var for v in vars}
1✔
1377
        ds.to_netcdf(path, encoding=encoding)
1✔
1378
    return ds
1✔
1379

1380

1381
@global_task(log)
9✔
1382
def compile_task_log(gdirs, task_names=[], filesuffix='', path=True,
9✔
1383
                     append=True):
1384
    """Gathers the log output for the selected task(s)
1385

1386
    Parameters
1387
    ----------
1388
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1389
        the glacier directories to process
1390
    task_names : list of str
1391
        The tasks to check for
1392
    filesuffix : str
1393
        add suffix to output file
1394
    path:
1395
        Set to `True` in order to store the info in the working directory
1396
        Set to a path to store the file to your chosen location
1397
        Set to `False` to omit disk storage
1398
    append:
1399
        If a task log file already exists in the working directory, the new
1400
        logs will be added to the existing file
1401

1402
    Returns
1403
    -------
1404
    out : :py:class:`pandas.DataFrame`
1405
        log output
1406
    """
1407

1408
    out_df = []
3✔
1409
    for gdir in gdirs:
3✔
1410
        d = OrderedDict()
3✔
1411
        d['rgi_id'] = gdir.rgi_id
3✔
1412
        for task_name in task_names:
3✔
1413
            ts = gdir.get_task_status(task_name)
3✔
1414
            if ts is None:
3✔
1415
                ts = ''
1✔
1416
            d[task_name] = ts.replace(',', ' ')
3✔
1417
        out_df.append(d)
3✔
1418

1419
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
1420
    if path:
3✔
1421
        if path is True:
3✔
1422
            path = os.path.join(cfg.PATHS['working_dir'],
3✔
1423
                                'task_log' + filesuffix + '.csv')
1424
        if os.path.exists(path) and append:
3✔
1425
            odf = pd.read_csv(path, index_col=0)
1✔
1426
            out = odf.join(out, rsuffix='_n')
1✔
1427
        out.to_csv(path)
3✔
1428
    return out
3✔
1429

1430

1431
@global_task(log)
9✔
1432
def compile_task_time(gdirs, task_names=[], filesuffix='', path=True,
9✔
1433
                      append=True):
1434
    """Gathers the time needed for the selected task(s) to run
1435

1436
    Parameters
1437
    ----------
1438
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1439
        the glacier directories to process
1440
    task_names : list of str
1441
        The tasks to check for
1442
    filesuffix : str
1443
        add suffix to output file
1444
    path : str, bool
1445
        Set to `True` to store the info in the working directory
1446
        Set to a path to store the file at your chosen location
1447
        Set to `False` to omit disk storage
1448
    append : bool
1449
        If a task time file already exists in the working directory, the new
1450
        timings will be added to the existing file
1451

1452
    Returns
1453
    -------
1454
    out : :py:class:`pandas.DataFrame`
1455
        timing output
1456
    """
1457

1458
    out_df = []
1✔
1459
    for gdir in gdirs:
1✔
1460
        d = OrderedDict()
1✔
1461
        d['rgi_id'] = gdir.rgi_id
1✔
1462
        for task_name in task_names:
1✔
1463
            d[task_name] = gdir.get_task_time(task_name)
1✔
1464
        out_df.append(d)
1✔
1465

1466
    out = pd.DataFrame(out_df).set_index('rgi_id')
1✔
1467
    if path:
1✔
1468
        if path is True:
1✔
1469
            path = os.path.join(cfg.PATHS['working_dir'],
1✔
1470
                                'task_time' + filesuffix + '.csv')
1471
        if os.path.exists(path) and append:
1✔
1472
            odf = pd.read_csv(path, index_col=0)
1✔
1473
            out = odf.join(out, rsuffix='_n')
1✔
1474
        out.to_csv(path)
1✔
1475
    return out
1✔
1476

1477

1478
@entity_task(log)
9✔
1479
def glacier_statistics(gdir, inversion_only=False, apply_func=None):
9✔
1480
    """Gather as much statistics as possible about this glacier.
1481

1482
    It can be used for result diagnostics and other analyses. If the data
1483
    necessary for a statistic is not available (e.g. flowline length), it
1484
    will simply be ignored.
1485

1486
    Parameters
1487
    ----------
1488
    inversion_only : bool
1489
        if one wants to summarize the inversion output only (including calving)
1490
    apply_func : function
1491
        if one wants to summarize further information about a glacier, set
1492
        this kwarg to a function that accepts a glacier directory as first
1493
        positional argument, and the dictionary to fill in with data as
1494
        second argument. The dictionary should only store scalar values (strings,
1495
        floats, ints)
1496
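
    Examples
    --------
    A minimal sketch of a user-provided statistic; the key name is
    illustrative and the second argument is the dictionary to fill:

    >>> def my_stat(gdir, d):
    ...     d['my_area_m2'] = gdir.rgi_area_km2 * 1e6
    >>> d = glacier_statistics(gdir, apply_func=my_stat)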
    """
1497

1498
    d = OrderedDict()
5✔
1499

1500
    # Easy stats - this should always be possible
1501
    d['rgi_id'] = gdir.rgi_id
5✔
1502
    d['rgi_region'] = gdir.rgi_region
5✔
1503
    d['rgi_subregion'] = gdir.rgi_subregion
5✔
1504
    d['name'] = gdir.name
5✔
1505
    d['cenlon'] = gdir.cenlon
5✔
1506
    d['cenlat'] = gdir.cenlat
5✔
1507
    d['rgi_area_km2'] = gdir.rgi_area_km2
5✔
1508
    d['rgi_year'] = gdir.rgi_date
5✔
1509
    d['glacier_type'] = gdir.glacier_type
5✔
1510
    d['terminus_type'] = gdir.terminus_type
5✔
1511
    d['is_tidewater'] = gdir.is_tidewater
5✔
1512
    d['status'] = gdir.status
5✔
1513

1514
    # The rest is less certain. We put these in a try block and see
1515
    # We're good with any error - we store the dict anyway below
1516
    # TODO: should be done with more preselected errors
1517
    with warnings.catch_warnings():
5✔
1518
        warnings.filterwarnings("ignore", category=RuntimeWarning)
5✔
1519

1520
        try:
5✔
1521
            # Inversion
1522
            if gdir.has_file('inversion_output'):
5✔
1523
                vol = []
4✔
1524
                vol_bsl = []
4✔
1525
                vol_bwl = []
4✔
1526
                cl = gdir.read_pickle('inversion_output')
4✔
1527
                for c in cl:
4✔
1528
                    vol.extend(c['volume'])
4✔
1529
                    vol_bsl.extend(c.get('volume_bsl', [0]))
4✔
1530
                    vol_bwl.extend(c.get('volume_bwl', [0]))
4✔
1531
                d['inv_volume_km3'] = np.nansum(vol) * 1e-9
4✔
1532
                area = gdir.rgi_area_km2
4✔
1533
                d['vas_volume_km3'] = 0.034 * (area ** 1.375)
4✔
1534
                # BSL / BWL
1535
                d['inv_volume_bsl_km3'] = np.nansum(vol_bsl) * 1e-9
4✔
1536
                d['inv_volume_bwl_km3'] = np.nansum(vol_bwl) * 1e-9
4✔
1537
        except BaseException:
×
1538
            pass
×
1539

1540
        try:
5✔
1541
            # Diagnostics
1542
            diags = gdir.get_diagnostics()
5✔
1543
            for k, v in diags.items():
5✔
1544
                d[k] = v
5✔
1545
        except BaseException:
×
1546
            pass
×
1547

1548
        if inversion_only:
5✔
1549
            return d
×
1550

1551
        try:
5✔
1552
            # Error log
1553
            errlog = gdir.get_error_log()
5✔
1554
            if errlog is not None:
5✔
1555
                d['error_task'] = errlog.split(';')[-2]
2✔
1556
                d['error_msg'] = errlog.split(';')[-1]
2✔
1557
            else:
1558
                d['error_task'] = None
5✔
1559
                d['error_msg'] = None
5✔
1560
        except BaseException:
×
1561
            pass
×
1562

1563
        try:
5✔
1564
            # Masks related stuff
1565
            fpath = gdir.get_filepath('gridded_data')
5✔
1566
            with ncDataset(fpath) as nc:
5✔
1567
                mask = nc.variables['glacier_mask'][:] == 1
5✔
1568
                topo = nc.variables['topo'][:][mask]
5✔
1569
            d['dem_mean_elev'] = np.mean(topo)
5✔
1570
            d['dem_med_elev'] = np.median(topo)
5✔
1571
            d['dem_min_elev'] = np.min(topo)
5✔
1572
            d['dem_max_elev'] = np.max(topo)
5✔
1573
        except BaseException:
2✔
1574
            pass
2✔
1575

1576
        try:
5✔
1577
            # Ext related stuff
1578
            fpath = gdir.get_filepath('gridded_data')
5✔
1579
            with ncDataset(fpath) as nc:
5✔
1580
                ext = nc.variables['glacier_ext'][:] == 1
5✔
1581
                mask = nc.variables['glacier_mask'][:] == 1
5✔
1582
                topo = nc.variables['topo'][:]
5✔
1583
            d['dem_max_elev_on_ext'] = np.max(topo[ext])
5✔
1584
            d['dem_min_elev_on_ext'] = np.min(topo[ext])
5✔
1585
            a = np.sum(mask & (topo > d['dem_max_elev_on_ext']))
5✔
1586
            d['dem_perc_area_above_max_elev_on_ext'] = a / np.sum(mask)
5✔
1587
            # Terminus loc
1588
            j, i = np.nonzero((topo[ext].min() == topo) & ext)
5✔
1589
            lon, lat = gdir.grid.ij_to_crs(i[0], j[0], crs=salem.wgs84)
5✔
1590
            d['terminus_lon'] = lon
5✔
1591
            d['terminus_lat'] = lat
5✔
1592
        except BaseException:
2✔
1593
            pass
2✔
1594

1595
        try:
5✔
1596
            # Centerlines
1597
            cls = gdir.read_pickle('centerlines')
5✔
1598
            longest = 0.
5✔
1599
            for cl in cls:
5✔
1600
                longest = np.max([longest, cl.dis_on_line[-1]])
5✔
1601
            d['n_centerlines'] = len(cls)
5✔
1602
            d['longest_centerline_km'] = longest * gdir.grid.dx / 1000.
5✔
1603
        except BaseException:
2✔
1604
            pass
2✔
1605

1606
        try:
5✔
1607
            # Flowline related stuff
1608
            h = np.array([])
5✔
1609
            widths = np.array([])
5✔
1610
            slope = np.array([])
5✔
1611
            fls = gdir.read_pickle('inversion_flowlines')
5✔
1612
            dx = fls[0].dx * gdir.grid.dx
5✔
1613
            for fl in fls:
5✔
1614
                hgt = fl.surface_h
5✔
1615
                h = np.append(h, hgt)
5✔
1616
                widths = np.append(widths, fl.widths * gdir.grid.dx)
5✔
1617
                slope = np.append(slope, np.arctan(-np.gradient(hgt, dx)))
4✔
1618
                length = len(hgt) * dx
4✔
1619
            d['main_flowline_length'] = length
4✔
1620
            d['inv_flowline_glacier_area'] = np.sum(widths * dx)
4✔
1621
            d['flowline_mean_elev'] = np.average(h, weights=widths)
4✔
1622
            d['flowline_max_elev'] = np.max(h)
4✔
1623
            d['flowline_min_elev'] = np.min(h)
4✔
1624
            d['flowline_avg_slope'] = np.mean(slope)
4✔
1625
            d['flowline_avg_width'] = np.mean(widths)
4✔
1626
            d['flowline_last_width'] = fls[-1].widths[-1] * gdir.grid.dx
4✔
1627
            d['flowline_last_5_widths'] = np.mean(fls[-1].widths[-5:] *
4✔
1628
                                                  gdir.grid.dx)
1629
        except BaseException:
2✔
1630
            pass
2✔
1631

1632
        try:
5✔
1633
            # climate
1634
            info = gdir.get_climate_info()
5✔
1635
            for k, v in info.items():
5✔
1636
                d[k] = v
5✔
1637
        except BaseException:
×
1638
            pass
×
1639

1640
        try:
5✔
1641
            # MB calib
1642
            mb_calib = gdir.read_json('mb_calib')
5✔
1643
            for k, v in mb_calib.items():
4✔
1644
                if np.isscalar(v):
4✔
1645
                    d[k] = v
4✔
1646
                else:
1647
                    for k2, v2 in v.items():
4✔
1648
                        d[k2] = v2
3✔
1649
        except BaseException:
3✔
1650
            pass
3✔
1651

1652
        if apply_func:
5✔
1653
            # User defined statistics
1654
            try:
2✔
1655
                apply_func(gdir, d)
2✔
1656
            except BaseException:
×
1657
                pass
×
1658

1659
    return d
5✔
1660

1661

1662
@global_task(log)
9✔
1663
def compile_glacier_statistics(gdirs, filesuffix='', path=True,
9✔
1664
                               inversion_only=False, apply_func=None):
1665
    """Gather as much statistics as possible about a list of glaciers.
1666

1667
    It can be used for result diagnostics and other analyses. If the data
1668
    necessary for a statistic is not available (e.g. flowline length), it
1669
    will simply be ignored.
1670

1671
    Parameters
1672
    ----------
1673
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1674
        the glacier directories to process
1675
    filesuffix : str
1676
        add suffix to output file
1677
    path : str, bool
1678
        Set to "True" in order  to store the info in the working directory
1679
        Set to a path to store the file to your chosen location
1680
    inversion_only : bool
1681
        if one wants to summarize the inversion output only (including calving)
1682
    apply_func : function
1683
        if one wants to summarize further information about a glacier, set
1684
        this kwarg to a function that accepts a glacier directory as first
1685
        positional argument, and the dictionary to fill in with data as
1686
        second argument. The dictionary should only store scalar values (strings,
1687
        floats, ints).
1688
        !Careful! For multiprocessing, the function cannot be located at the
1689
        top level, i.e. you may need to import it from a module for this to work,
1690
        or from a dummy class (https://stackoverflow.com/questions/8804830)
1691
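
    Examples
    --------
    A minimal sketch, assuming `gdirs` is a list of pre-processed glacier
    directories; per default the table is also written to the working
    directory:

    >>> df = compile_glacier_statistics(gdirs, filesuffix='_run1')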
    """
1692
    from oggm.workflow import execute_entity_task
6✔
1693

1694
    out_df = execute_entity_task(glacier_statistics, gdirs,
6✔
1695
                                 apply_func=apply_func,
1696
                                 inversion_only=inversion_only)
1697

1698
    out = pd.DataFrame(out_df).set_index('rgi_id')
6✔
1699

1700
    if path:
6✔
1701
        if path is True:
6✔
1702
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
5✔
1703
                                    ('glacier_statistics' +
1704
                                     filesuffix + '.csv')))
1705
        else:
1706
            out.to_csv(path)
2✔
1707
    return out
6✔
1708

1709

1710
@global_task(log)
9✔
1711
def compile_fixed_geometry_mass_balance(gdirs, filesuffix='',
9✔
1712
                                        path=True, csv=False,
1713
                                        use_inversion_flowlines=True,
1714
                                        ys=None, ye=None, years=None,
1715
                                        climate_filename='climate_historical',
1716
                                        climate_input_filesuffix='',
1717
                                        temperature_bias=None,
1718
                                        precipitation_factor=None):
1719

1720
    """Compiles a table of specific mass balance timeseries for all glaciers.
1721

1722
    The file is stored in an HDF file (not CSV) by default. Use pd.read_hdf
1723
    to open it.
1724

1725
    Parameters
1726
    ----------
1727
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1728
        the glacier directories to process
1729
    filesuffix : str
1730
        add suffix to output file
1731
    path : str, bool
1732
        Set to "True" in order  to store the info in the working directory
1733
        Set to a path to store the file to your chosen location (file
1734
        extension matters)
1735
    csv : bool
1736
        Set to store the data in csv instead of hdf.
1737
    use_inversion_flowlines : bool
1738
        whether to use the inversion flowlines or the model flowlines
1739
    ys : int
1740
        start year of the model run (default: from the climate file)
1741
1742
    ye : int
1743
        end year of the model run (default: from the climate file)
1744
    years : array of ints
1745
        override ys and ye with the years of your choice
1746
    climate_filename : str
1747
        name of the climate file, e.g. 'climate_historical' (default) or
1748
        'gcm_data'
1749
    climate_input_filesuffix: str
1750
        filesuffix for the input climate file
1751
    temperature_bias : float
1752
        add a bias to the temperature timeseries
1753
    precipitation_factor : float
1754
        multiply the precipitation time series by this factor
1755
        default is None, which means that the precipitation factor from the
1756
        calibration (cfg.PARAMS['prcp_fac']) is applied
1757
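
    Examples
    --------
    A minimal sketch, assuming `gdirs` is a list of glacier directories
    with climate data and a calibrated mass balance:

    >>> df = compile_fixed_geometry_mass_balance(gdirs, ys=1980, ye=2019)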
    """
1758

1759
    from oggm.workflow import execute_entity_task
3✔
1760
    from oggm.core.massbalance import fixed_geometry_mass_balance
3✔
1761

1762
    out_df = execute_entity_task(fixed_geometry_mass_balance, gdirs,
3✔
1763
                                 use_inversion_flowlines=use_inversion_flowlines,
1764
                                 ys=ys, ye=ye, years=years, climate_filename=climate_filename,
1765
                                 climate_input_filesuffix=climate_input_filesuffix,
1766
                                 temperature_bias=temperature_bias,
1767
                                 precipitation_factor=precipitation_factor)
1768

1769
    for idx, s in enumerate(out_df):
3✔
1770
        if s is None:
3✔
1771
            out_df[idx] = pd.Series(np.NaN)
×
1772

1773
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
3✔
1774
    out = out.dropna(axis=0, how='all')
3✔
1775

1776
    if path:
3✔
1777
        if path is True:
3✔
1778
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
1779
                                 'fixed_geometry_mass_balance' + filesuffix)
1780
            if csv:
1✔
1781
                out.to_csv(fpath + '.csv')
×
1782
            else:
1783
                out.to_hdf(fpath + '.hdf', key='df')
1✔
1784
        else:
1785
            ext = os.path.splitext(path)[-1]
2✔
1786
            if ext.lower() == '.csv':
2✔
1787
                out.to_csv(path)
2✔
1788
            elif ext.lower() == '.hdf':
×
1789
                out.to_hdf(path, key='df')
×
1790
    return out
3✔
1791

1792

1793
@global_task(log)
9✔
1794
def compile_ela(gdirs, filesuffix='', path=True, csv=False, ys=None, ye=None,
9✔
1795
                years=None, climate_filename='climate_historical', temperature_bias=None,
1796
                precipitation_factor=None, climate_input_filesuffix='',
1797
                mb_model_class=None):
1798
    """Compiles a table of ELA timeseries for all glaciers for a given years,
1799
    using the mb_model_class (default MonthlyTIModel).
1800

1801
    The file is stored in an HDF file (not CSV) by default. Use pd.read_hdf
1802
    to open it.
1803

1804
    Parameters
1805
    ----------
1806
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1807
        the glacier directories to process
1808
    filesuffix : str
1809
        add suffix to output file
1810
    path : str, bool
1811
        Set to "True" in order  to store the info in the working directory
1812
        Set to a path to store the file to your chosen location (file
1813
        extension matters)
1814
    csv: bool
1815
        Set to store the data in csv instead of hdf.
1816
    ys : int
1817
        start year
1818
    ye : int
1819
        end year
1820
    years : array of ints
1821
        override ys and ye with the years of your choice
1822
    climate_filename : str
1823
        name of the climate file, e.g. 'climate_historical' (default) or
1824
        'gcm_data'
1825
    climate_input_filesuffix : str
1826
        filesuffix for the input climate file
1827
    temperature_bias : float
1828
        add a bias to the temperature timeseries
1829
    precipitation_factor : float
1830
        multiply the precipitation time series by this factor
1831
        default is None, which means that the precipitation factor from the
1832
        calibration (cfg.PARAMS['prcp_fac']) is applied
1833
    mb_model_class : MassBalanceModel class
1834
        the MassBalanceModel class to use, default is MonthlyTIModel
1835
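
    Examples
    --------
    A minimal sketch, assuming `gdirs` is a list of glacier directories
    with climate data and a calibrated mass balance:

    >>> df = compile_ela(gdirs, ys=2000, ye=2019, csv=True)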
    """
1836
    from oggm.workflow import execute_entity_task
1✔
1837
    from oggm.core.massbalance import compute_ela, MonthlyTIModel
1✔
1838

1839
    if mb_model_class is None:
1✔
1840
        mb_model_class = MonthlyTIModel
1✔
1841

1842
    out_df = execute_entity_task(compute_ela, gdirs, ys=ys, ye=ye, years=years,
1✔
1843
                                 climate_filename=climate_filename,
1844
                                 climate_input_filesuffix=climate_input_filesuffix,
1845
                                 temperature_bias=temperature_bias,
1846
                                 precipitation_factor=precipitation_factor,
1847
                                 mb_model_class=mb_model_class)
1848

1849
    for idx, s in enumerate(out_df):
1✔
1850
        if s is None:
1✔
1851
            out_df[idx] = pd.Series(np.NaN)
×
1852

1853
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
1✔
1854
    out = out.dropna(axis=0, how='all')
1✔
1855

1856
    if path:
1✔
1857
        if path is True:
1✔
1858
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
1859
                                 'ELA' + filesuffix)
1860
            if csv:
1✔
1861
                out.to_csv(fpath + '.csv')
1✔
1862
            else:
1863
                out.to_hdf(fpath + '.hdf', key='df')
1✔
1864
        else:
1865
            ext = os.path.splitext(path)[-1]
×
1866
            if ext.lower() == '.csv':
×
1867
                out.to_csv(path)
×
1868
            elif ext.lower() == '.hdf':
×
1869
                out.to_hdf(path, key='df')
×
1870
    return out
1✔
1871

1872

1873
@entity_task(log)
9✔
1874
def climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
1875
                       input_filesuffix=''):
1876
    """Gather as much statistics as possible about this glacier.
1877

1878
    It can be used for result diagnostics and other analyses. If the data
1879
    necessary for a statistic is not available (e.g. flowline length), it
1880
    will simply be ignored.
1881

1882
    Important note: the climate is extracted from the mass-balance model and
1883
    is therefore "corrected" according to the mass-balance calibration scheme
1884
    (e.g. the precipitation factor and the temp bias correction). For more
1885
    flexible information about the raw climate data, use `compile_climate_input`
1886
    or `raw_climate_statistics`.
1887

1888
    Parameters
1889
    ----------
1890
    add_climate_period : int or list of ints
1891
        compile climate statistics for the halfsize*2 + 1 yrs period
1892
        around the selected date.
1893
    halfsize : int
1894
        the half size of the window
1895
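
    Examples
    --------
    A minimal sketch for a single glacier directory, averaging over the
    1980-2010 period (y0=1995, halfsize=15):

    >>> d = climate_statistics(gdir, add_climate_period=1995, halfsize=15)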
    """
1896
    from oggm.core.massbalance import (ConstantMassBalance,
2✔
1897
                                       MultipleFlowlineMassBalance)
1898

1899
    d = OrderedDict()
2✔
1900

1901
    # Easy stats - this should always be possible
1902
    d['rgi_id'] = gdir.rgi_id
2✔
1903
    d['rgi_region'] = gdir.rgi_region
2✔
1904
    d['rgi_subregion'] = gdir.rgi_subregion
2✔
1905
    d['name'] = gdir.name
2✔
1906
    d['cenlon'] = gdir.cenlon
2✔
1907
    d['cenlat'] = gdir.cenlat
2✔
1908
    d['rgi_area_km2'] = gdir.rgi_area_km2
2✔
1909
    d['glacier_type'] = gdir.glacier_type
2✔
1910
    d['terminus_type'] = gdir.terminus_type
2✔
1911
    d['status'] = gdir.status
2✔
1912

1913
    # The rest is less certain
1914
    with warnings.catch_warnings():
2✔
1915
        warnings.filterwarnings("ignore", category=RuntimeWarning)
2✔
1916
        try:
2✔
1917
            # Flowline related stuff
1918
            h = np.array([])
2✔
1919
            widths = np.array([])
2✔
1920
            fls = gdir.read_pickle('inversion_flowlines')
2✔
1921
            dx = fls[0].dx * gdir.grid.dx
2✔
1922
            for fl in fls:
2✔
1923
                hgt = fl.surface_h
2✔
1924
                h = np.append(h, hgt)
2✔
1925
                widths = np.append(widths, fl.widths * dx)
2✔
1926
            d['flowline_mean_elev'] = np.average(h, weights=widths)
2✔
1927
            d['flowline_max_elev'] = np.max(h)
2✔
1928
            d['flowline_min_elev'] = np.min(h)
2✔
1929
        except BaseException:
1✔
1930
            pass
1✔
1931

1932
        # Climate and MB at specified dates
1933
        add_climate_period = tolist(add_climate_period)
2✔
1934
        for y0 in add_climate_period:
2✔
1935
            try:
2✔
1936
                fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
2✔
1937
                mbcl = ConstantMassBalance
2✔
1938
                mbmod = MultipleFlowlineMassBalance(gdir, mb_model_class=mbcl,
2✔
1939
                                                    y0=y0, halfsize=halfsize,
1940
                                                    use_inversion_flowlines=True,
1941
                                                    input_filesuffix=input_filesuffix)
1942
                h, w, mbh = mbmod.get_annual_mb_on_flowlines()
2✔
1943
                mbh = mbh * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density']
2✔
1944
                pacc = np.where(mbh >= 0)
2✔
1945
                pab = np.where(mbh < 0)
2✔
1946
                d[fs + '_aar'] = np.sum(w[pacc]) / np.sum(w)
2✔
1947
                try:
2✔
1948
                    # Try to get the slope
1949
                    mb_slope, _, _, _, _ = stats.linregress(h[pab], mbh[pab])
2✔
1950
                    d[fs + '_mb_grad'] = mb_slope
2✔
1951
                except BaseException:
×
1952
                    # we don't mind if something goes wrong
1953
                    d[fs + '_mb_grad'] = np.NaN
×
1954
                d[fs + '_ela_h'] = mbmod.get_ela()
2✔
1955
                # Climate
1956
                t, tm, p, ps = mbmod.flowline_mb_models[0].get_annual_climate(
2✔
1957
                    [d[fs + '_ela_h'],
1958
                     d['flowline_mean_elev'],
1959
                     d['flowline_max_elev'],
1960
                     d['flowline_min_elev']])
1961
                for n, v in zip(['temp', 'tempmelt', 'prcpsol'], [t, tm, ps]):
2✔
1962
                    d[fs + '_avg_' + n + '_ela_h'] = v[0]
2✔
1963
                    d[fs + '_avg_' + n + '_mean_elev'] = v[1]
2✔
1964
                    d[fs + '_avg_' + n + '_max_elev'] = v[2]
2✔
1965
                    d[fs + '_avg_' + n + '_min_elev'] = v[3]
2✔
1966
                d[fs + '_avg_prcp'] = p[0]
2✔
1967
            except BaseException:
1✔
1968
                pass
1✔
1969

1970
    return d
2✔
1971

1972

1973
@entity_task(log)
9✔
1974
def raw_climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
1975
                           input_filesuffix=''):
1976
    """Gather as much statistics as possible about this glacier.
1977

1978
    This is like "climate_statistics" but without relying on the
1979
    mass-balance model, i.e. closer to the actual data (uncorrected)
1980

1981
    Parameters
1982
    ----------
1983
    add_climate_period : int or list of ints
1984
        compile climate statistics for the halfsize*2 + 1 yrs period
1985
        around the selected date.
    halfsize : int
        the half size of the window
1986
    """
1987
    d = OrderedDict()
1✔
1988
    # Easy stats - this should always be possible
1989
    d['rgi_id'] = gdir.rgi_id
1✔
1990
    d['rgi_region'] = gdir.rgi_region
1✔
1991
    d['rgi_subregion'] = gdir.rgi_subregion
1✔
1992
    d['name'] = gdir.name
1✔
1993
    d['cenlon'] = gdir.cenlon
1✔
1994
    d['cenlat'] = gdir.cenlat
1✔
1995
    d['rgi_area_km2'] = gdir.rgi_area_km2
1✔
1996
    d['glacier_type'] = gdir.glacier_type
1✔
1997
    d['terminus_type'] = gdir.terminus_type
1✔
1998
    d['status'] = gdir.status
1✔
1999

2000
    # The rest is less certain
2001
    with warnings.catch_warnings():
1✔
2002
        warnings.filterwarnings("ignore", category=RuntimeWarning)
1✔
2003

2004
        # Climate and MB at specified dates
2005
        add_climate_period = tolist(add_climate_period)
1✔
2006

2007
        # get non-corrected winter daily mean prcp (kg m-2 day-1) for
2008
        # the chosen time period
2009
        for y0 in add_climate_period:
1✔
2010
            fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
1✔
2011
            try:
1✔
2012
                # get non-corrected winter daily mean prcp (kg m-2 day-1)
2013
                # it is easier to get this directly from the raw climate files
2014
                fp = gdir.get_filepath('climate_historical',
1✔
2015
                                       filesuffix=input_filesuffix)
2016
                with xr.open_dataset(fp).prcp as ds_pr:
1✔
2017
                    # just select winter months
2018
                    if gdir.hemisphere == 'nh':
1✔
2019
                        m_winter = [10, 11, 12, 1, 2, 3, 4]
1✔
2020
                    else:
2021
                        m_winter = [4, 5, 6, 7, 8, 9, 10]
×
2022
                    ds_pr_winter = ds_pr.where(ds_pr['time.month'].isin(m_winter), drop=True)
1✔
2023
                    # select the correct year time period
2024
                    ds_pr_winter = ds_pr_winter.sel(time=slice(f'{fs[:4]}-01-01',
1✔
2025
                                                               f'{fs[-4:]}-12-01'))
2026
                    # check if we have the full time period
2027
                    n_years = int(fs[-4:]) - int(fs[:4]) + 1
1✔
2028
                    assert len(ds_pr_winter.time) == n_years * 7, 'chosen time-span invalid'
1✔
2029
                    ds_d_pr_winter_mean = (ds_pr_winter / ds_pr_winter.time.dt.daysinmonth).mean()
1✔
2030
                    d[f'{fs}_uncorrected_winter_daily_mean_prcp'] = ds_d_pr_winter_mean.values
1✔
2031
            except BaseException:
1✔
2032
                pass
1✔
2033
    return d
1✔
2034

2035

2036
@global_task(log)
9✔
2037
def compile_climate_statistics(gdirs, filesuffix='', path=True,
9✔
2038
                               add_climate_period=1995,
2039
                               halfsize=15,
2040
                               add_raw_climate_statistics=False,
2041
                               input_filesuffix=''):
2042
    """Gather as much statistics as possible about a list of glaciers.
2043

2044
    It can be used for result diagnostics and other analyses. If the data
2045
    necessary for a statistic is not available (e.g. flowline length), it
2046
    will simply be ignored.
2047

2048
    Parameters
2049
    ----------
2050
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
        the glacier directories to process
2051
    filesuffix : str
2052
        add suffix to output file
2053
    path : str, bool
2054
        Set to "True" in order  to store the info in the working directory
2055
        Set to a path to store the file to your chosen location
2056
    add_climate_period : int or list of ints
2057
        compile climate statistics for the halfsize*2 + 1 yrs period
2058
        around the selected date.
    halfsize : int
        the half size of the window
    add_raw_climate_statistics : bool
        whether to also compile the raw (uncorrected) climate statistics
2059
    input_filesuffix : str
2060
        filesuffix of the used climate_historical file, default is no filesuffix
2061
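
    Examples
    --------
    A minimal sketch, assuming `gdirs` is a list of glacier directories
    with climate data and a calibrated mass balance:

    >>> df = compile_climate_statistics(gdirs,
    ...                                 add_climate_period=[1965, 1995],
    ...                                 halfsize=15)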
    """
2062
    from oggm.workflow import execute_entity_task
3✔
2063

2064
    out_df = execute_entity_task(climate_statistics, gdirs,
3✔
2065
                                 add_climate_period=add_climate_period,
2066
                                 halfsize=halfsize,
2067
                                 input_filesuffix=input_filesuffix)
2068
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
2069

2070
    if add_raw_climate_statistics:
3✔
2071
        out_df = execute_entity_task(raw_climate_statistics, gdirs,
1✔
2072
                                     add_climate_period=add_climate_period,
2073
                                     halfsize=halfsize,
2074
                                     input_filesuffix=input_filesuffix)
2075
        out = out.merge(pd.DataFrame(out_df).set_index('rgi_id'))
1✔
2076

2077
    if path:
3✔
2078
        if path is True:
3✔
2079
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
3✔
2080
                                    ('climate_statistics' +
2081
                                     filesuffix + '.csv')))
2082
        else:
2083
            out.to_csv(path)
1✔
2084
    return out
3✔
2085

2086

2087
def extend_past_climate_run(past_run_file=None,
9✔
2088
                            fixed_geometry_mb_file=None,
2089
                            glacier_statistics_file=None,
2090
                            path=False,
2091
                            use_compression=True):
2092
    """Utility function to extend past MB runs prior to the RGI date.
2093

2094
    We use a fixed geometry (and a fixed calving rate) for all dates prior
2095
    to the RGI date.
2096

2097
    This is not parallelized, i.e. a bit slow.
2098

2099
    Parameters
2100
    ----------
2101
    past_run_file : str
2102
        path to the historical run (nc)
2103
    fixed_geometry_mb_file : str
2104
        path to the MB file (csv)
2105
    glacier_statistics_file : str
2106
        path to the glacier stats file (csv)
2107
    path : str
2108
        where to store the file
2109
    use_compression : bool
        whether to apply zlib compression to the output netCDF file
2110

2111
    Returns
2112
    -------
2113
    the extended dataset
2114
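
    Examples
    --------
    A minimal sketch; the file paths are illustrative and would typically
    point to the output of the corresponding `compile_*` tasks:

    >>> ds = extend_past_climate_run(
    ...     past_run_file='run_output.nc',
    ...     fixed_geometry_mb_file='fixed_geometry_mass_balance.csv',
    ...     glacier_statistics_file='glacier_statistics.csv',
    ...     path='run_output_extended.nc')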
    """
2115

2116
    log.workflow('Applying extend_past_climate_run on '
2✔
2117
                 '{}'.format(past_run_file))
2118

2119
    fixed_geometry_mb_df = pd.read_csv(fixed_geometry_mb_file, index_col=0,
2✔
2120
                                       low_memory=False)
2121
    stats_df = pd.read_csv(glacier_statistics_file, index_col=0,
2✔
2122
                           low_memory=False)
2123

2124
    with xr.open_dataset(past_run_file) as past_ds:
2✔
2125

2126
        # We need at least area and vol to do something
2127
        if 'volume' not in past_ds.data_vars or 'area' not in past_ds.data_vars:
2✔
2128
            raise InvalidWorkflowError('Need both volume and area to proceed')
×
2129

2130
        y0_run = int(past_ds.time[0])
2✔
2131
        y1_run = int(past_ds.time[-1])
2✔
2132
        if (y1_run - y0_run + 1) != len(past_ds.time):
2✔
2133
            raise NotImplementedError('Currently only supports annual outputs')
2134
        y0_clim = int(fixed_geometry_mb_df.index[0])
2✔
2135
        y1_clim = int(fixed_geometry_mb_df.index[-1])
2✔
2136
        if y0_clim > y0_run or y1_clim < y0_run:
2✔
2137
            raise InvalidWorkflowError('Dates do not match.')
×
2138
        if y1_clim != y1_run - 1:
2✔
2139
            raise InvalidWorkflowError('Dates do not match.')
×
2140
        if len(past_ds.rgi_id) != len(fixed_geometry_mb_df.columns):
2✔
2141
            # This might happen if we are testing on new directories
2142
            fixed_geometry_mb_df = fixed_geometry_mb_df[past_ds.rgi_id]
×
2143
        if len(past_ds.rgi_id) != len(stats_df.index):
2✔
2144
            stats_df = stats_df.loc[past_ds.rgi_id]
×
2145

2146
        # Make sure we agree on order
2147
        df = fixed_geometry_mb_df[past_ds.rgi_id]
2✔
2148

2149
        # Output data
2150
        years = np.arange(y0_clim, y1_run+1)
2✔
2151
        ods = past_ds.reindex({'time': years})
2✔
2152

2153
        # Time
2154
        ods['hydro_year'].data[:] = years
2✔
2155
        ods['hydro_month'].data[:] = ods['hydro_month'][-1]
2✔
2156
        ods['calendar_year'].data[:] = years
2✔
2157
        ods['calendar_month'].data[:] = ods['calendar_month'][-1]
2✔
2158
        for vn in ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']:
2✔
2159
            ods[vn] = ods[vn].astype(int)
2✔
2160

2161
        # New vars
2162
        for vn in ['volume', 'volume_m3_min_h', 'volume_bsl', 'volume_bwl',
2✔
2163
                   'area', 'area_m2_min_h', 'length', 'calving', 'calving_rate']:
2164
            if vn in ods.data_vars:
2✔
2165
                ods[vn + '_ext'] = ods[vn].copy(deep=True)
2✔
2166
                ods[vn + '_ext'].attrs['description'] += ' (extended with MB data)'
2✔
2167

2168
        vn = 'volume_fixed_geom_ext'
2✔
2169
        ods[vn] = ods['volume'].copy(deep=True)
2✔
2170
        ods[vn].attrs['description'] += ' (replaced with fixed geom data)'
2✔
2171

2172
        rho = cfg.PARAMS['ice_density']
2✔
2173
        # Loop over the ids
2174
        for i, rid in enumerate(ods.rgi_id.data):
2✔
2175
            # Both do not need to be same length but they need to start same
2176
            mb_ts = df.values[:, i]
2✔
2177
            orig_vol_ts = ods.volume_ext.data[:, i]
2✔
2178
            if not (np.isfinite(mb_ts[-1]) and np.isfinite(orig_vol_ts[-1])):
2✔
2179
                # Not a valid glacier
2180
                continue
×
2181
            if np.isfinite(orig_vol_ts[0]):
2✔
2182
                # Nothing to extend, really
2183
                continue
×
2184

2185
            # First valid id
2186
            fid = np.argmax(np.isfinite(orig_vol_ts))
2✔
2187

2188
            # Add calving to the mix
2189
            try:
2✔
2190
                calv_flux = stats_df.loc[rid, 'calving_flux'] * 1e9
2✔
2191
                calv_rate = stats_df.loc[rid, 'calving_rate_myr']
×
2192
            except KeyError:
2✔
2193
                calv_flux = 0
2✔
2194
                calv_rate = 0
2✔
2195
            if not np.isfinite(calv_flux):
2✔
2196
                calv_flux = 0
×
2197
            if not np.isfinite(calv_rate):
2✔
2198
                calv_rate = 0
×
2199

2200
            # Fill area and length which stays constant before date
2201
            orig_area_ts = ods.area_ext.data[:, i]
2✔
2202
            orig_area_ts[:fid] = orig_area_ts[fid]
2✔
2203

2204
            # We convert SMB to volume
2205
            mb_vol_ts = (mb_ts / rho * orig_area_ts[fid] - calv_flux).cumsum()
2✔
2206
            calv_ts = (mb_ts * 0 + calv_flux).cumsum()
2✔
2207

2208
            # The -1 is because the volume change is known at end of year
2209
            mb_vol_ts = mb_vol_ts + orig_vol_ts[fid] - mb_vol_ts[fid-1]
2✔
2210

2211
            # Now back to netcdf
2212
            ods.volume_fixed_geom_ext.data[1:, i] = mb_vol_ts
2✔
2213
            ods.volume_ext.data[1:fid, i] = mb_vol_ts[0:fid-1]
2✔
2214
            ods.area_ext.data[:, i] = orig_area_ts
2✔
2215

2216
            # Optional variables
2217
            if 'length' in ods.data_vars:
2✔
2218
                orig_length_ts = ods.length_ext.data[:, i]
2✔
2219
                orig_length_ts[:fid] = orig_length_ts[fid]
2✔
2220
                ods.length_ext.data[:, i] = orig_length_ts
2✔
2221

2222
            if 'calving' in ods.data_vars:
2✔
2223
                orig_calv_ts = ods.calving_ext.data[:, i]
1✔
2224
                # The -1 is because the volume change is known at end of year
2225
                calv_ts = calv_ts + orig_calv_ts[fid] - calv_ts[fid-1]
1✔
2226
                ods.calving_ext.data[1:fid, i] = calv_ts[0:fid-1]
1✔
2227

2228
            if 'calving_rate' in ods.data_vars:
2✔
2229
                orig_calv_rate_ts = ods.calving_rate_ext.data[:, i]
1✔
2230
                # +1 because calving rate at year 0 is unknown from the dyns model
2231
                orig_calv_rate_ts[:fid+1] = calv_rate
1✔
2232
                ods.calving_rate_ext.data[:, i] = orig_calv_rate_ts
1✔
2233

2234
            # Extend vol bsl by assuming that % stays constant
2235
            if 'volume_bsl' in ods.data_vars:
2✔
2236
                bsl = ods.volume_bsl.data[fid, i] / ods.volume.data[fid, i]
1✔
2237
                ods.volume_bsl_ext.data[:fid, i] = bsl * ods.volume_ext.data[:fid, i]
1✔
2238
            if 'volume_bwl' in ods.data_vars:
2✔
2239
                bwl = ods.volume_bwl.data[fid, i] / ods.volume.data[fid, i]
1✔
2240
                ods.volume_bwl_ext.data[:fid, i] = bwl * ods.volume_ext.data[:fid, i]
1✔
2241

2242
        # Remove old vars
2243
        for vn in list(ods.data_vars):
2✔
2244
            if '_ext' not in vn and 'time' in ods[vn].dims:
2✔
2245
                del ods[vn]
2✔
2246

2247
        # Rename vars to their old names
2248
        ods = ods.rename(dict((o, o.replace('_ext', ''))
2✔
2249
                              for o in ods.data_vars))
2250

2251
        # Remove t0 (which is NaN)
2252
        ods = ods.isel(time=slice(1, None))
2✔
2253

2254
        # To file?
2255
        if path:
2✔
2256
            enc_var = {'dtype': 'float32'}
2✔
2257
            if use_compression:
2✔
2258
                enc_var['complevel'] = 5
2✔
2259
                enc_var['zlib'] = True
2✔
2260
            encoding = {v: enc_var for v in ods.data_vars}
2✔
2261
            ods.to_netcdf(path, encoding=encoding)
2✔
2262

2263
    return ods
2✔
2264

2265

2266
def idealized_gdir(surface_h, widths_m, map_dx, flowline_dx=1,
9✔
2267
                   base_dir=None, reset=False):
2268
    """Creates a glacier directory with flowline input data only.
2269

2270
    This is useful for testing, or for idealized experiments.
2271

2272
    Parameters
2273
    ----------
2274
    surface_h : ndarray
2275
        the surface elevation of the flowline's grid points (in m).
2276
    widths_m : ndarray
2277
        the widths of the flowline's grid points (in m).
2278
    map_dx : float
2279
        the grid spacing (in m)
2280
    flowline_dx : int
2281
        the flowline grid spacing (in units of map_dx, often it should be 1)
2282
    base_dir : str
2283
        path to the directory where the glacier directory is created.
2284
        Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2285
    reset : bool, default=False
2286
        empties the directory at construction
2287

2288
    Returns
2289
    -------
2290
    a GlacierDirectory instance
2291
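
    Examples
    --------
    A minimal sketch of an idealized glacier with a linear bed and
    constant width:

    >>> import numpy as np
    >>> surface_h = np.linspace(3000., 2000., 100)
    >>> widths_m = np.ones(100) * 300.
    >>> gdir = idealized_gdir(surface_h, widths_m, map_dx=100.)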
    """
2292

2293
    from oggm.core.centerlines import Centerline
1✔
2294

2295
    # Area from geometry
2296
    area_km2 = np.sum(widths_m * map_dx * flowline_dx) * 1e-6
1✔
2297

2298
    # Dummy entity - should probably also change the geometry
2299
    entity = gpd.read_file(get_demo_file('Hintereisferner_RGI5.shp')).iloc[0]
1✔
2300
    entity.Area = area_km2
1✔
2301
    entity.CenLat = 0
1✔
2302
    entity.CenLon = 0
1✔
2303
    entity.Name = ''
1✔
2304
    entity.RGIId = 'RGI50-00.00000'
1✔
2305
    entity.O1Region = '00'
1✔
2306
    entity.O2Region = '0'
1✔
2307
    gdir = GlacierDirectory(entity, base_dir=base_dir, reset=reset)
1✔
2308
    gdir.write_shapefile(gpd.GeoDataFrame([entity]), 'outlines')
1✔
2309

2310
    # Idealized flowline
2311
    coords = np.arange(0, len(surface_h) - 0.5, 1)
1✔
2312
    line = shpg.LineString(np.vstack([coords, coords * 0.]).T)
1✔
2313
    fl = Centerline(line, dx=flowline_dx, surface_h=surface_h, map_dx=map_dx)
1✔
2314
    fl.widths = widths_m / map_dx
1✔
2315
    fl.is_rectangular = np.ones(fl.nx).astype(bool)
1✔
2316
    gdir.write_pickle([fl], 'inversion_flowlines')
1✔
2317

2318
    # Idealized map
2319
    grid = salem.Grid(nxny=(1, 1), dxdy=(map_dx, map_dx), x0y0=(0, 0))
1✔
2320
    grid.to_json(gdir.get_filepath('glacier_grid'))
1✔
2321

2322
    return gdir
1✔
2323

2324

2325
def _back_up_retry(func, exceptions, max_count=5):
9✔
2326
    """Re-Try an action up to max_count times.
2327
    """
2328

2329
    count = 0
2✔
2330
    while count < max_count:
2✔
2331
        try:
2✔
2332
            if count > 0:
2✔
2333
                time.sleep(random.uniform(0.05, 0.1))
×
2334
            return func()
2✔
2335
        except exceptions:
1✔
2336
            count += 1
×
2337
            if count >= max_count:
×
2338
                raise
×
2339

2340

2341
def _robust_extract(to_dir, *args, **kwargs):
9✔
2342
    """For some obscure reason this operation randomly fails.
2343

2344
    Try to make it more robust.
2345
    """
2346

2347
    def func():
2✔
2348
        with tarfile.open(*args, **kwargs) as tf:
2✔
2349
            if not len(tf.getnames()):
2✔
2350
                raise RuntimeError("Empty tarfile")
×
2351
            tf.extractall(os.path.dirname(to_dir))
2✔
2352

2353
    _back_up_retry(func, FileExistsError)
2✔
2354

2355

2356
def robust_tar_extract(from_tar, to_dir, delete_tar=False):
9✔
2357
    """Extract a tar file - also checks for a "tar in tar" situation"""
2358

2359
    if os.path.isfile(from_tar):
2✔
2360
        _robust_extract(to_dir, from_tar, 'r')
1✔
2361
    else:
2362
        # maybe a tar in tar
2363
        base_tar = os.path.dirname(from_tar) + '.tar'
2✔
2364
        if not os.path.isfile(base_tar):
2✔
2365
            raise FileNotFoundError('Could not find a tarfile with path: '
×
2366
                                    '{}'.format(from_tar))
2367
        if delete_tar:
2✔
2368
            raise InvalidParamsError('Cannot delete tar in tar.')
×
2369
        # Open the tar
2370
        bname = os.path.basename(from_tar)
2✔
2371
        dirbname = os.path.basename(os.path.dirname(from_tar))
2✔
2372

2373
        def func():
2✔
2374
            with tarfile.open(base_tar, 'r') as tf:
2✔
2375
                i_from_tar = tf.getmember(os.path.join(dirbname, bname))
2✔
2376
                with tf.extractfile(i_from_tar) as fileobj:
2✔
2377
                    _robust_extract(to_dir, fileobj=fileobj)
2✔
2378

2379
        _back_up_retry(func, RuntimeError)
2✔
2380

2381
    if delete_tar:
2✔
2382
        os.remove(from_tar)
1✔
2383

2384

2385
class GlacierDirectory(object):
9✔
2386
    """Organizes read and write access to the glacier's files.
2387

2388
    It handles a glacier directory created in a base directory (default
2389
    is the "per_glacier" folder in the working directory). The role of a
2390
    GlacierDirectory is to give access to file paths and to I/O operations.
2391
    The user should not care about *where* the files are
2392
    located, but should know their name (see :ref:`basenames`).
2393

2394
    If the directory does not exist, it will be created.
2395

2396
    See :ref:`glacierdir` for more information.
2397

2398
    Attributes
2399
    ----------
2400
    dir : str
2401
        path to the directory
2402
    base_dir : str
2403
        path to the base directory
2404
    rgi_id : str
2405
        The glacier's RGI identifier
2406
    glims_id : str
2407
        The glacier's GLIMS identifier (when available)
2408
    rgi_area_km2 : float
2409
        The glacier's RGI area (km2)
2410
    cenlon, cenlat : float
2411
        The glacier centerpoint's lon/lat
2412
    rgi_date : int
2413
        The RGI's BGNDATE year attribute if available. Otherwise, defaults to
2414
        the median year for the RGI region
2415
    rgi_region : str
2416
        The RGI region ID
2417
    rgi_subregion : str
2418
        The RGI subregion ID
2419
    rgi_version : str
2420
        The RGI version name
2421
    rgi_region_name : str
2422
        The RGI region name
2423
    rgi_subregion_name : str
2424
        The RGI subregion name
2425
    name : str
2426
        The RGI glacier name (if available)
2427
    hemisphere : str
2428
        `nh` or `sh`
2429
    glacier_type : str
2430
        The RGI glacier type ('Glacier', 'Ice cap', 'Perennial snowfield',
2431
        'Seasonal snowfield')
2432
    terminus_type : str
2433
        The RGI terminus type ('Land-terminating', 'Marine-terminating',
2434
        'Lake-terminating', 'Dry calving', 'Regenerated', 'Shelf-terminating')
2435
    is_tidewater : bool
2436
        Is the glacier a calving glacier?
2437
    is_lake_terminating : bool
2438
        Is the glacier a lake terminating glacier?
2439
    is_nominal : bool
2440
        Is the glacier an RGI nominal glacier?
2441
    is_icecap : bool
2442
        Is the glacier an ice cap?
2443
    extent_ll : list
2444
        Extent of the glacier in lon/lat
2445
    logfile : str
2446
        Path to the log file (txt)
2447
    inversion_calving_rate : float
2448
        Calving rate used for the inversion
2449
    grid
2450
    dem_info
2451
    dem_daterange
2452
    intersects_ids
2453
    rgi_area_m2
2454
    rgi_area_km2
2455
    """
2456

2457
    def __init__(self, rgi_entity, base_dir=None, reset=False,
9✔
2458
                 from_tar=False, delete_tar=False):
2459
        """Creates a new directory or opens an existing one.
2460

2461
        Parameters
2462
        ----------
2463
        rgi_entity : a ``geopandas.GeoSeries`` or str
2464
            glacier entity read from the shapefile (or a valid RGI ID if the
2465
            directory exists)
2466
        base_dir : str
2467
            path to the directory where the glacier directory is created or opened.
2468
            Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2469
        reset : bool, default=False
2470
            empties the directory at construction (careful!)
2471
        from_tar : str or bool, default=False
2472
            path to a tar file to extract the gdir data from. If set to `True`,
2473
            will check for a tar file at the expected location in `base_dir`.
2474
        delete_tar : bool, default=False
2475
            delete the original tar file after extraction.
2476
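
        Examples
        --------
        A minimal sketch; the RGI ID is illustrative and the corresponding
        directory (or tar file) must already exist below the working
        directory:

        >>> gdir = GlacierDirectory('RGI60-11.00897')
        >>> gdir = GlacierDirectory('RGI60-11.00897', from_tar=True)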
        """
2477

2478
        if base_dir is None:
7✔
2479
            if not cfg.PATHS.get('working_dir', None):
7✔
2480
                raise ValueError("Need a valid PATHS['working_dir']!")
×
2481
            base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
7✔
2482

2483
        # RGI IDs are also valid entries
2484
        if isinstance(rgi_entity, str):
7✔
2485
            # Get the meta from the shape file directly
2486
            if from_tar:
3✔
2487
                _dir = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
2✔
2488
                                    rgi_entity)
2489
                # Avoid bad surprises
2490
                if os.path.exists(_dir):
2✔
2491
                    shutil.rmtree(_dir)
2✔
2492
                if from_tar is True:
2✔
2493
                    from_tar = _dir + '.tar.gz'
×
2494
                robust_tar_extract(from_tar, _dir, delete_tar=delete_tar)
2✔
2495
                from_tar = False  # to not re-unpack later below
2✔
2496
                _shp = os.path.join(_dir, 'outlines.shp')
2✔
2497
            else:
2498
                _shp = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
3✔
2499
                                    rgi_entity, 'outlines.shp')
2500
            rgi_entity = self._read_shapefile_from_path(_shp)
3✔
2501
            crs = salem.check_crs(rgi_entity.crs)
3✔
2502
            rgi_entity = rgi_entity.iloc[0]
3✔
2503
            g = rgi_entity['geometry']
3✔
2504
            xx, yy = salem.transform_proj(crs, salem.wgs84,
3✔
2505
                                          [g.bounds[0], g.bounds[2]],
2506
                                          [g.bounds[1], g.bounds[3]])
2507
            write_shp = False
3✔
2508
        else:
2509
            g = rgi_entity['geometry']
7✔
2510
            xx, yy = ([g.bounds[0], g.bounds[2]],
7✔
2511
                      [g.bounds[1], g.bounds[3]])
2512
            write_shp = True
7✔
2513

2514
        # Extent of the glacier in lon/lat
2515
        self.extent_ll = [xx, yy]
7✔
2516

2517
        try:
7✔
2518
            # RGI V4?
2519
            rgi_entity.RGIID
7✔
2520
            raise ValueError('RGI Version 4 is not supported anymore')
×
2521
        except AttributeError:
7✔
2522
            pass
7✔
2523

2524
        try:
7✔
2525
            self.rgi_id = rgi_entity.rgi_id
7✔
2526
            self.glims_id = rgi_entity.glims_id
2✔
2527
        except AttributeError:
7✔
2528
            # RGI V6
2529
            self.rgi_id = rgi_entity.RGIId
7✔
2530
            self.glims_id = rgi_entity.GLIMSId
7✔
2531

2532
        # Do we want to use the RGI center point or ours?
2533
        if cfg.PARAMS['use_rgi_area']:
7✔
2534
            try:
7✔
2535
                self.cenlon = float(rgi_entity.cenlon)
7✔
2536
                self.cenlat = float(rgi_entity.cenlat)
2✔
2537
            except AttributeError:
7✔
2538
                # RGI V6
2539
                self.cenlon = float(rgi_entity.CenLon)
7✔
2540
                self.cenlat = float(rgi_entity.CenLat)
7✔
2541
        else:
2542
            cenlon, cenlat = rgi_entity.geometry.representative_point().xy
1✔
2543
            self.cenlon = float(cenlon[0])
1✔
2544
            self.cenlat = float(cenlat[0])
1✔
2545

2546
        try:
7✔
2547
            self.rgi_region = rgi_entity.o1region
7✔
2548
            self.rgi_subregion = rgi_entity.o2region
2✔
2549
        except AttributeError:
7✔
2550
            # RGI V6
2551
            self.rgi_region = '{:02d}'.format(int(rgi_entity.O1Region))
7✔
2552
            self.rgi_subregion = (self.rgi_region + '-' +
7✔
2553
                                  '{:02d}'.format(int(rgi_entity.O2Region)))
2554

2555
        try:
7✔
2556
            name = str(rgi_entity.name)
7✔
2557
            rgi_datestr = rgi_entity.src_date
7✔
2558
        except AttributeError:
7✔
2559
            # RGI V6
2560
            name = rgi_entity.Name
7✔
2561
            rgi_datestr = rgi_entity.BgnDate
7✔
2562

2563

2564
        try:
7✔
2565
            gtype = rgi_entity.GlacType
7✔
2566
        except AttributeError:
7✔
2567
            try:
7✔
2568
                # RGI V6
2569
                gtype = [str(rgi_entity.Form), str(rgi_entity.TermType)]
7✔
2570
            except AttributeError:
2✔
2571
                # temporary default for RGI V7:
2572
                gtype = ['0', '0']
2✔
2573

2574
        try:
7✔
2575
            gstatus = rgi_entity.RGIFlag[0]
7✔
2576
        except AttributeError:
7✔
2577
            try:
7✔
2578
                # RGI V6
2579
                gstatus = rgi_entity.Status
7✔
2580
            except AttributeError:
2✔
2581
                # temporary default for RGI V7:
2582
                gstatus = '0'
2✔
2583

2584
        # rgi version can be useful
2585
        # RGI2000-v7.0-G-06-00029
2586
        # RGI60-07.00245
2587
        if self.rgi_id.count('-') == 4:
7✔
2588
            self.rgi_version = '70'
2✔
2589
        else:
2590
            rgi_version = self.rgi_id.split('-')[0][-2:]
7✔
2591
            if rgi_version not in ['50', '60', '61']:
7✔
2592
                raise RuntimeError('RGI Version not supported: '
×
2593
                                   '{}'.format(rgi_version))
2594
            else:
2595
                self.rgi_version = rgi_version
7✔
2596
        # remove spurious characters and trailing blanks
2597
        self.name = filter_rgi_name(name)
7✔
2598

2599
        # region
2600
        reg_names, subreg_names = parse_rgi_meta(version=self.rgi_version[0])
7✔
2601
        reg_name = reg_names.loc[int(self.rgi_region)]
7✔
2602
        # RGI V6
2603
        if not isinstance(reg_name, str):
7✔
2604
            reg_name = reg_name.values[0]
7✔
2605
        self.rgi_region_name = self.rgi_region + ': ' + reg_name
7✔
2606
        try:
7✔
2607
            subreg_name = subreg_names.loc[self.rgi_subregion]
7✔
2608
            # RGI V6
2609
            if not isinstance(subreg_name, str):
7✔
2610
                subreg_name = subreg_name.values[0]
7✔
2611
            self.rgi_subregion_name = self.rgi_subregion + ': ' + subreg_name
7✔
2612
        except KeyError:
×
2613
            self.rgi_subregion_name = self.rgi_subregion + ': NoName'
×
2614

2615
        # Read glacier attrs
2616
        gtkeys = {'0': 'Glacier',
7✔
2617
                  '1': 'Ice cap',
2618
                  '2': 'Perennial snowfield',
2619
                  '3': 'Seasonal snowfield',
2620
                  '9': 'Not assigned',
2621
                  }
2622
        ttkeys = {'0': 'Land-terminating',
7✔
2623
                  '1': 'Marine-terminating',
2624
                  '2': 'Lake-terminating',
2625
                  '3': 'Dry calving',
2626
                  '4': 'Regenerated',
2627
                  '5': 'Shelf-terminating',
2628
                  '9': 'Not assigned',
2629
                  }
2630
        stkeys = {'0': 'Glacier or ice cap',
7✔
2631
                  '1': 'Glacier complex',
2632
                  '2': 'Nominal glacier',
2633
                  '9': 'Not assigned',
2634
                  }
2635
        self.glacier_type = gtkeys[gtype[0]]
7✔
2636
        self.terminus_type = ttkeys[gtype[1]]
7✔
2637
        self.status = stkeys['{}'.format(gstatus)]
7✔
2638

2639
        # Decide what is a tidewater glacier
2640
        user = cfg.PARAMS['tidewater_type']
7✔
2641
        if user == 1:
7✔
2642
            sel = ['Marine-terminating']
×
2643
        elif user == 2:
7✔
2644
            sel = ['Marine-terminating', 'Shelf-terminating']
7✔
2645
        elif user == 3:
2✔
2646
            sel = ['Marine-terminating', 'Lake-terminating']
×
2647
        elif user == 4:
2✔
2648
            sel = ['Marine-terminating', 'Lake-terminating', 'Shelf-terminating']
2✔
2649
        else:
2650
            raise InvalidParamsError("PARAMS['tidewater_type'] not understood")
×
2651
        self.is_tidewater = self.terminus_type in sel
7✔
2652
        self.is_lake_terminating = self.terminus_type == 'Lake-terminating'
7✔
2653
        self.is_marine_terminating = self.terminus_type == 'Marine-terminating'
7✔
2654
        self.is_shelf_terminating = self.terminus_type == 'Shelf-terminating'
7✔
2655
        self.is_nominal = self.status == 'Nominal glacier'
7✔
2656
        self.inversion_calving_rate = 0.
7✔
2657
        self.is_icecap = self.glacier_type == 'Ice cap'
7✔
2658

2659
        # Hemisphere
2660
        if self.cenlat < 0 or self.rgi_region == '16':
7✔
2661
            self.hemisphere = 'sh'
×
2662
        else:
2663
            self.hemisphere = 'nh'
7✔
2664

2665
        # convert the date
2666
        rgi_date = int(rgi_datestr[0:4])
7✔
2667
        if rgi_date < 0:
7✔
2668
            rgi_date = RGI_DATE[self.rgi_region]
1✔
2669
        self.rgi_date = rgi_date
7✔
2670
        # Root directory
2671
        self.base_dir = os.path.normpath(base_dir)
7✔
2672
        self.dir = os.path.join(self.base_dir, self.rgi_id[:-6],
7✔
2673
                                self.rgi_id[:-3], self.rgi_id)
2674

2675
        # Do we have to extract the files first?
2676
        if (reset or from_tar) and os.path.exists(self.dir):
7✔
2677
            shutil.rmtree(self.dir)
4✔
2678

2679
        if from_tar:
7✔
2680
            if from_tar is True:
1✔
2681
                from_tar = self.dir + '.tar.gz'
1✔
2682
            robust_tar_extract(from_tar, self.dir, delete_tar=delete_tar)
1✔
2683
            write_shp = False
1✔
2684
        else:
2685
            mkdir(self.dir)
7✔
2686

2687
        if not os.path.isdir(self.dir):
7✔
2688
            raise RuntimeError('GlacierDirectory %s does not exist!' % self.dir)
×
2689

2690
        # logging file
2691
        self.logfile = os.path.join(self.dir, 'log.txt')
7✔
2692

2693
        if write_shp:
7✔
2694
            # Write shapefile
2695
            self._reproject_and_write_shapefile(rgi_entity)
7✔
2696

2697
        # Optimization
2698
        self._mbdf = None
7✔
2699
        self._mbprofdf = None
7✔
2700
        self._mbprofdf_cte_dh = None
7✔
2701

2702
    def __repr__(self):
2703

2704
        summary = ['<oggm.GlacierDirectory>']
2705
        summary += ['  RGI id: ' + self.rgi_id]
2706
        summary += ['  Region: ' + self.rgi_region_name]
2707
        summary += ['  Subregion: ' + self.rgi_subregion_name]
2708
        if self.name:
2709
            summary += ['  Name: ' + self.name]
2710
        summary += ['  Glacier type: ' + str(self.glacier_type)]
2711
        summary += ['  Terminus type: ' + str(self.terminus_type)]
2712
        summary += ['  Status: ' + str(self.status)]
2713
        summary += ['  Area: ' + str(self.rgi_area_km2) + ' km2']
2714
        summary += ['  Lon, Lat: (' + str(self.cenlon) + ', ' +
2715
                    str(self.cenlat) + ')']
2716
        if os.path.isfile(self.get_filepath('glacier_grid')):
2717
            summary += ['  Grid (nx, ny): (' + str(self.grid.nx) + ', ' +
2718
                        str(self.grid.ny) + ')']
2719
            summary += ['  Grid (dx, dy): (' + str(self.grid.dx) + ', ' +
2720
                        str(self.grid.dy) + ')']
2721
        return '\n'.join(summary) + '\n'
2722

2723
    def _reproject_and_write_shapefile(self, entity):
9✔
2724

2725
        # Make a local glacier map
2726
        if cfg.PARAMS['map_proj'] == 'utm':
7✔
2727
            from pyproj.aoi import AreaOfInterest
×
2728
            from pyproj.database import query_utm_crs_info
×
2729
            utm_crs_list = query_utm_crs_info(
×
2730
                datum_name="WGS 84",
2731
                area_of_interest=AreaOfInterest(
2732
                    west_lon_degree=self.cenlon,
2733
                    south_lat_degree=self.cenlat,
2734
                    east_lon_degree=self.cenlon,
2735
                    north_lat_degree=self.cenlat,
2736
                ),
2737
            )
2738
            proj4_str = utm_crs_list[0].code
×
2739
        elif cfg.PARAMS['map_proj'] == 'tmerc':
7✔
2740
            params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon,
7✔
2741
                          k=0.9996, x_0=0, y_0=0, datum='WGS84')
2742
            proj4_str = ("+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} " 
7✔
2743
                         "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params))
2744
        else:
2745
            raise InvalidParamsError("cfg.PARAMS['map_proj'] must be one of "
×
2746
                                     "'tmerc', 'utm'.")
2747
        # Reproject
2748
        proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
7✔
2749
        proj_out = pyproj.Proj(proj4_str, preserve_units=True)
7✔
2750

2751
        # transform geometry to map
2752
        project = partial(transform_proj, proj_in, proj_out)
7✔
2753
        geometry = shp_trafo(project, entity['geometry'])
7✔
2754
        geometry = multipolygon_to_polygon(geometry, gdir=self)
7✔
2755

2756
        # Save transformed geometry to disk
2757
        entity = entity.copy()
7✔
2758
        entity['geometry'] = geometry
7✔
2759

2760
        # Do we want to use the RGI area or ours?
2761
        if not cfg.PARAMS['use_rgi_area']:
7✔
2762
            # Update Area
2763
            area = geometry.area * 1e-6
1✔
2764
            entity['Area'] = area
1✔
2765

2766
        # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
2767
        for k, s in entity.items():
7✔
2768
            if type(s) in [np.int32, np.int64]:
7✔
2769
                entity[k] = int(s)
5✔
2770
        towrite = gpd.GeoDataFrame(entity).T
7✔
2771
        towrite.crs = proj4_str
7✔
2772

2773
        # Write shapefile
2774
        self.write_shapefile(towrite, 'outlines')
7✔
2775

2776
        # Also transform the intersects if necessary
2777
        gdf = cfg.PARAMS['intersects_gdf']
7✔
2778
        if len(gdf) > 0:
7✔
2779
            gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
6✔
2780
                           (gdf.RGIId_2 == self.rgi_id))]
2781
            if len(gdf) > 0:
6✔
2782
                gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
4✔
2783
                if hasattr(gdf.crs, 'srs'):
4✔
2784
                    # salem uses pyproj
2785
                    gdf.crs = gdf.crs.srs
4✔
2786
                self.write_shapefile(gdf, 'intersects')
4✔
2787
        else:
2788
            # Sanity check
2789
            if cfg.PARAMS['use_intersects']:
6✔
2790
                raise InvalidParamsError(
×
2791
                    'You seem to have forgotten to set the '
2792
                    'intersects file for this run. OGGM '
2793
                    'works better with such a file. If you '
2794
                    'know what you are doing, set '
2795
                    "cfg.PARAMS['use_intersects'] = False to "
2796
                    "suppress this error.")
2797

2798
    def grid_from_params(self):
9✔
2799
        """If the glacier_grid.json file is lost, reconstruct it."""
2800
        from oggm.core.gis import glacier_grid_params
1✔
2801
        utm_proj, nx, ny, ulx, uly, dx = glacier_grid_params(self)
1✔
2802
        x0y0 = (ulx+dx/2, uly-dx/2)  # To pixel center coordinates
1✔
2803
        return salem.Grid(proj=utm_proj, nxny=(nx, ny), dxdy=(dx, -dx),
1✔
2804
                          x0y0=x0y0)
2805

2806
    @lazy_property
9✔
2807
    def grid(self):
9✔
2808
        """A ``salem.Grid`` handling the georeferencing of the local grid"""
2809
        try:
7✔
2810
            return salem.Grid.from_json(self.get_filepath('glacier_grid'))
7✔
2811
        except FileNotFoundError:
×
2812
            raise InvalidWorkflowError('This glacier directory seems to '
×
2813
                                       'have lost its glacier_grid.json file. '
2814
                                       'Use .grid_from_params(), but make sure '
2815
                                       'that the PARAMS are the ones you '
2816
                                       'want.')
2817
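
A minimal usage sketch of the two grid accessors above (illustrative only, not from the module itself), assuming an already initialized GlacierDirectory `gdir` and that the current cfg.PARAMS match the ones used at preprocessing time:

from oggm.exceptions import InvalidWorkflowError

try:
    grid = gdir.grid  # lazily read from glacier_grid.json
except InvalidWorkflowError:
    # The JSON file was lost: rebuild the grid from the current PARAMS
    # (only meaningful if they match the original preprocessing setup)
    grid = gdir.grid_from_params()
print(grid.nx, grid.ny, grid.dx)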

2818
    @lazy_property
9✔
2819
    def rgi_area_km2(self):
9✔
2820
        """The glacier's RGI area (km2)."""
2821
        try:
7✔
2822
            _area = self.read_shapefile('outlines')['Area']
7✔
2823
        except OSError:
3✔
2824
            raise RuntimeError('No outlines available')
×
2825
        except KeyError:
3✔
2826
            # RGI V7
2827
            _area = self.read_shapefile('outlines')['area_km2']
2✔
2828
        return np.round(float(_area), decimals=3)
7✔
2829

2830
    @lazy_property
9✔
2831
    def intersects_ids(self):
9✔
2832
        """The glacier's intersects RGI ids."""
2833
        try:
1✔
2834
            gdf = self.read_shapefile('intersects')
1✔
2835
            ids = np.append(gdf['RGIId_1'], gdf['RGIId_2'])
1✔
2836
            ids = list(np.unique(np.sort(ids)))
1✔
2837
            ids.remove(self.rgi_id)
1✔
2838
            return ids
1✔
2839
        except OSError:
×
2840
            return []
×
2841

2842
    @lazy_property
9✔
2843
    def dem_daterange(self):
9✔
2844
        """Years in which most of the DEM data was acquired"""
2845
        source_txt = self.get_filepath('dem_source')
1✔
2846
        if os.path.isfile(source_txt):
1✔
2847
            with open(source_txt, 'r') as f:
1✔
2848
                for line in f.readlines():
1✔
2849
                    if 'Date range:' in line:
1✔
2850
                        return tuple(map(int, line.split(':')[1].split('-')))
1✔
2851
        # we did not find the information in the dem_source file
2852
        log.warning('No DEM date range specified in `dem_source.txt`')
1✔
2853
        return None
1✔
2854

2855
    @lazy_property
9✔
2856
    def dem_info(self):
9✔
2857
        """More detailed information on the acquisition of the DEM data"""
2858
        source_file = self.get_filepath('dem_source')
1✔
2859
        source_text = ''
1✔
2860
        if os.path.isfile(source_file):
1✔
2861
            with open(source_file, 'r') as f:
1✔
2862
                for line in f.readlines():
1✔
2863
                    source_text += line
1✔
2864
        else:
2865
            log.warning('No DEM source file found.')
×
2866
        return source_text
1✔
2867

2868
    @property
9✔
2869
    def rgi_area_m2(self):
9✔
2870
        """The glacier's RGI area (m2)."""
2871
        return self.rgi_area_km2 * 10**6
7✔
2872

2873
    def get_filepath(self, filename, delete=False, filesuffix='',
9✔
2874
                     _deprecation_check=True):
2875
        """Absolute path to a specific file.
2876

2877
        Parameters
2878
        ----------
2879
        filename : str
2880
            file name (must be listed in cfg.BASENAME)
2881
        delete : bool
2882
            delete the file if exists
2883
        filesuffix : str
2884
            append a suffix to the filename (useful for model runs). Note
2885
            that the BASENAME remains same.
2886

2887
        Returns
2888
        -------
2889
        The absolute path to the desired file
2890
        """
2891

2892
        if filename not in cfg.BASENAMES:
7✔
2893
            raise ValueError(filename + ' not in cfg.BASENAMES.')
×
2894

2895
        fname = cfg.BASENAMES[filename]
7✔
2896
        if filesuffix:
7✔
2897
            fname = fname.split('.')
5✔
2898
            assert len(fname) == 2
5✔
2899
            fname = fname[0] + filesuffix + '.' + fname[1]
5✔
2900

2901
        out = os.path.join(self.dir, fname)
7✔
2902
        if delete and os.path.isfile(out):
7✔
2903
            os.remove(out)
1✔
2904
        return out
7✔
2905

2906
    def has_file(self, filename, filesuffix='', _deprecation_check=True):
9✔
2907
        """Checks if a file exists.
2908

2909
        Parameters
2910
        ----------
2911
        filename : str
2912
            file name (must be listed in cfg.BASENAME)
2913
        filesuffix : str
2914
            append a suffix to the filename (useful for model runs). Note
2915
            that the BASENAME remains the same.
2916
        """
2917
        fp = self.get_filepath(filename, filesuffix=filesuffix,
7✔
2918
                               _deprecation_check=_deprecation_check)
2919
        if '.shp' in fp and cfg.PARAMS['use_tar_shapefiles']:
7✔
2920
            fp = fp.replace('.shp', '.tar')
5✔
2921
            if cfg.PARAMS['use_compression']:
5✔
2922
                fp += '.gz'
5✔
2923
        return os.path.exists(fp)
7✔
2924
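
An illustrative sketch of the two path helpers above, assuming an initialized GlacierDirectory `gdir`; the '_CESM2' suffix is a made-up example and the basenames must be registered in cfg.BASENAMES:

fp = gdir.get_filepath('climate_historical')  # absolute path to the netCDF file
# A filesuffix is inserted before the file extension, e.g. gcm_data_CESM2.nc
fp_gcm = gdir.get_filepath('gcm_data', filesuffix='_CESM2')
if gdir.has_file('gcm_data', filesuffix='_CESM2'):
    print('found', fp_gcm)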

2925
    def add_to_diagnostics(self, key, value):
9✔
2926
        """Write a key, value pair to the gdir's runtime diagnostics.
2927

2928
        Parameters
2929
        ----------
2930
        key : str
2931
            dict entry key
2932
        value : str or number
2933
            dict entry value
2934
        """
2935

2936
        d = self.get_diagnostics()
7✔
2937
        d[key] = value
7✔
2938
        with open(self.get_filepath('diagnostics'), 'w') as f:
7✔
2939
            json.dump(d, f)
7✔
2940

2941
    def get_diagnostics(self):
9✔
2942
        """Read the gdir's runtime diagnostics.
2943

2944
        Returns
2945
        -------
2946
        the diagnostics dict
2947
        """
2948
        # If not there, create an empty one
2949
        if not self.has_file('diagnostics'):
7✔
2950
            with open(self.get_filepath('diagnostics'), 'w') as f:
5✔
2951
                json.dump(dict(), f)
5✔
2952

2953
        # Read and return
2954
        with open(self.get_filepath('diagnostics'), 'r') as f:
7✔
2955
            out = json.load(f)
7✔
2956
        return out
7✔
2957
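
A small round-trip sketch for the diagnostics helpers above (keys and values are placeholders; `gdir` is an initialized GlacierDirectory):

gdir.add_to_diagnostics('dem_source', 'SRTM')
gdir.add_to_diagnostics('n_iterations', 42)
diags = gdir.get_diagnostics()
print(diags.get('dem_source'), diags.get('n_iterations'))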

2958
    def read_pickle(self, filename, use_compression=None, filesuffix=''):
9✔
2959
        """Reads a pickle located in the directory.
2960

2961
        Parameters
2962
        ----------
2963
        filename : str
2964
            file name (must be listed in cfg.BASENAME)
2965
        use_compression : bool
2966
            whether or not the file ws compressed. Default is to use
2967
            cfg.PARAMS['use_compression'] for this (recommended)
2968
        filesuffix : str
2969
            append a suffix to the filename (useful for experiments).
2970

2971
        Returns
2972
        -------
2973
        An object read from the pickle
2974
        """
2975

2976
        use_comp = (use_compression if use_compression is not None
7✔
2977
                    else cfg.PARAMS['use_compression'])
2978
        _open = gzip.open if use_comp else open
7✔
2979
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
2980
        with _open(fp, 'rb') as f:
7✔
2981
            out = pickle.load(f)
7✔
2982

2983
        # Some new attrs to add to old pre-processed directories
2984
        if filename == 'model_flowlines':
7✔
2985
            if getattr(out[0], 'map_trafo', None) is None:
7✔
2986
                try:
1✔
2987
                    # This may fail for very old gdirs
2988
                    grid = self.grid
1✔
2989
                except InvalidWorkflowError:
×
2990
                    return out
×
2991

2992
                # Add the trafo
2993
                trafo = partial(grid.ij_to_crs, crs=salem.wgs84)
1✔
2994
                for fl in out:
1✔
2995
                    fl.map_trafo = trafo
1✔
2996

2997
        return out
7✔
2998

2999
    def write_pickle(self, var, filename, use_compression=None, filesuffix=''):
9✔
3000
        """ Writes a variable to a pickle on disk.
3001

3002
        Parameters
3003
        ----------
3004
        var : object
3005
            the variable to write to disk
3006
        filename : str
3007
            file name (must be listed in cfg.BASENAME)
3008
        use_compression : bool
3009
            whether or not to compress the file. Default is to use
3010
            cfg.PARAMS['use_compression'] for this (recommended)
3011
        filesuffix : str
3012
            append a suffix to the filename (useful for experiments).
3013
        """
3014
        use_comp = (use_compression if use_compression is not None
7✔
3015
                    else cfg.PARAMS['use_compression'])
3016
        _open = gzip.open if use_comp else open
7✔
3017
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3018
        with _open(fp, 'wb') as f:
7✔
3019
            pickle.dump(var, f, protocol=4)
7✔
3020
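
A pickle round-trip sketch, assuming `gdir` already contains inversion flowlines; the '_backup' suffix is a placeholder. Compression follows cfg.PARAMS['use_compression'] unless overridden:

fls = gdir.read_pickle('inversion_flowlines')
gdir.write_pickle(fls, 'inversion_flowlines', filesuffix='_backup')
fls_again = gdir.read_pickle('inversion_flowlines', filesuffix='_backup')
assert len(fls) == len(fls_again)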

3021
    def read_json(self, filename, filesuffix='', allow_empty=False):
9✔
3022
        """Reads a JSON file located in the directory.
3023

3024
        Parameters
3025
        ----------
3026
        filename : str
3027
            file name (must be listed in cfg.BASENAME)
3028
        filesuffix : str
3029
            append a suffix to the filename (useful for experiments).
3030
        allow_empty : bool
3031
            if True, does not raise an error if the file is not there.
3032

3033
        Returns
3034
        -------
3035
        A dictionary read from the JSON file
3036
        """
3037

3038
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3039
        if allow_empty:
7✔
3040
            try:
7✔
3041
                with open(fp, 'r') as f:
7✔
3042
                    out = json.load(f)
4✔
3043
            except FileNotFoundError:
5✔
3044
                out = {}
5✔
3045
        else:
3046
            with open(fp, 'r') as f:
7✔
3047
                out = json.load(f)
7✔
3048
        return out
7✔
3049

3050
    def write_json(self, var, filename, filesuffix=''):
9✔
3051
        """ Writes a variable to a pickle on disk.
3052

3053
        Parameters
3054
        ----------
3055
        var : object
3056
            the variable to write to JSON (must be a dictionary)
3057
        filename : str
3058
            file name (must be listed in cfg.BASENAME)
3059
        filesuffix : str
3060
            append a suffix to the filename (useful for experiments).
3061
        """
3062

3063
        def np_convert(o):
7✔
3064
            if isinstance(o, np.int64):
×
3065
                return int(o)
×
3066
            raise TypeError
×
3067

3068
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3069
        with open(fp, 'w') as f:
7✔
3070
            json.dump(var, f, default=np_convert)
7✔
3071
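
A JSON round-trip sketch, assuming 'mb_calib' is registered as a JSON basename in cfg.BASENAMES; the dictionary keys and the '_test' suffix are placeholders:

gdir.write_json({'bias': 0.0, 'melt_f': 5.0}, 'mb_calib', filesuffix='_test')
calib = gdir.read_json('mb_calib', filesuffix='_test')
# allow_empty=True returns {} instead of raising if the file is missing
maybe = gdir.read_json('mb_calib', filesuffix='_missing', allow_empty=True)
assert maybe == {}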

3072
    def get_climate_info(self, input_filesuffix=''):
9✔
3073
        """Convenience function to read attributes of the historical climate.
3074

3075
        Parameters
3076
        ----------
3077
        input_filesuffix : str
3078
            input_filesuffix of the climate_historical that should be used.
3079
        """
3080
        out = {}
7✔
3081
        try:
7✔
3082
            f = self.get_filepath('climate_historical',
7✔
3083
                                  filesuffix=input_filesuffix)
3084
            with ncDataset(f) as nc:
7✔
3085
                out['baseline_climate_source'] = nc.climate_source
7✔
3086
                try:
7✔
3087
                    out['baseline_yr_0'] = nc.yr_0
7✔
3088
                except AttributeError:
1✔
3089
                    # needed for back-compatibility before v1.6
3090
                    out['baseline_yr_0'] = nc.hydro_yr_0
1✔
3091
                try:
7✔
3092
                    out['baseline_yr_1'] = nc.yr_1
7✔
3093
                except AttributeError:
1✔
3094
                    # needed for back-compatibility before v1.6
3095
                    out['baseline_yr_1'] = nc.hydro_yr_1
1✔
3096
                out['baseline_climate_ref_hgt'] = nc.ref_hgt
7✔
3097
                out['baseline_climate_ref_pix_lon'] = nc.ref_pix_lon
7✔
3098
                out['baseline_climate_ref_pix_lat'] = nc.ref_pix_lat
7✔
3099
        except FileNotFoundError:
2✔
3100
            pass
2✔
3101

3102
        return out
7✔
3103
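
A sketch of inspecting the baseline climate attributes (the keys are the ones written by write_monthly_climate_file below), assuming climate data have already been processed for `gdir`:

ci = gdir.get_climate_info()
print(ci['baseline_climate_source'],
      ci['baseline_yr_0'], ci['baseline_yr_1'],
      ci['baseline_climate_ref_hgt'])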

3104
    def read_text(self, filename, filesuffix=''):
9✔
3105
        """Reads a text file located in the directory.
3106

3107
        Parameters
3108
        ----------
3109
        filename : str
3110
            file name (must be listed in cfg.BASENAME)
3111
        filesuffix : str
3112
            append a suffix to the filename (useful for experiments).
3113

3114
        Returns
3115
        -------
3116
        the text
3117
        """
3118

3119
        fp = self.get_filepath(filename, filesuffix=filesuffix)
×
3120
        with open(fp, 'r') as f:
×
3121
            out = f.read()
×
3122
        return out
×
3123

3124
    @classmethod
9✔
3125
    def _read_shapefile_from_path(cls, fp):
9✔
3126
        if '.shp' not in fp:
7✔
3127
            raise ValueError('File ending not that of a shapefile')
×
3128

3129
        if cfg.PARAMS['use_tar_shapefiles']:
7✔
3130
            fp = 'tar://' + fp.replace('.shp', '.tar')
7✔
3131
            if cfg.PARAMS['use_compression']:
7✔
3132
                fp += '.gz'
7✔
3133

3134
        shp = gpd.read_file(fp)
7✔
3135

3136
        # .properties file is created for compressed shapefiles. github: #904
3137
        _properties = fp.replace('tar://', '') + '.properties'
7✔
3138
        if os.path.isfile(_properties):
7✔
3139
            # remove it, to keep GDir slim
3140
            os.remove(_properties)
7✔
3141

3142
        return shp
7✔
3143

3144
    def read_shapefile(self, filename, filesuffix=''):
9✔
3145
        """Reads a shapefile located in the directory.
3146

3147
        Parameters
3148
        ----------
3149
        filename : str
3150
            file name (must be listed in cfg.BASENAME)
3151
        filesuffix : str
3152
            append a suffix to the filename (useful for experiments).
3153

3154
        Returns
3155
        -------
3156
        A geopandas.DataFrame
3157
        """
3158
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3159
        return self._read_shapefile_from_path(fp)
7✔
3160

3161
    def write_shapefile(self, var, filename, filesuffix=''):
9✔
3162
        """ Writes a variable to a shapefile on disk.
3163

3164
        Parameters
3165
        ----------
3166
        var : object
3167
            the variable to write to shapefile (must be a geopandas.DataFrame)
3168
        filename : str
3169
            file name (must be listed in cfg.BASENAME)
3170
        filesuffix : str
3171
            append a suffix to the filename (useful for experiments).
3172
        """
3173
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3174
        _write_shape_to_disk(var, fp, to_tar=cfg.PARAMS['use_tar_shapefiles'])
7✔
3175
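
A sketch of reading the glacier outlines back as a GeoDataFrame (tar/gzip handling is transparent when cfg.PARAMS['use_tar_shapefiles'] is set); `gdir` is an initialized GlacierDirectory:

outlines = gdir.read_shapefile('outlines')
# Projected area in km2 (the area column is 'Area' or 'area_km2' depending on the RGI version)
print(outlines.crs, outlines.iloc[0].geometry.area * 1e-6)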

3176
    def write_monthly_climate_file(self, time, prcp, temp,
9✔
3177
                                   ref_pix_hgt, ref_pix_lon, ref_pix_lat, *,
3178
                                   temp_std=None,
3179
                                   time_unit=None,
3180
                                   calendar=None,
3181
                                   source=None,
3182
                                   file_name='climate_historical',
3183
                                   filesuffix=''):
3184
        """Creates a netCDF4 file with climate data timeseries.
3185

3186
        Parameters
3187
        ----------
3188
        time : ndarray
3189
            the time array, in a format understood by netCDF4
3190
        prcp : ndarray
3191
            the precipitation array (unit: 'kg m-2 month-1')
3192
        temp : ndarray
3193
            the temperature array (unit: 'degC')
3194
        ref_pix_hgt : float
3195
            the elevation of the dataset's reference altitude
3196
            (for correction). In practice, it is the same altitude as the
3197
            baseline climate.
3198
        ref_pix_lon : float
3199
            the location of the gridded data's grid point
3200
        ref_pix_lat : float
3201
            the location of the gridded data's grid point
3202
        temp_std : ndarray, optional
3203
            the daily standard deviation of temperature (useful for PyGEM)
3204
        time_unit : str
3205
            the reference time unit for your time array. This should be chosen
3206
            depending on the length of your data. The default is to choose
3207
            it automatically based on the starting year.
3208
        calendar : str
3209
            If you use an exotic calendar (e.g. 'noleap')
3210
        source : str
3211
            the climate data source (required)
3212
        file_name : str
3213
            How to name the file
3214
        filesuffix : str
3215
            Apply a suffix to the file
3216
        """
3217

3218
        # overwrite as default
3219
        fpath = self.get_filepath(file_name, filesuffix=filesuffix)
7✔
3220
        if os.path.exists(fpath):
7✔
3221
            os.remove(fpath)
4✔
3222

3223
        if source is None:
7✔
3224
            raise InvalidParamsError('`source` kwarg is required')
×
3225

3226
        zlib = cfg.PARAMS['compress_climate_netcdf']
7✔
3227

3228
        try:
7✔
3229
            y0 = time[0].year
7✔
3230
            y1 = time[-1].year
7✔
3231
        except AttributeError:
2✔
3232
            time = pd.DatetimeIndex(time)
2✔
3233
            y0 = time[0].year
2✔
3234
            y1 = time[-1].year
2✔
3235

3236
        if time_unit is None:
7✔
3237
            # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
3238
            # #timestamp-limitations
3239
            if y0 > 1800:
7✔
3240
                time_unit = 'days since 1801-01-01 00:00:00'
7✔
3241
            elif y0 >= 0:
1✔
3242
                time_unit = ('days since {:04d}-01-01 '
1✔
3243
                             '00:00:00'.format(time[0].year))
3244
            else:
3245
                raise InvalidParamsError('Time format not supported')
×
3246

3247
        with ncDataset(fpath, 'w', format='NETCDF4') as nc:
7✔
3248
            nc.ref_hgt = ref_pix_hgt
7✔
3249
            nc.ref_pix_lon = ref_pix_lon
7✔
3250
            nc.ref_pix_lat = ref_pix_lat
7✔
3251
            nc.ref_pix_dis = haversine(self.cenlon, self.cenlat,
7✔
3252
                                       ref_pix_lon, ref_pix_lat)
3253
            nc.climate_source = source
7✔
3254

3255
            nc.yr_0 = y0
7✔
3256
            nc.yr_1 = y1
7✔
3257

3258
            nc.createDimension('time', None)
7✔
3259

3260
            nc.author = 'OGGM'
7✔
3261
            nc.author_info = 'Open Global Glacier Model'
7✔
3262

3263
            timev = nc.createVariable('time', 'i4', ('time',))
7✔
3264

3265
            tatts = {'units': time_unit}
7✔
3266
            if calendar is None:
7✔
3267
                calendar = 'standard'
7✔
3268

3269
            tatts['calendar'] = calendar
7✔
3270
            try:
7✔
3271
                numdate = netCDF4.date2num([t for t in time], time_unit,
7✔
3272
                                           calendar=calendar)
3273
            except TypeError:
×
3274
                # numpy's broken datetime only works for us precision
3275
                time = time.astype('M8[us]').astype(datetime.datetime)
×
3276
                numdate = netCDF4.date2num(time, time_unit, calendar=calendar)
×
3277

3278
            timev.setncatts(tatts)
7✔
3279
            timev[:] = numdate
7✔
3280

3281
            v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
7✔
3282
            v.units = 'kg m-2'
7✔
3283
            v.long_name = 'total monthly precipitation amount'
7✔
3284

3285
            v[:] = prcp
7✔
3286

3287
            v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
7✔
3288
            v.units = 'degC'
7✔
3289
            v.long_name = '2m temperature at height ref_hgt'
7✔
3290
            v[:] = temp
7✔
3291

3292
            if temp_std is not None:
7✔
3293
                v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
2✔
3294
                v.units = 'degC'
2✔
3295
                v.long_name = 'standard deviation of daily temperatures'
2✔
3296
                v[:] = temp_std
2✔
3297
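
A synthetic-data sketch of the climate writer above; real workflows use OGGM's climate-processing tasks instead, and all values below are placeholders:

import numpy as np
import pandas as pd

time = pd.date_range('1979-01-01', '2018-12-01', freq='MS')
temp = 5 + 10 * np.sin(2 * np.pi * (time.month - 7) / 12)  # degC
prcp = np.full(len(time), 100.0)  # kg m-2 per month
gdir.write_monthly_climate_file(time, prcp, temp,
                                ref_pix_hgt=2500.,
                                ref_pix_lon=gdir.cenlon,
                                ref_pix_lat=gdir.cenlat,
                                source='synthetic example data')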

3298
    def get_inversion_flowline_hw(self):
9✔
3299
        """ Shortcut function to read the heights and widths of the glacier.
3300

3301
        Parameters
3302
        ----------
3303

3304
        Returns
3305
        -------
3306
        (height, widths) in units of m
3307
        """
3308

3309
        h = np.array([])
4✔
3310
        w = np.array([])
4✔
3311
        fls = self.read_pickle('inversion_flowlines')
4✔
3312
        for fl in fls:
4✔
3313
            w = np.append(w, fl.widths)
4✔
3314
            h = np.append(h, fl.surface_h)
4✔
3315
        return h, w * self.grid.dx
4✔
3316

3317
    def set_ref_mb_data(self, mb_df=None):
9✔
3318
        """Adds reference mass balance data to this glacier.
3319

3320
        The format should be a dataframe with the years as index and
3321
        'ANNUAL_BALANCE' as values in mm yr-1.
3322
        """
3323

3324
        if self.is_tidewater:
4✔
3325
            log.warning('You are trying to set MB data on a tidewater glacier!'
×
3326
                        ' These data will be ignored by the MB model '
3327
                        'calibration routine.')
3328

3329
        if mb_df is None:
4✔
3330

3331
            flink, mbdatadir = get_wgms_files()
4✔
3332
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
4✔
3333
            wid = flink.loc[flink[c] == self.rgi_id]
4✔
3334
            if len(wid) == 0:
4✔
3335
                raise RuntimeError('Not a reference glacier!')
1✔
3336
            wid = wid.WGMS_ID.values[0]
4✔
3337

3338
            # file
3339
            reff = os.path.join(mbdatadir,
4✔
3340
                                'mbdata_WGMS-{:05d}.csv'.format(wid))
3341
            # list of years
3342
            mb_df = pd.read_csv(reff).set_index('YEAR')
4✔
3343

3344
        # Quality checks
3345
        if 'ANNUAL_BALANCE' not in mb_df:
4✔
3346
            raise InvalidParamsError('Need an "ANNUAL_BALANCE" column in the '
×
3347
                                     'dataframe.')
3348
        if not mb_df.index.is_integer():
4✔
3349
            raise InvalidParamsError('The index needs to be integer years')
×
3350

3351
        mb_df.index.name = 'YEAR'
4✔
3352
        self._mbdf = mb_df
4✔
3353

3354
    def get_ref_mb_data(self, y0=None, y1=None, input_filesuffix=''):
9✔
3355
        """Get the reference mb data from WGMS (for some glaciers only!).
3356

3357
        Raises an Error if it isn't a reference glacier at all.
3358

3359
        Parameters
3360
        ----------
3361
        y0 : int
3362
            override the default behavior which is to check the available
3363
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3364
        y1 : int
3365
            override the default behavior which is to check the available
3366
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3367
        input_filesuffix : str
3368
            input_filesuffix of the climate_historical that should be used
3369
            if y0 and y1 are not given. The default is to take the
3370
            climate_historical without input_filesuffix
3371
        """
3372

3373
        if self._mbdf is None:
4✔
3374
            self.set_ref_mb_data()
4✔
3375

3376
        # logic for period
3377
        t0, t1 = cfg.PARAMS['ref_mb_valid_window']
4✔
3378
        if t0 > 0 and y0 is None:
4✔
3379
            y0 = t0
×
3380
        if t1 > 0 and y1 is None:
4✔
3381
            y1 = t1
×
3382

3383
        if y0 is None or y1 is None:
4✔
3384
            ci = self.get_climate_info(input_filesuffix=input_filesuffix)
4✔
3385
            if 'baseline_yr_0' not in ci:
4✔
3386
                raise InvalidWorkflowError('Please process some climate data '
1✔
3387
                                           'before calling this method.')
3388
            y0 = ci['baseline_yr_0'] if y0 is None else y0
4✔
3389
            y1 = ci['baseline_yr_1'] if y1 is None else y1
4✔
3390

3391
        if len(self._mbdf) > 1:
4✔
3392
            out = self._mbdf.loc[y0:y1]
4✔
3393
        else:
3394
            # Some files are just empty
3395
            out = self._mbdf
×
3396
        return out.dropna(subset=['ANNUAL_BALANCE'])
4✔
3397
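
A sketch of attaching custom reference mass-balance data and querying a period; the values and years are placeholders and the units follow the docstring (mm yr-1, integer-year index):

import pandas as pd

mb = pd.DataFrame({'ANNUAL_BALANCE': [-500, -350, -720]},
                  index=[2001, 2002, 2003])
gdir.set_ref_mb_data(mb)
print(gdir.get_ref_mb_data(y0=2001, y1=2003))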

3398
    def get_ref_mb_profile(self, input_filesuffix='', constant_dh=False, obs_ratio_needed=0):
9✔
3399
        """Get the reference mb profile data from WGMS (if available!).
3400

3401
        Returns None if this glacier has no profile and an Error if it isn't
3402
        a reference glacier at all.
3403

3404
        Parameters
3405
        ----------
3406
        input_filesuffix : str
3407
            input_filesuffix of the climate_historical that should be used. The
3408
            default is to take the climate_historical without input_filesuffix
3409
        constant_dh : boolean
3410
            If set to True, it outputs the MB profiles with a constant step size
3411
            of dh=50m by using interpolation. This can be useful for comparisons
3412
            between years. Default is False which gives the raw
3413
            elevation-dependent point MB
3414
        obs_ratio_needed : float
3415
            necessary relative amount of observations per elevation band in order
3416
            to be included in the MB profile (0<=obs_ratio_needed<=1).
3417
            If obs_ratio_needed is set to 0, the output shows all elevation-band
3418
            observations (default is 0).
3419
            When estimating mean MB profiles, it is advisable to set obs_ratio_needed
3420
            to 0.6. E.g. if there are in total 5 years of measurements, only those elevation
3421
            bands with at least 3 years of measurements are used. If obs_ratio_needed is not
3422
            0, constant_dh has to be set to True.
3423
        """
3424

3425
        if obs_ratio_needed != 0 and constant_dh is False:
1✔
3426
            raise InvalidParamsError('If a filter is applied, you have to set'
×
3427
                                     ' constant_dh to True')
3428
        if obs_ratio_needed < 0 or obs_ratio_needed > 1:
1✔
3429
            raise InvalidParamsError('obs_ratio_needed is the ratio of necessary relative amount'
×
3430
                                     ' of observations per elevation band. It has to be between '
3431
                                     '0 and 1!')
3432

3433
        if self._mbprofdf is None and not constant_dh:
1✔
3434
            flink, mbdatadir = get_wgms_files()
1✔
3435
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3436
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3437
            if len(wid) == 0:
1✔
3438
                raise RuntimeError('Not a reference glacier!')
×
3439
            wid = wid.WGMS_ID.values[0]
1✔
3440

3441
            # file
3442
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles')
1✔
3443
            reff = os.path.join(mbdatadir,
1✔
3444
                                'profile_WGMS-{:05d}.csv'.format(wid))
3445
            if not os.path.exists(reff):
1✔
3446
                return None
×
3447
            # list of years
3448
            self._mbprofdf = pd.read_csv(reff, index_col=0)
1✔
3449

3450
        if self._mbprofdf_cte_dh is None and constant_dh:
1✔
3451
            flink, mbdatadir = get_wgms_files()
1✔
3452
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3453
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3454
            if len(wid) == 0:
1✔
3455
                raise RuntimeError('Not a reference glacier!')
×
3456
            wid = wid.WGMS_ID.values[0]
1✔
3457

3458
            # file
3459
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles_constant_dh')
1✔
3460
            reff = os.path.join(mbdatadir,
1✔
3461
                                'profile_constant_dh_WGMS-{:05d}.csv'.format(wid))
3462
            if not os.path.exists(reff):
1✔
3463
                return None
×
3464
            # list of years
3465
            self._mbprofdf_cte_dh = pd.read_csv(reff, index_col=0)
1✔
3466

3467
        ci = self.get_climate_info(input_filesuffix=input_filesuffix)
1✔
3468
        if 'baseline_yr_0' not in ci:
1✔
3469
            raise RuntimeError('Please process some climate data before calling this method.')
×
3470
        y0 = ci['baseline_yr_0']
1✔
3471
        y1 = ci['baseline_yr_1']
1✔
3472
        if not constant_dh:
1✔
3473
            if len(self._mbprofdf) > 1:
1✔
3474
                out = self._mbprofdf.loc[y0:y1]
1✔
3475
            else:
3476
                # Some files are just empty
3477
                out = self._mbprofdf
×
3478
        else:
3479
            if len(self._mbprofdf_cte_dh) > 1:
1✔
3480
                out = self._mbprofdf_cte_dh.loc[y0:y1]
1✔
3481
                if obs_ratio_needed != 0:
1✔
3482
                    # amount of years with any observation
3483
                    n_obs = len(out.index)
1✔
3484
                    # amount of years with observations for each elevation band
3485
                    n_obs_h = out.describe().loc['count']
1✔
3486
                    # relative amount of observations per elevation band
3487
                    rel_obs_h = n_obs_h / n_obs
1✔
3488
                    # select only those elevation bands with a specific ratio
3489
                    # of years with available measurements
3490
                    out = out[rel_obs_h[rel_obs_h >= obs_ratio_needed].index]
1✔
3491

3492
            else:
3493
                # Some files are just empty
3494
                out = self._mbprofdf_cte_dh
×
3495
        out.columns = [float(c) for c in out.columns]
1✔
3496
        return out.dropna(axis=1, how='all').dropna(axis=0, how='all')
1✔
3497
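
A sketch of the profile getter above for a hypothetical WGMS reference glacier; it requires processed climate data and available profile files, otherwise it returns None or raises:

prof = gdir.get_ref_mb_profile(constant_dh=True, obs_ratio_needed=0.6)
if prof is not None:
    # mean elevation-band mass balance over the available years
    print(prof.mean())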

3498

3499
    def get_ref_length_data(self):
9✔
3500
        """Get the glacier length data from P. Leclercq's data base.
3501

3502
         https://folk.uio.no/paulwl/data.php
3503

3504
         For some glaciers only!
3505
         """
3506

3507
        df = pd.read_csv(get_demo_file('rgi_leclercq_links_2012_RGIV5.csv'))
1✔
3508
        df = df.loc[df.RGI_ID == self.rgi_id]
1✔
3509
        if len(df) == 0:
1✔
3510
            raise RuntimeError('No length data found for this glacier!')
×
3511
        ide = df.LID.values[0]
1✔
3512

3513
        f = get_demo_file('Glacier_Lengths_Leclercq.nc')
1✔
3514
        with xr.open_dataset(f) as dsg:
1✔
3515
            # The database is not sorted by ID. Don't ask me...
3516
            grp_id = np.argwhere(dsg['index'].values == ide)[0][0] + 1
1✔
3517
        with xr.open_dataset(f, group=str(grp_id)) as ds:
1✔
3518
            df = ds.to_dataframe()
1✔
3519
            df.name = ds.glacier_name
1✔
3520
        return df
1✔
3521

3522
    def log(self, task_name, *, err=None, task_time=None):
9✔
3523
        """Logs a message to the glacier directory.
3524

3525
        It is usually called by the :py:class:`entity_task` decorator, so normally
3526
        you shouldn't have to worry about it.
3527

3528
        Parameters
3529
        ----------
3530
        func : a function
3531
            the function which wants to log
3532
        err : Exception
3533
            the exception which has been raised by func (if no exception was
3534
            raised, a success is logged)
3535
        time : float
3536
            the time (in seconds) that the task needed to run
3537
        """
3538

3539
        # a line per function call
3540
        nowsrt = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
7✔
3541
        line = nowsrt + ';' + task_name + ';'
7✔
3542

3543
        if task_time is not None:
7✔
3544
            line += 'time:{};'.format(task_time)
7✔
3545

3546
        if err is None:
7✔
3547
            line += 'SUCCESS'
7✔
3548
        else:
3549
            line += err.__class__.__name__ + ': {}'.format(err)
6✔
3550

3551
        line = line.replace('\n', ' ')
7✔
3552

3553
        count = 0
7✔
3554
        while count < 5:
7✔
3555
            try:
7✔
3556
                with open(self.logfile, 'a') as logfile:
7✔
3557
                    logfile.write(line + '\n')
7✔
3558
                break
7✔
3559
            except FileNotFoundError:
×
3560
                # I really don't know when this error happens
3561
                # In this case sleep and try again
3562
                time.sleep(0.05)
×
3563
                count += 1
×
3564

3565
        if count == 5:
7✔
3566
            log.warning('Could not write to logfile: ' + line)
×
3567

3568
    def get_task_status(self, task_name):
9✔
3569
        """Opens this directory's log file to check for a task's outcome.
3570

3571
        Parameters
3572
        ----------
3573
        task_name : str
3574
            the name of the task which has to be tested for
3575

3576
        Returns
3577
        -------
3578
        The last message for this task (SUCCESS if it was successful),
3579
        None if the task was not run yet
3580
        """
3581

3582
        if not os.path.isfile(self.logfile):
7✔
3583
            return None
7✔
3584

3585
        with open(self.logfile) as logfile:
7✔
3586
            lines = logfile.readlines()
7✔
3587

3588
        lines = [l.replace('\n', '') for l in lines
7✔
3589
                 if ';' in l and (task_name == l.split(';')[1])]
3590
        if lines:
7✔
3591
            # keep only the last log
3592
            return lines[-1].split(';')[-1]
7✔
3593
        else:
3594
            return None
7✔
3595

3596
    def get_task_time(self, task_name):
9✔
3597
        """Opens this directory's log file to check for a task's run time.
3598

3599
        Parameters
3600
        ----------
3601
        task_name : str
3602
            the name of the task which has to be tested for
3603

3604
        Returns
3605
        -------
3606
        The timing that the last call of this task needed.
3607
        None if the task was not run yet, or if it errored
3608
        """
3609

3610
        if not os.path.isfile(self.logfile):
1✔
3611
            return None
×
3612

3613
        with open(self.logfile) as logfile:
1✔
3614
            lines = logfile.readlines()
1✔
3615

3616
        lines = [l.replace('\n', '') for l in lines
1✔
3617
                 if task_name == l.split(';')[1]]
3618
        if lines:
1✔
3619
            line = lines[-1]
1✔
3620
            # Last log is message
3621
            if 'ERROR' in line.split(';')[-1] or 'time:' not in line:
1✔
3622
                return None
1✔
3623
            # Get the time
3624
            return float(line.split('time:')[-1].split(';')[0])
1✔
3625
        else:
3626
            return None
1✔
3627

3628
    def get_error_log(self):
9✔
3629
        """Reads the directory's log file to find the invalid task (if any).
3630

3631
        Returns
3632
        -------
3633
        The first error message in this log, None if all good
3634
        """
3635

3636
        if not os.path.isfile(self.logfile):
5✔
3637
            return None
1✔
3638

3639
        with open(self.logfile) as logfile:
5✔
3640
            lines = logfile.readlines()
5✔
3641

3642
        for l in lines:
5✔
3643
            if 'SUCCESS' in l:
5✔
3644
                continue
5✔
3645
            return l.replace('\n', '')
2✔
3646

3647
        # OK all good
3648
        return None
5✔
3649
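
A sketch of querying the per-glacier task log with the helpers above; 'define_glacier_region' is just an example task name:

status = gdir.get_task_status('define_glacier_region')
runtime = gdir.get_task_time('define_glacier_region')
first_error = gdir.get_error_log()
print(status, runtime, first_error)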

3650

3651
@entity_task(log)
9✔
3652
def copy_to_basedir(gdir, base_dir=None, setup='run'):
9✔
3653
    """Copies the glacier directories and their content to a new location.
3654

3655
    This utility function allows you to select certain files only, thus
3656
    saving time when copying.
3657

3658
    Parameters
3659
    ----------
3660
    gdir : :py:class:`oggm.GlacierDirectory`
3661
        the glacier directory to copy
3662
    base_dir : str
3663
        path to the new base directory (should end with "per_glacier"
3664
        most of the time)
3665
    setup : str
3666
        the setup you want the copied directory to be useful for. Currently
3667
        supported are 'all' (copy the entire directory), 'inversion'
3668
        (copy the necessary files for the inversion AND the run),
3669
        'run' (copy the necessary files for a dynamical run) or 'run/spinup'
3670
        (copy the necessary files and all already conducted model runs, e.g.
3671
        from a dynamic spinup).
3672

3673
    Returns
3674
    -------
3675
    New glacier directories from the copied folders
3676
    """
3677
    base_dir = os.path.abspath(base_dir)
2✔
3678
    new_dir = os.path.join(base_dir, gdir.rgi_id[:8], gdir.rgi_id[:11],
2✔
3679
                           gdir.rgi_id)
3680
    if setup == 'run':
2✔
3681
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3682
                 'mb_calib', 'climate_historical', 'glacier_grid',
3683
                 'gcm_data', 'diagnostics', 'log']
3684
        paths = ('*' + p + '*' for p in paths)
1✔
3685
        shutil.copytree(gdir.dir, new_dir,
1✔
3686
                        ignore=include_patterns(*paths))
3687
    elif setup == 'inversion':
2✔
3688
        paths = ['inversion_params', 'downstream_line', 'outlines',
1✔
3689
                 'inversion_flowlines', 'glacier_grid', 'diagnostics',
3690
                 'mb_calib', 'climate_historical', 'gridded_data',
3691
                 'gcm_data', 'log']
3692
        paths = ('*' + p + '*' for p in paths)
1✔
3693
        shutil.copytree(gdir.dir, new_dir,
1✔
3694
                        ignore=include_patterns(*paths))
3695
    elif setup == 'run/spinup':
2✔
3696
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
3697
                 'mb_calib', 'climate_historical', 'glacier_grid',
3698
                 'gcm_data', 'diagnostics', 'log', 'model_run',
3699
                 'model_diagnostics', 'model_geometry']
3700
        paths = ('*' + p + '*' for p in paths)
1✔
3701
        shutil.copytree(gdir.dir, new_dir,
1✔
3702
                        ignore=include_patterns(*paths))
3703
    elif setup == 'all':
1✔
3704
        shutil.copytree(gdir.dir, new_dir)
1✔
3705
    else:
3706
        raise ValueError('setup not understood: {}'.format(setup))
×
3707
    return GlacierDirectory(gdir.rgi_id, base_dir=base_dir)
2✔
3708
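
A sketch of copying only the files needed for a dynamical run to a new base directory; the target path is a placeholder:

new_gdir = copy_to_basedir(gdir, base_dir='/path/to/new/per_glacier',
                           setup='run')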

3709

3710
def initialize_merged_gdir(main, tribs=[], glcdf=None,
9✔
3711
                           filename='climate_historical',
3712
                           input_filesuffix='',
3713
                           dem_source=None):
3714
    """Creates a new GlacierDirectory if tributaries are merged to a glacier
3715

3716
    This function should be called after centerlines.intersect_downstream_lines
3717
    and before flowline.merge_tributary_flowlines.
3718
    It will create a new GlacierDirectory, with a suitable DEM and reproject
3719
    the flowlines of the main glacier.
3720

3721
    Parameters
3722
    ----------
3723
    main : oggm.GlacierDirectory
3724
        the main glacier
3725
    tribs : list or dictionary containing oggm.GlacierDirectories
3726
        true tributary glaciers to the main glacier
3727
    glcdf: geopandas.GeoDataFrame
3728
        which contains the main glacier, will be downloaded if None
3729
    filename: str
3730
        Baseline climate file
3731
    input_filesuffix: str
3732
        Filesuffix to the climate file
3733
    dem_source: str
3734
        the DEM source to use
3735
    Returns
3736
    -------
3737
    merged : oggm.GlacierDirectory
3738
        the new GDir
3739
    """
3740
    from oggm.core.gis import define_glacier_region, merged_glacier_masks
×
3741

3742
    # If it's a dict, select the relevant ones
3743
    if isinstance(tribs, dict):
×
3744
        tribs = tribs[main.rgi_id]
×
3745
    # make sure tributaries are iterable
3746
    tribs = tolist(tribs)
×
3747

3748
    # read flowlines of the Main glacier
3749
    mfls = main.read_pickle('model_flowlines')
×
3750

3751
    # ------------------------------
3752
    # 0. create the new GlacierDirectory from main glaciers GeoDataFrame
3753
    # Should be passed along, if not download it
3754
    if glcdf is None:
×
3755
        glcdf = get_rgi_glacier_entities([main.rgi_id])
×
3756
    # Get index location of the specific glacier
3757
    idx = glcdf.loc[glcdf.RGIId == main.rgi_id].index
×
3758
    maindf = glcdf.loc[idx].copy()
×
3759

3760
    # add tributary geometries to maindf
3761
    merged_geometry = maindf.loc[idx, 'geometry'].iloc[0].buffer(0)
×
3762
    for trib in tribs:
×
3763
        geom = trib.read_pickle('geometries')['polygon_hr']
×
3764
        geom = salem.transform_geometry(geom, crs=trib.grid)
×
3765
        merged_geometry = merged_geometry.union(geom).buffer(0)
×
3766

3767
    # to get the center point, maximal extensions for DEM and single Polygon:
3768
    new_geometry = merged_geometry.convex_hull
×
3769
    maindf.loc[idx, 'geometry'] = new_geometry
×
3770

3771
    # make some adjustments to the rgi dataframe
3772
    # 1. calculate central point of new glacier
3773
    #    reproject twice to avoid Warning, first to flat projection
3774
    flat_centroid = salem.transform_geometry(new_geometry,
×
3775
                                             to_crs=main.grid).centroid
3776
    #    second reprojection of centroid to wgms
3777
    new_centroid = salem.transform_geometry(flat_centroid, crs=main.grid)
×
3778
    maindf.loc[idx, 'CenLon'] = new_centroid.x
×
3779
    maindf.loc[idx, 'CenLat'] = new_centroid.y
×
3780
    # 2. update names
3781
    maindf.loc[idx, 'RGIId'] += '_merged'
×
3782
    if maindf.loc[idx, 'Name'].iloc[0] is None:
×
3783
        maindf.loc[idx, 'Name'] = main.name + ' (merged)'
×
3784
    else:
3785
        maindf.loc[idx, 'Name'] += ' (merged)'
×
3786

3787
    # finally create new Glacier Directory
3788
    # 1. set dx spacing to the one used for the flowlines
3789
    dx_method = cfg.PARAMS['grid_dx_method']
×
3790
    dx_spacing = cfg.PARAMS['fixed_dx']
×
3791
    cfg.PARAMS['grid_dx_method'] = 'fixed'
×
3792
    cfg.PARAMS['fixed_dx'] = mfls[-1].map_dx
×
3793
    merged = GlacierDirectory(maindf.loc[idx].iloc[0])
×
3794

3795
    # run define_glacier_region to get a fitting DEM and proper grid
3796
    define_glacier_region(merged, entity=maindf.loc[idx].iloc[0],
×
3797
                          source=dem_source)
3798

3799
    # write gridded data and geometries for visualization
3800
    merged_glacier_masks(merged, merged_geometry)
×
3801

3802
    # reset dx method
3803
    cfg.PARAMS['grid_dx_method'] = dx_method
×
3804
    cfg.PARAMS['fixed_dx'] = dx_spacing
×
3805

3806
    # copy main climate file, climate info and calib to new gdir
3807
    climfilename = filename + '_' + main.rgi_id + input_filesuffix + '.nc'
×
3808
    climfile = os.path.join(merged.dir, climfilename)
×
3809
    shutil.copyfile(main.get_filepath(filename, filesuffix=input_filesuffix),
×
3810
                    climfile)
3811
    _mufile = os.path.basename(merged.get_filepath('mb_calib')).split('.')
×
3812
    mufile = _mufile[0] + '_' + main.rgi_id + '.' + _mufile[1]
×
3813
    shutil.copyfile(main.get_filepath('mb_calib'),
×
3814
                    os.path.join(merged.dir, mufile))
3815

3816
    # reproject the flowlines to the new grid
3817
    for nr, fl in reversed(list(enumerate(mfls))):
×
3818

3819
        # 1. Step: Change projection to the main glaciers grid
3820
        _line = salem.transform_geometry(fl.line,
×
3821
                                         crs=main.grid, to_crs=merged.grid)
3822
        # 2. set new line
3823
        fl.set_line(_line)
×
3824

3825
        # 3. set flow to attributes
3826
        if fl.flows_to is not None:
×
3827
            fl.set_flows_to(fl.flows_to)
×
3828
        # remove inflow points, will be set by other flowlines if need be
3829
        fl.inflow_points = []
×
3830

3831
        # 5. set grid size attributes
3832
        dx = [shpg.Point(fl.line.coords[i]).distance(
×
3833
            shpg.Point(fl.line.coords[i+1]))
3834
            for i, pt in enumerate(fl.line.coords[:-1])]  # get distance
3835
        # and check if equally spaced
3836
        if not np.allclose(dx, np.mean(dx), atol=1e-2):
×
3837
            raise RuntimeError('Flowline is not evenly spaced.')
×
3838
        # dx might very slightly change, but should not
3839
        fl.dx = np.mean(dx).round(2)
×
3840
        # map_dx should stay exactly the same
3841
        # fl.map_dx = mfls[-1].map_dx
3842
        fl.dx_meter = fl.map_dx * fl.dx
×
3843

3844
        # replace flowline within the list
3845
        mfls[nr] = fl
×
3846

3847
    # Write the reprojecflowlines
3848
    merged.write_pickle(mfls, 'model_flowlines')
×
3849

3850
    return merged
×
3851

3852

3853
@entity_task(log)
9✔
3854
def gdir_to_tar(gdir, base_dir=None, delete=True):
9✔
3855
    """Writes the content of a glacier directory to a tar file.
3856

3857
    The tar file is written in the same location as the original directory.
3858
    The glacier directory object becomes unusable once the original directory is deleted!
3859

3860
    Parameters
3861
    ----------
3862
    base_dir : str
3863
        path to the basedir where to write the directory (defaults to the
3864
        same location of the original directory)
3865
    delete : bool
3866
        delete the original directory afterwards (default)
3867

3868
    Returns
3869
    -------
3870
    the path to the tar file
3871
    """
3872

3873
    source_dir = os.path.normpath(gdir.dir)
1✔
3874
    opath = source_dir + '.tar.gz'
1✔
3875
    if base_dir is not None:
1✔
3876
        opath = os.path.join(base_dir, os.path.relpath(opath, gdir.base_dir))
1✔
3877
        mkdir(os.path.dirname(opath))
1✔
3878

3879
    with tarfile.open(opath, "w:gz") as tar:
1✔
3880
        tar.add(source_dir, arcname=os.path.basename(source_dir))
1✔
3881

3882
    if delete:
1✔
3883
        shutil.rmtree(source_dir)
1✔
3884

3885
    return opath
1✔
3886
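
A sketch of archiving a glacier directory and re-opening it from the tar file; re-opening relies on the from_tar handling shown in GlacierDirectory.__init__ above and assumes the tar was left next to the original directory:

tar_path = gdir_to_tar(gdir, delete=True)  # removes the original directory
gdir_again = GlacierDirectory(gdir.rgi_id, base_dir=gdir.base_dir,
                              from_tar=True)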

3887

3888
def base_dir_to_tar(base_dir=None, delete=True):
9✔
3889
    """Merge the directories into 1000 bundles as tar files.
3890

3891
    The tar files are written in the same location as the original directories.
3892

3893
    Parameters
3894
    ----------
3895
    base_dir : str
3896
        path to the basedir to parse (defaults to the working directory)
3897
    to_base_dir : str
3898
        path to the basedir where to write the directory (defaults to the
3899
        same location of the original directory)
3900
    delete : bool
3901
        delete the original directory tars afterwards (default)
3902
    """
3903

3904
    if base_dir is None:
1✔
3905
        if not cfg.PATHS.get('working_dir', None):
1✔
3906
            raise ValueError("Need a valid PATHS['working_dir']!")
×
3907
        base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
1✔
3908

3909
    to_delete = []
1✔
3910
    for dirname, subdirlist, filelist in os.walk(base_dir):
1✔
3911
        # RGI60-01.00
3912
        bname = os.path.basename(dirname)
1✔
3913
        if not (len(bname) == 11 and bname[-3] == '.'):
1✔
3914
            continue
1✔
3915
        opath = dirname + '.tar'
1✔
3916
        with tarfile.open(opath, 'w') as tar:
1✔
3917
            tar.add(dirname, arcname=os.path.basename(dirname))
1✔
3918
        if delete:
1✔
3919
            to_delete.append(dirname)
1✔
3920

3921
    for dirname in to_delete:
1✔
3922
        shutil.rmtree(dirname)
1✔