
OGGM / oggm / 17036427554

18 Aug 2025 09:19AM UTC coverage: 84.387% (-0.03%) from 84.412%

Merge f58c0836a into 0793d4813
Pull Request #1795: add new observation handling

208 of 217 new or added lines in 7 files covered. (95.85%)

28 existing lines in 2 files now uncovered.

12404 of 14699 relevant lines covered (84.39%)

3.78 hits per line

Source File

/oggm/utils/_workflow.py (86.69% of lines covered)
1
"""Classes and functions used by the OGGM workflow"""
2

3
# Builtins
4
import glob
9✔
5
import os
9✔
6
import tempfile
9✔
7
import gzip
9✔
8
import json
9✔
9
import time
9✔
10
import random
9✔
11
import shutil
9✔
12
import tarfile
9✔
13
import sys
9✔
14
import signal
9✔
15
import datetime
9✔
16
import logging
9✔
17
import pickle
9✔
18
import warnings
9✔
19
import itertools
9✔
20
from collections import OrderedDict
9✔
21
from functools import partial, wraps
9✔
22
from time import gmtime, strftime
9✔
23
import fnmatch
9✔
24
import platform
9✔
25
import struct
9✔
26
import importlib
9✔
27
import re as regexp
9✔
28
from pathlib import Path
9✔
29

30
# External libs
31
import pandas as pd
9✔
32
import numpy as np
9✔
33
from scipy import stats
9✔
34
import xarray as xr
9✔
35
import shapely.geometry as shpg
9✔
36
import shapely.affinity as shpa
9✔
37
from shapely.ops import transform as shp_trafo
9✔
38
import netCDF4
9✔
39

40
# Optional libs
41
try:
9✔
42
    import geopandas as gpd
9✔
43
except ImportError:
44
    pass
45
try:
9✔
46
    import salem
9✔
47
except ImportError:
48
    pass
49
try:
9✔
50
    from salem import wgs84
9✔
51
    from salem.gis import transform_proj
9✔
52
except ImportError:
53
    pass
54
try:
9✔
55
    import pyproj
9✔
56
except ImportError:
57
    pass
58
try:
9✔
59
    import yaml
9✔
60
except ImportError:
61
    pass
62

63
# Python 3.12+ gives a deprecation warning if TarFile.extraction_filter is None.
64
# https://docs.python.org/3.12/library/tarfile.html#tarfile-extraction-filter
65
if hasattr(tarfile, "fully_trusted_filter"):
9✔
66
    tarfile.TarFile.extraction_filter = staticmethod(tarfile.fully_trusted_filter)  # type: ignore
9✔
67

68
# Locals
69
from oggm import __version__
9✔
70
from oggm.utils._funcs import (calendardate_to_hydrodate, date_to_floatyear,
9✔
71
                               tolist, filter_rgi_name, parse_rgi_meta,
72
                               haversine, multipolygon_to_polygon,
73
                               recursive_valid_polygons)
74
from oggm.utils._downloads import (get_demo_file, get_wgms_files,
9✔
75
                                   get_rgi_glacier_entities)
76
from oggm import cfg
9✔
77
from oggm.exceptions import InvalidParamsError, InvalidWorkflowError
9✔
78

79

80
# Default RGI date (median per region in RGI6)
81
RGI_DATE = {'01': 2009,
9✔
82
            '02': 2004,
83
            '03': 1999,
84
            '04': 2001,
85
            '05': 2001,
86
            '06': 2000,
87
            '07': 2008,
88
            '08': 2002,
89
            '09': 2001,
90
            '10': 2011,
91
            '11': 2003,
92
            '12': 2001,
93
            '13': 2006,
94
            '14': 2001,
95
            '15': 2001,
96
            '16': 2000,
97
            '17': 2000,
98
            '18': 1978,
99
            '19': 1989,
100
            }
101

102
# Module logger
103
log = logging.getLogger('.'.join(__name__.split('.')[:-1]))
9✔
104

105

106
def empty_cache():
9✔
107
    """Empty oggm's cache directory."""
108

109
    if os.path.exists(cfg.CACHE_DIR):
×
110
        shutil.rmtree(cfg.CACHE_DIR)
×
111
    os.makedirs(cfg.CACHE_DIR)
×
112

113

114
def expand_path(p):
9✔
115
    """Helper function for os.path.expanduser and os.path.expandvars"""
116

117
    return os.path.expandvars(os.path.expanduser(p))
×
118

119

120
def gettempdir(dirname='', reset=False, home=False):
9✔
121
    """Get a temporary directory.
122

123
    The default is to locate it in the system's temporary directory as
124
    given by python's `tempfile.gettempdir()`/OGGM. You can set `home=True` for
125
    a directory in the user's `home/tmp` folder instead (this isn't really
126
    a temporary folder but well...)
127

128
    Parameters
129
    ----------
130
    dirname : str
131
        if you want to give it a name
132
    reset : bool
133
        if it has to be emptied first.
134
    home : bool
135
        if True, returns `HOME/tmp/OGGM` instead
136

137
    Returns
138
    -------
139
    the path to the temporary directory
140
    """
141

142
    basedir = (os.path.join(os.path.expanduser('~'), 'tmp') if home
1✔
143
               else tempfile.gettempdir())
144
    return mkdir(os.path.join(basedir, 'OGGM', dirname), reset=reset)
1✔
145

146

147
# alias
148
get_temp_dir = gettempdir
9✔
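# Usage sketch (illustrative; the directory name below is hypothetical):
# gettempdir() builds a subfolder of the system temp dir (or of HOME/tmp with
# `home=True`) and creates it on demand via mkdir():
#
#     tdir = gettempdir(dirname='my_run', reset=True)
#     # -> e.g. '/tmp/OGGM/my_run', emptied first because reset=True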
149

150

151
def get_sys_info():
9✔
152
    """Returns system information as a list of tuples"""
153

154
    blob = []
8✔
155
    try:
8✔
156
        (sysname, nodename, release,
8✔
157
         version, machine, processor) = platform.uname()
158
        blob.extend([
8✔
159
            ("python", "%d.%d.%d.%s.%s" % sys.version_info[:]),
160
            ("python-bits", struct.calcsize("P") * 8),
161
            ("OS", "%s" % (sysname)),
162
            ("OS-release", "%s" % (release)),
163
            ("machine", "%s" % (machine)),
164
            ("processor", "%s" % (processor)),
165
        ])
166
    except BaseException:
×
167
        pass
×
168

169
    return blob
8✔
170

171

172
def get_env_info():
9✔
173
    """Returns env information as a list of tuples"""
174

175
    deps = [
8✔
176
        # (MODULE_NAME, f(mod) -> mod version)
177
        ("oggm", lambda mod: mod.__version__),
178
        ("numpy", lambda mod: mod.__version__),
179
        ("scipy", lambda mod: mod.__version__),
180
        ("pandas", lambda mod: mod.__version__),
181
        ("geopandas", lambda mod: mod.__version__),
182
        ("netCDF4", lambda mod: mod.__version__),
183
        ("matplotlib", lambda mod: mod.__version__),
184
        ("rasterio", lambda mod: mod.__version__),
185
        ("fiona", lambda mod: mod.__version__),
186
        ("pyproj", lambda mod: mod.__version__),
187
        ("shapely", lambda mod: mod.__version__),
188
        ("xarray", lambda mod: mod.__version__),
189
        ("dask", lambda mod: mod.__version__),
190
        ("salem", lambda mod: mod.__version__),
191
    ]
192

193
    deps_blob = list()
8✔
194
    for (modname, ver_f) in deps:
8✔
195
        try:
8✔
196
            if modname in sys.modules:
8✔
197
                mod = sys.modules[modname]
8✔
198
            else:
199
                mod = importlib.import_module(modname)
×
200
            ver = ver_f(mod)
8✔
201
            deps_blob.append((modname, ver))
8✔
202
        except BaseException:
×
203
            deps_blob.append((modname, None))
×
204

205
    return deps_blob
8✔
206

207

208
def get_git_ident():
9✔
209
    ident_str = '$Id$'
8✔
210
    if ":" not in ident_str:
8✔
211
        return 'no_git_id'
×
212
    return ident_str.replace("$", "").replace("Id:", "").replace(" ", "")
8✔
213

214

215
def show_versions(logger=None):
9✔
216
    """Prints the OGGM version and other system information.
217

218
    Parameters
219
    ----------
220
    logger : optional
221
        the logger you want to send the printouts to. If None, the string is only returned
222

223
    Returns
224
    -------
225
    the output string
226
    """
227

228
    sys_info = get_sys_info()
1✔
229
    deps_blob = get_env_info()
1✔
230

231
    out = ['# OGGM environment: ']
1✔
232
    out.append("## System info:")
1✔
233
    for k, stat in sys_info:
1✔
234
        out.append("    %s: %s" % (k, stat))
1✔
235
    out.append("## Packages info:")
1✔
236
    for k, stat in deps_blob:
1✔
237
        out.append("    %s: %s" % (k, stat))
1✔
238
    out.append("    OGGM git identifier: " + get_git_ident())
1✔
239

240
    if logger is not None:
1✔
241
        logger.workflow('\n'.join(out))
1✔
242

243
    return '\n'.join(out)
1✔
244

245

246
class SuperclassMeta(type):
9✔
247
    """Metaclass for abstract base classes.
248

249
    http://stackoverflow.com/questions/40508492/python-sphinx-inherit-
250
    method-documentation-from-superclass
251
    """
252
    def __new__(mcls, classname, bases, cls_dict):
9✔
253
        cls = super().__new__(mcls, classname, bases, cls_dict)
9✔
254
        for name, member in cls_dict.items():
9✔
255
            if not getattr(member, '__doc__'):
9✔
256
                try:
9✔
257
                    member.__doc__ = getattr(bases[-1], name).__doc__
9✔
258
                except AttributeError:
9✔
259
                    pass
9✔
260
        return cls
9✔
261

262

263
class LRUFileCache():
9✔
264
    """A least recently used cache for temporary files.
265

266
    The files which are no longer used are deleted from the disk.
267
    """
268

269
    def __init__(self, l0=None, maxsize=None):
9✔
270
        """Instantiate.
271

272
        Parameters
273
        ----------
274
        l0 : list
275
            a list of file paths
276
        maxsize : int
277
            the max number of files to keep
278
        """
279
        self.files = [] if l0 is None else l0
6✔
280
        # if no maxsize is specified, use value from configuration
281
        maxsize = cfg.PARAMS['lru_maxsize'] if maxsize is None else maxsize
6✔
282
        self.maxsize = maxsize
6✔
283
        self.purge()
6✔
284

285
    def purge(self):
9✔
286
        """Remove expired entries."""
287
        if len(self.files) > self.maxsize:
6✔
288
            fpath = self.files.pop(0)
1✔
289
            if os.path.exists(fpath):
1✔
290
                os.remove(fpath)
1✔
291

292
    def append(self, fpath):
9✔
293
        """Append a file to the list."""
294
        if fpath not in self.files:
6✔
295
            self.files.append(fpath)
4✔
296
        self.purge()
6✔
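# Usage sketch (illustrative; the file names below are hypothetical): the cache
# keeps at most `maxsize` paths and removes the oldest file from disk once the
# limit is exceeded:
#
#     cache = LRUFileCache(maxsize=2)
#     for f in ['a.tif', 'b.tif', 'c.tif']:
#         cache.append(f)   # appending 'c.tif' deletes 'a.tif' from disk
#     cache.files           # -> ['b.tif', 'c.tif']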
297

298

299
def lazy_property(fn):
9✔
300
    """Decorator that makes a property lazy-evaluated."""
301

302
    attr_name = '_lazy_' + fn.__name__
9✔
303

304
    @property
9✔
305
    @wraps(fn)
9✔
306
    def _lazy_property(self):
9✔
307
        if not hasattr(self, attr_name):
8✔
308
            setattr(self, attr_name, fn(self))
8✔
309
        return getattr(self, attr_name)
8✔
310

311
    return _lazy_property
9✔
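# Usage sketch (illustrative): the wrapped method runs once on first access,
# after which the cached value is returned:
#
#     class Example:
#         @lazy_property
#         def heavy(self):
#             print('computing')   # printed only on the first access
#             return 42
#
#     e = Example()
#     e.heavy   # prints 'computing' and returns 42
#     e.heavy   # returns the cached 42 without recomputing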
312

313

314
def mkdir(path, reset=False):
9✔
315
    """Checks if directory exists and if not, create one.
316

317
    Parameters
318
    ----------
319
    reset: erase the content of the directory if it exists
320

321
    Returns
322
    -------
323
    the path
324
    """
325

326
    if reset and os.path.exists(path):
8✔
327
        shutil.rmtree(path)
2✔
328
        # deleting stuff takes time
329
        while os.path.exists(path):  # check if it still exists
2✔
330
            pass
×
331
    try:
8✔
332
        os.makedirs(path)
8✔
333
    except FileExistsError:
6✔
334
        pass
6✔
335
    return path
8✔
336

337

338
def include_patterns(*patterns):
9✔
339
    """Factory function that can be used with copytree() ignore parameter.
340

341
    Arguments define a sequence of glob-style patterns
342
    that are used to specify what files to NOT ignore.
343
    Creates and returns a function that determines this for each directory
344
    in the file hierarchy rooted at the source directory when used with
345
    shutil.copytree().
346

347
    https://stackoverflow.com/questions/35155382/copying-specific-files-to-a-
348
    new-folder-while-maintaining-the-original-subdirect
349
    """
350

351
    def _ignore_patterns(path, names):
2✔
352
        # This is our cuisine
353
        bname = os.path.basename(path)
2✔
354
        if 'divide' in bname or 'log' in bname:
2✔
355
            keep = []
×
356
        else:
357
            keep = set(name for pattern in patterns
2✔
358
                       for name in fnmatch.filter(names, pattern))
359
        ignore = set(name for name in names
2✔
360
                     if name not in keep and not
361
                     os.path.isdir(os.path.join(path, name)))
362
        return ignore
2✔
363

364
    return _ignore_patterns
2✔
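# Usage sketch (illustrative; the patterns and paths are hypothetical): the
# factory is meant for shutil.copytree's `ignore` argument, here copying only
# .shp and .nc files while keeping the directory structure:
#
#     shutil.copytree('gdir_source', 'gdir_backup',
#                     ignore=include_patterns('*.shp', '*.nc'))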
365

366

367
class ncDataset(netCDF4.Dataset):
9✔
368
    """Wrapper around netCDF4 setting auto_mask to False"""
369

370
    def __init__(self, *args, **kwargs):
9✔
371
        super(ncDataset, self).__init__(*args, **kwargs)
7✔
372
        self.set_auto_mask(False)
7✔
373

374

375
def pipe_log(gdir, task_func_name, err=None):
9✔
376
    """Log the error in a specific directory."""
377

378
    time_str = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
6✔
379

380
    # Defaults to working directory: it must be set!
381
    if not cfg.PATHS['working_dir']:
6✔
382
        warnings.warn("Cannot log to file without a valid "
×
383
                      "cfg.PATHS['working_dir']!", RuntimeWarning)
384
        return
×
385

386
    fpath = os.path.join(cfg.PATHS['working_dir'], 'log')
6✔
387
    mkdir(fpath)
6✔
388

389
    fpath = os.path.join(fpath, gdir.rgi_id)
6✔
390

391
    sep = '; '
6✔
392

393
    if err is not None:
6✔
394
        fpath += '.ERROR'
6✔
395
    else:
396
        return  # for now
×
397
        fpath += '.SUCCESS'
398

399
    with open(fpath, 'a') as f:
6✔
400
        f.write(time_str + sep + task_func_name + sep)
6✔
401
        if err is not None:
6✔
402
            f.write(err.__class__.__name__ + sep + '{}\n'.format(err))
6✔
403
        else:
404
            f.write(sep + '\n')
×
405

406

407
class DisableLogger():
9✔
408
    """Context manager to temporarily disable all loggers."""
409

410
    def __enter__(self):
9✔
411
        logging.disable(logging.CRITICAL)
7✔
412

413
    def __exit__(self, a, b, c):
9✔
414
        logging.disable(logging.NOTSET)
7✔
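# Usage sketch (illustrative; `noisy_task` is hypothetical): silence all
# logging inside a block and restore it automatically afterwards:
#
#     with DisableLogger():
#         noisy_task()   # nothing below CRITICAL is logged here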
415

416

417
def _timeout_handler(signum, frame):
9✔
418
    raise TimeoutError('This task was killed because of timeout')
×
419

420

421
class entity_task(object):
9✔
422
    """Decorator for common job-controlling logic.
423

424
    All tasks share common operations. This decorator is here to handle them:
425
    exceptions, logging, and (some day) database for job-controlling.
426
    """
427

428
    def __init__(self, log, writes=[], fallback=None):
9✔
429
        """Decorator syntax: ``@entity_task(log, writes=['dem', 'outlines'])``
430

431
        Parameters
432
        ----------
433
        log: logger
434
            module logger
435
        writes: list
436
            list of files that the task will write down to disk (must be
437
            available in ``cfg.BASENAMES``)
438
        fallback: python function
439
            will be executed on gdir if entity_task fails
440
        return_value: bool
441
            whether the return value from the task should be passed over
442
            to the caller or not. In general you will always want this to
443
            be true, but sometimes the task returns things which are not
444
            useful in production and may use a lot of memory, etc.
445
        """
446
        self.log = log
9✔
447
        self.writes = writes
9✔
448
        self.fallback = fallback
9✔
449

450
        cnt = ['    Notes']
9✔
451
        cnt += ['    -----']
9✔
452
        cnt += ['    Files written to the glacier directory:']
9✔
453

454
        for k in sorted(writes):
9✔
455
            cnt += [cfg.BASENAMES.doc_str(k)]
9✔
456
        self.iodoc = '\n'.join(cnt)
9✔
457

458
    def __call__(self, task_func):
9✔
459
        """Decorate."""
460

461
        # Add to the original docstring
462
        if task_func.__doc__ is None:
9✔
463
            raise RuntimeError('Entity tasks should have a docstring!')
×
464

465
        task_func.__doc__ = '\n'.join((task_func.__doc__, self.iodoc))
9✔
466

467
        @wraps(task_func)
9✔
468
        def _entity_task(gdir, *, reset=None, print_log=True,
9✔
469
                         return_value=True, continue_on_error=None,
470
                         add_to_log_file=True, **kwargs):
471

472
            settings_filesuffix = kwargs.get('settings_filesuffix', '')
7✔
473
            gdir.settings_filesuffix = settings_filesuffix
7✔
474

475
            observations_filesuffix = kwargs.get('observations_filesuffix', '')
7✔
476
            gdir.observations_filesuffix = observations_filesuffix
7✔
477

478
            if reset is None:
7✔
479
                reset = not cfg.PARAMS['auto_skip_task']
7✔
480

481
            if continue_on_error is None:
7✔
482
                continue_on_error = cfg.PARAMS['continue_on_error']
7✔
483

484
            task_name = task_func.__name__
7✔
485

486
            # Filesuffixes are typically used to differentiate tasks
487
            fsuffix = settings_filesuffix
7✔
488
            if kwargs.get('filesuffix', False):
7✔
489
                fsuffix += kwargs.get('filesuffix', False)
2✔
490
            if kwargs.get('output_filesuffix', False):
7✔
491
                fsuffix += kwargs.get('output_filesuffix', False)
5✔
492
            if fsuffix:
7✔
493
                task_name += fsuffix
5✔
494

495
            # Do we need to run this task?
496
            s = gdir.get_task_status(task_name)
7✔
497
            if not reset and s and ('SUCCESS' in s):
7✔
498
                return
1✔
499

500
            # Log what we are doing
501
            if print_log:
7✔
502
                self.log.info('(%s) %s', gdir.rgi_id, task_name)
7✔
503

504
            # Run the task
505
            try:
7✔
506
                if gdir.settings['task_timeout'] > 0:
7✔
507
                    signal.signal(signal.SIGALRM, _timeout_handler)
×
508
                    signal.alarm(gdir.settings['task_timeout'])
×
509
                ex_t = time.time()
7✔
510
                out = task_func(gdir, **kwargs)
7✔
511
                ex_t = time.time() - ex_t
7✔
512
                if gdir.settings['task_timeout'] > 0:
7✔
513
                    signal.alarm(0)
×
514
                if task_name != 'gdir_to_tar':
7✔
515
                    if add_to_log_file:
7✔
516
                        gdir.log(task_name, task_time=ex_t)
7✔
517
            except Exception as err:
6✔
518
                # Something happened
519
                out = None
6✔
520
                if add_to_log_file:
6✔
521
                    gdir.log(task_name, err=err)
6✔
522
                    pipe_log(gdir, task_name, err=err)
6✔
523
                if print_log:
6✔
524
                    self.log.error('%s occurred during task %s on %s: %s',
6✔
525
                                   type(err).__name__, task_name,
526
                                   gdir.rgi_id, str(err))
527
                if not continue_on_error:
6✔
528
                    raise
6✔
529

530
                if self.fallback is not None:
3✔
531
                    out = self.fallback(gdir)
×
532
            if return_value:
7✔
533
                return out
7✔
534

535
        _entity_task.__dict__['is_entity_task'] = True
9✔
536
        # Allow using a function decorated as entity_task without its decoration.
537
        _entity_task.unwrapped = task_func
9✔
538
        return _entity_task
9✔
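# Usage sketch (illustrative; the task name and options are hypothetical): a
# per-glacier task is declared with the decorator and then called on a single
# GlacierDirectory; logging, error handling and the task status entry are
# handled by the wrapper:
#
#     @entity_task(log, writes=['dem'])
#     def my_task(gdir, some_option=1):
#         """A demo task (entity tasks must have a docstring)."""
#         ...
#
#     my_task(gdir, some_option=2)            # wrapped call
#     my_task.unwrapped(gdir, some_option=2)  # bypasses the decoration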
539

540

541
class global_task(object):
9✔
542
    """Decorator for common job-controlling logic.
543

544
    Indicates that this task expects a list of all GlacierDirs as parameter
545
    instead of being called once per dir.
546
    """
547

548
    def __init__(self, log):
9✔
549
        """Decorator syntax: ``@global_task(log)``
550

551
        Parameters
552
        ----------
553
        log: logger
554
            module logger
555
        """
556
        self.log = log
9✔
557

558
    def __call__(self, task_func):
9✔
559
        """Decorate."""
560

561
        @wraps(task_func)
9✔
562
        def _global_task(gdirs, **kwargs):
9✔
563

564
            # Should be iterable
565
            gdirs = tolist(gdirs)
7✔
566

567
            self.log.workflow('Applying global task %s on %s glaciers',
7✔
568
                              task_func.__name__, len(gdirs))
569

570
            # Run the task
571
            return task_func(gdirs, **kwargs)
7✔
572

573
        _global_task.__dict__['is_global_task'] = True
9✔
574
        return _global_task
9✔
575

576

577
def get_ref_mb_glaciers_candidates(rgi_version=None):
9✔
578
    """Reads in the WGMS list of glaciers with available MB data.
579

580
    Can be found afterwards (and extended) in cfg.DATA['RGIXX_ref_ids'].
581
    """
582

583
    if rgi_version is None:
1✔
584
        rgi_version = cfg.PARAMS['rgi_version']
1✔
585

586
    if len(rgi_version) == 2:
1✔
587
        # We might change this one day
588
        rgi_version = rgi_version[:1]
1✔
589

590
    key = 'RGI{}0_ref_ids'.format(rgi_version)
1✔
591

592
    if key not in cfg.DATA:
1✔
593
        flink, _ = get_wgms_files()
1✔
594
        cfg.DATA[key] = flink['RGI{}0_ID'.format(rgi_version)].tolist()
1✔
595

596
    return cfg.DATA[key]
1✔
597

598

599
@global_task(log)
9✔
600
def get_ref_mb_glaciers(gdirs, y0=None, y1=None):
9✔
601
    """Get the list of glaciers we have valid mass balance measurements for.
602

603
    To be valid, glaciers must have at least 5 years of measurements and
604
    be land terminating. Therefore, the list depends on the time period of the
605
    baseline climate data and this method selects them out of a list
606
    of potential candidates (`gdirs` arg).
607

608
    Parameters
609
    ----------
610
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
611
        list of glaciers to check for valid reference mass balance data
612
    y0 : int
613
        override the default behavior which is to check the available
614
        climate data (or PARAMS['ref_mb_valid_window']) and decide
615
    y1 : int
616
        override the default behavior which is to check the available
617
        climate data (or PARAMS['ref_mb_valid_window']) and decide
618

619
    Returns
620
    -------
621
    ref_gdirs : list of :py:class:`oggm.GlacierDirectory` objects
622
        list of those glaciers with valid reference mass balance data
623

624
    See Also
625
    --------
626
    get_ref_mb_glaciers_candidates
627
    """
628

629
    # Get the links
630
    ref_ids = get_ref_mb_glaciers_candidates(gdirs[0].rgi_version)
1✔
631

632
    # We remove tidewater glaciers and glaciers with < 5 years
633
    ref_gdirs = []
1✔
634
    for g in gdirs:
1✔
635
        if g.rgi_id not in ref_ids or g.is_tidewater:
1✔
636
            continue
1✔
637
        try:
1✔
638
            mbdf = g.get_ref_mb_data(y0=y0, y1=y1)
1✔
639
            if len(mbdf) >= 5:
1✔
640
                ref_gdirs.append(g)
1✔
641
        except RuntimeError as e:
1✔
642
            if 'Please process some climate data before call' in str(e):
×
643
                raise
×
644
    return ref_gdirs
1✔
645

646

647
def _chaikins_corner_cutting(line, refinements=5):
9✔
648
    """Some magic here.
649

650
    https://stackoverflow.com/questions/47068504/where-to-find-python-
651
    implementation-of-chaikins-corner-cutting-algorithm
652
    """
653
    coords = np.array(line.coords)
2✔
654

655
    for _ in range(refinements):
2✔
656
        L = coords.repeat(2, axis=0)
2✔
657
        R = np.empty_like(L)
2✔
658
        R[0] = L[0]
2✔
659
        R[2::2] = L[1:-1:2]
2✔
660
        R[1:-1:2] = L[2::2]
2✔
661
        R[-1] = L[-1]
2✔
662
        coords = L * 0.75 + R * 0.25
2✔
663

664
    return shpg.LineString(coords)
2✔
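# Worked example (illustrative): each refinement keeps the endpoints and
# replaces every interior corner by two points at 1/4 and 3/4 of the adjacent
# segments, which rounds the corner off:
#
#     line = shpg.LineString([(0, 0), (1, 0), (1, 1)])
#     list(_chaikins_corner_cutting(line, refinements=1).coords)
#     # -> [(0.0, 0.0), (0.25, 0.0), (0.75, 0.0),
#     #     (1.0, 0.25), (1.0, 0.75), (1.0, 1.0)]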
665

666

667
@entity_task(log)
9✔
668
def get_centerline_lonlat(gdir,
9✔
669
                          keep_main_only=False,
670
                          flowlines_output=False,
671
                          ensure_exterior_match=False,
672
                          geometrical_widths_output=False,
673
                          corrected_widths_output=False,
674
                          to_crs='wgs84',
675
                          simplify_line_before=0,
676
                          corner_cutting=0,
677
                          simplify_line_after=0):
678
    """Helper task to convert the centerlines to a shapefile
679

680
    Parameters
681
    ----------
682
    gdir : the glacier directory
683
    flowlines_output : create a shapefile for the flowlines
684
    ensure_exterior_match : per design, OGGM centerlines match the underlying
685
    DEM grid. This may imply that they do not "touch" the exterior outlines
686
    of the glacier in vector space. Set this to True to correct for that.
687
    geometrical_widths_output : for the geometrical widths
688
    corrected_widths_output : for the corrected widths
689

690
    Returns
691
    -------
692
    a list of dicts (one per centerline or width segment) to write to a shapefile
693
    """
694
    if flowlines_output or geometrical_widths_output or corrected_widths_output:
2✔
695
        cls = gdir.read_pickle('inversion_flowlines')
2✔
696
    else:
697
        cls = gdir.read_pickle('centerlines')
2✔
698

699
    exterior = None
2✔
700
    if ensure_exterior_match:
2✔
701
        exterior = gdir.read_shapefile('outlines')
2✔
702
        # Transform to grid
703
        tra_func = partial(gdir.grid.transform, crs=exterior.crs)
2✔
704
        exterior = shpg.Polygon(shp_trafo(tra_func, exterior.geometry[0].exterior))
2✔
705

706
    tra_func = partial(gdir.grid.ij_to_crs, crs=to_crs)
2✔
707

708
    olist = []
2✔
709
    for j, cl in enumerate(cls):
2✔
710
        mm = 1 if j == (len(cls)-1) else 0
2✔
711
        if keep_main_only and mm == 0:
2✔
712
            continue
×
713
        if corrected_widths_output:
2✔
714
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
2✔
715
            for wi, cur, (n1, n2), wi_m in zip(cl.widths, cl.line.coords,
2✔
716
                                               cl.normals, cl.widths_m):
717
                _l = shpg.LineString([shpg.Point(cur + wi / 2. * n1),
2✔
718
                                      shpg.Point(cur + wi / 2. * n2)])
719
                gs = dict()
2✔
720
                gs['RGIID'] = gdir.rgi_id
2✔
721
                gs['SEGMENT_ID'] = j
2✔
722
                gs['LE_SEGMENT'] = le_segment
2✔
723
                gs['MAIN'] = mm
2✔
724
                gs['WIDTH_m'] = wi_m
2✔
725
                gs['geometry'] = shp_trafo(tra_func, _l)
2✔
726
                olist.append(gs)
2✔
727
        elif geometrical_widths_output:
2✔
728
            le_segment = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
2✔
729
            for _l, wi_m in zip(cl.geometrical_widths, cl.widths_m):
2✔
730
                gs = dict()
2✔
731
                gs['RGIID'] = gdir.rgi_id
2✔
732
                gs['SEGMENT_ID'] = j
2✔
733
                gs['LE_SEGMENT'] = le_segment
2✔
734
                gs['MAIN'] = mm
2✔
735
                gs['WIDTH_m'] = wi_m
2✔
736
                gs['geometry'] = shp_trafo(tra_func, _l)
2✔
737
                olist.append(gs)
2✔
738
        else:
739
            gs = dict()
2✔
740
            gs['RGIID'] = gdir.rgi_id
2✔
741
            gs['SEGMENT_ID'] = j
2✔
742
            gs['STRAHLER'] = cl.order
2✔
743
            if mm == 0:
2✔
744
                gs['OUTFLOW_ID'] = cls.index(cl.flows_to)
2✔
745
            else:
746
                gs['OUTFLOW_ID'] = -1
2✔
747
            gs['LE_SEGMENT'] = np.rint(np.max(cl.dis_on_line) * gdir.grid.dx)
2✔
748
            gs['MAIN'] = mm
2✔
749
            line = cl.line
2✔
750
            if ensure_exterior_match:
2✔
751
                # Extend the line at the start (scale up its first segment)
752
                fs = shpg.LineString(line.coords[:2])
2✔
753
                # First check if this is necessary - this segment should
754
                # be within the geometry or it's already good to go
755
                if fs.within(exterior):
2✔
756
                    fs = shpa.scale(fs, xfact=3, yfact=3, origin=fs.boundary.geoms[1])
2✔
757
                    line = shpg.LineString([*fs.coords, *line.coords[2:]])
2✔
758
                # If last also extend at the end
759
                if mm == 1:
2✔
760
                    ls = shpg.LineString(line.coords[-2:])
2✔
761
                    if ls.within(exterior):
2✔
762
                        ls = shpa.scale(ls, xfact=3, yfact=3, origin=ls.boundary.geoms[0])
2✔
763
                        line = shpg.LineString([*line.coords[:-2], *ls.coords])
2✔
764

765
                # Simplify and smooth?
766
                if simplify_line_before:
2✔
767
                    line = line.simplify(simplify_line_before)
2✔
768
                if corner_cutting:
2✔
769
                    line = _chaikins_corner_cutting(line, corner_cutting)
2✔
770
                if simplify_line_after:
2✔
771
                    line = line.simplify(simplify_line_after)
2✔
772

773
                # Intersect with exterior geom
774
                line = line.intersection(exterior)
2✔
775
                if line.geom_type in ['MultiLineString', 'GeometryCollection']:
2✔
776
                    # Take the longest
777
                    lens = [il.length for il in line.geoms]
2✔
778
                    line = line.geoms[np.argmax(lens)]
2✔
779

780
                # Recompute length
781
                gs['LE_SEGMENT'] = np.rint(line.length * gdir.grid.dx)
2✔
782
            gs['geometry'] = shp_trafo(tra_func, line)
2✔
783
            olist.append(gs)
2✔
784

785
    return olist
2✔
786

787

788
def _write_shape_to_disk(gdf, fpath, to_tar=False):
9✔
789
    """Write a shapefile to disk with optional compression
790

791
    Parameters
792
    ----------
793
    gdf : gpd.GeoDataFrame
794
        the data to write
795
    fpath : str
796
        where to write the file - should end in .shp
797
    to_tar : bool
798
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
799
        also compress to .gz
800
    """
801

802
    if '.shp' not in fpath:
7✔
803
        raise ValueError('File ending should be .shp')
×
804

805
    with warnings.catch_warnings():
7✔
806
        warnings.filterwarnings('ignore', 'GeoSeries.notna', UserWarning)
7✔
807
        gdf.to_file(fpath)
7✔
808

809
    if not to_tar:
7✔
810
        # Done here
811
        return
3✔
812

813
    # Write them in tar
814
    fpath = fpath.replace('.shp', '.tar')
7✔
815
    mode = 'w'
7✔
816
    if cfg.PARAMS['use_compression']:
7✔
817
        fpath += '.gz'
7✔
818
        mode += ':gz'
7✔
819
    if os.path.exists(fpath):
7✔
820
        os.remove(fpath)
4✔
821

822
    # List all files that were written as shape
823
    fs = glob.glob(fpath.replace('.gz', '').replace('.tar', '.*'))
7✔
824
    # Add them to tar
825
    with tarfile.open(fpath, mode=mode) as tf:
7✔
826
        for ff in fs:
7✔
827
            tf.add(ff, arcname=os.path.basename(ff))
7✔
828

829
    # Delete the old ones
830
    for ff in fs:
7✔
831
        os.remove(ff)
7✔
832

833

834
@global_task(log)
9✔
835
def write_centerlines_to_shape(gdirs, *, path=True, to_tar=False,
9✔
836
                               to_crs='EPSG:4326',
837
                               filesuffix='', flowlines_output=False,
838
                               ensure_exterior_match=False,
839
                               geometrical_widths_output=False,
840
                               corrected_widths_output=False,
841
                               keep_main_only=False,
842
                               simplify_line_before=0,
843
                               corner_cutting=0,
844
                               simplify_line_after=0):
845
    """Write the centerlines to a shapefile.
846

847
    Parameters
848
    ----------
849
    gdirs:
850
        the list of GlacierDir to process.
851
    path: str or bool
852
        Set to "True" in order  to store the shape in the working directory
853
        Set to a str path to store the file to your chosen location
854
    to_tar : bool
855
        put the files in a .tar file. If cfg.PARAMS['use_compression'],
856
        also compress to .gz
857
    filesuffix : str
858
        add a suffix to the output file
859
    flowlines_output : bool
860
        output the OGGM flowlines instead of the centerlines
861
    geometrical_widths_output : bool
862
        output the geometrical widths instead of the centerlines
863
    corrected_widths_output : bool
864
        output the corrected widths instead of the centerlines
865
    ensure_exterior_match : bool
866
        per design, the centerlines will match the underlying DEM grid.
867
        This may imply that they do not "touch" the exterior outlines of the
868
        glacier in vector space. Set this to True to correct for that.
869
    to_crs : str
870
        write the shape to another coordinate reference system (CRS)
871
    keep_main_only : bool
872
        write only the main flowlines to the output files
873
    simplify_line_before : float
874
        apply shapely's `simplify` method to the line before corner cutting.
875
        It is a cosmetic option: it avoids hard "angles" in the centerlines.
876
        All points in the simplified object will be within the tolerance
877
        distance of the original geometry (units: grid points). A good
878
        value to test first is 0.75
879
    corner_cutting : int
880
        apply the Chaikin's corner cutting algorithm to the geometry before
881
        writing. The integer represents the number of refinements to apply.
882
        A good first value to test is 3.
883
    simplify_line_after : float
884
        apply shapely's `simplify` method to the line *after* corner cutting.
885
        This is to reduce the size of the geometries after they have been
886
        smoothed. The default value of 0 is fine if you use corner cutting less
887
        than 4. Otherwise try a small number, like 0.05 or 0.1.
888
    """
889
    from oggm.workflow import execute_entity_task
3✔
890

891
    if path is True:
3✔
892
        path = os.path.join(cfg.PATHS['working_dir'],
2✔
893
                            'glacier_centerlines' + filesuffix + '.shp')
894

895
    _to_crs = salem.check_crs(to_crs)
3✔
896
    if not _to_crs:
3✔
897
        raise InvalidParamsError(f'CRS not understood: {to_crs}')
×
898

899
    log.workflow('write_centerlines_to_shape on {} ...'.format(path))
3✔
900

901
    olist = execute_entity_task(get_centerline_lonlat, gdirs,
3✔
902
                                flowlines_output=flowlines_output,
903
                                ensure_exterior_match=ensure_exterior_match,
904
                                geometrical_widths_output=geometrical_widths_output,
905
                                corrected_widths_output=corrected_widths_output,
906
                                keep_main_only=keep_main_only,
907
                                simplify_line_before=simplify_line_before,
908
                                corner_cutting=corner_cutting,
909
                                simplify_line_after=simplify_line_after,
910
                                to_crs=_to_crs)
911
    # filter for none
912
    olist = [o for o in olist if o is not None]
3✔
913
    odf = gpd.GeoDataFrame(itertools.chain.from_iterable(olist), crs=to_crs)
3✔
914
    odf = odf.sort_values(by=['RGIID', 'SEGMENT_ID'])
3✔
915
    # Sanity checks to avoid bad surprises
916
    gtype = np.array([g.geom_type for g in odf.geometry])
3✔
917
    if 'GeometryCollection' in gtype:
3✔
918
        errdf = odf.loc[gtype == 'GeometryCollection']
×
919
        with warnings.catch_warnings():
×
920
            # errdf.length warns because of use of wgs84
921
            warnings.filterwarnings("ignore", category=UserWarning)
×
922
            if not np.all(errdf.length == 0):
×
923
                errdf = errdf.loc[errdf.length > 0]
×
924
                raise RuntimeError('Some geometries are non-empty GeometryCollection '
×
925
                                   f'at RGI Ids: {errdf.RGIID.values}')
926
    _write_shape_to_disk(odf, path, to_tar=to_tar)
3✔
927

928

929
def demo_glacier_id(key):
9✔
930
    """Get the RGI id of a glacier by name or key: None if not found."""
931

932
    df = cfg.DATA['demo_glaciers']
1✔
933

934
    # Is the name in key?
935
    s = df.loc[df.Key.str.lower() == key.lower()]
1✔
936
    if len(s) == 1:
1✔
937
        return s.index[0]
1✔
938

939
    # Is the name in name?
940
    s = df.loc[df.Name.str.lower() == key.lower()]
1✔
941
    if len(s) == 1:
1✔
942
        return s.index[0]
1✔
943

944
    # Is the name in Ids?
945
    try:
1✔
946
        s = df.loc[[key]]
1✔
947
        if len(s) == 1:
1✔
948
            return s.index[0]
1✔
949
    except KeyError:
1✔
950
        pass
1✔
951

952
    return None
1✔
953

954

955
class compile_to_netcdf(object):
9✔
956
    """Decorator for common compiling NetCDF files logic.
957

958
    All compile_* tasks can be optimized the same way, by using temporary
959
    files and merging them afterwards.
960
    """
961

962
    def __init__(self, log):
9✔
963
        """Decorator syntax: ``@compile_to_netcdf(log, n_tmp_files=1000)``
964

965
        Parameters
966
        ----------
967
        log: logger
968
            module logger
969
        tmp_file_size: int
970
            number of glacier directories per temporary file
971
        """
972
        self.log = log
9✔
973

974
    def __call__(self, task_func):
9✔
975
        """Decorate."""
976

977
        @wraps(task_func)
9✔
978
        def _compile_to_netcdf(gdirs, input_filesuffix='',
9✔
979
                               output_filesuffix='',
980
                               path=True,
981
                               tmp_file_size=1000,
982
                               **kwargs):
983

984
            if not output_filesuffix:
5✔
985
                output_filesuffix = input_filesuffix
5✔
986

987
            gdirs = tolist(gdirs)
5✔
988
            task_name = task_func.__name__
5✔
989
            output_base = task_name.replace('compile_', '')
5✔
990

991
            if path is True:
5✔
992
                path = os.path.join(cfg.PATHS['working_dir'],
5✔
993
                                    output_base + output_filesuffix + '.nc')
994

995
            self.log.workflow('Applying %s on %d gdirs.',
5✔
996
                              task_name, len(gdirs))
997

998
            # Run the task
999
            # If small gdir size, no need for temporary files
1000
            if len(gdirs) < tmp_file_size or not path:
5✔
1001
                return task_func(gdirs, input_filesuffix=input_filesuffix,
5✔
1002
                                 path=path, **kwargs)
1003

1004
            # Otherwise, divide and conquer
1005
            sub_gdirs = [gdirs[i: i + tmp_file_size] for i in
1✔
1006
                         range(0, len(gdirs), tmp_file_size)]
1007

1008
            tmp_paths = [os.path.join(cfg.PATHS['working_dir'],
1✔
1009
                                      'compile_tmp_{:06d}.nc'.format(i))
1010
                         for i in range(len(sub_gdirs))]
1011

1012
            try:
1✔
1013
                for spath, sgdirs in zip(tmp_paths, sub_gdirs):
1✔
1014
                    task_func(sgdirs, input_filesuffix=input_filesuffix,
1✔
1015
                              path=spath, **kwargs)
1016
            except BaseException:
×
1017
                # If something wrong, delete the tmp files
1018
                for f in tmp_paths:
×
1019
                    try:
×
1020
                        os.remove(f)
×
1021
                    except FileNotFoundError:
×
1022
                        pass
×
1023
                raise
×
1024

1025
            # Ok, now merge and return
1026
            try:
1✔
1027
                with xr.open_mfdataset(tmp_paths, combine='nested',
1✔
1028
                                       concat_dim='rgi_id') as ds:
1029
                    # the .load() is actually quite uncool here, but it solves
1030
                    # an unbelievable stalling problem in multiproc
1031
                    ds.load().to_netcdf(path)
1✔
1032
            except TypeError:
×
1033
                # xr < v 0.13
1034
                with xr.open_mfdataset(tmp_paths, concat_dim='rgi_id') as ds:
×
1035
                    # the .load() is actually quite uncool here, but it solves
1036
                    # an unbelievable stalling problem in multiproc
1037
                    ds.load().to_netcdf(path)
×
1038

1039
            # We can't return the dataset without loading it, so we don't
1040
            return None
1✔
1041

1042
        return _compile_to_netcdf
9✔
1043

1044

1045
@entity_task(log)
9✔
1046
def merge_consecutive_run_outputs(gdir,
9✔
1047
                                  input_filesuffix_1=None,
1048
                                  input_filesuffix_2=None,
1049
                                  output_filesuffix=None,
1050
                                  delete_input=False):
1051
    """Merges the output of two model_diagnostics files into one.
1052

1053
    It assumes that the last time of file1 is equal to the first time of file2.
1054

1055
    Parameters
1056
    ----------
1057
    gdir : the glacier directory
1058
    input_filesuffix_1 : str
1059
        how to recognize the first file
1060
    input_filesuffix_2 : str
1061
        how to recognize the second file
1062
    output_filesuffix : str
1063
        where to write the output (default: no suffix)
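    delete_input : bool
        whether to delete the two input files from disk once they have been
        merged (default: False)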
1064

1065
    Returns
1066
    -------
1067
    The merged dataset
1068
    """
1069

1070
    # Read in the input files and check
1071
    fp1 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_1)
1✔
1072
    with xr.open_dataset(fp1) as ds:
1✔
1073
        ds1 = ds.load()
1✔
1074
    fp2 = gdir.get_filepath('model_diagnostics', filesuffix=input_filesuffix_2)
1✔
1075
    with xr.open_dataset(fp2) as ds:
1✔
1076
        ds2 = ds.load()
1✔
1077
    if ds1.time[-1] != ds2.time[0]:
1✔
1078
        raise InvalidWorkflowError('The two files are incompatible by time')
×
1079

1080
    # Sanity check for all variables as well
1081
    for v in ds1:
1✔
1082
        if not np.all(np.isfinite(ds1[v].data[-1])):
1✔
1083
            # This is the last year of hydro output - we will discard anyway
1084
            continue
1✔
1085
        if np.allclose(ds1[v].data[-1], ds2[v].data[0]):
1✔
1086
            # This means that we're OK - the two match
1087
            continue
1✔
1088

1089
        # This has to be a bucket of some sort, probably snow or calving
1090
        if len(ds2[v].data.shape) == 1:
1✔
1091
            if ds2[v].data[0] != 0:
1✔
1092
                raise InvalidWorkflowError('The two files seem incompatible '
×
1093
                                           f'by data on variable : {v}')
1094
            bucket = ds1[v].data[-1]
1✔
1095
        elif len(ds2[v].data.shape) == 2:
1✔
1096
            if ds2[v].data[0, 0] != 0:
1✔
1097
                raise InvalidWorkflowError('The two files seem incompatible '
×
1098
                                           f'by data on variable : {v}')
1099
            bucket = ds1[v].data[-1, -1]
1✔
1100
        # Carry it to the rest
1101
        ds2[v] = ds2[v] + bucket
1✔
1102

1103
    # Merge by removing the last step of file 1 and delete the files if asked
1104
    out_ds = xr.concat([ds1.isel(time=slice(0, -1)), ds2], dim='time')
1✔
1105
    if delete_input:
1✔
1106
        os.remove(fp1)
×
1107
        os.remove(fp2)
×
1108
    # Write out and return
1109
    fp = gdir.get_filepath('model_diagnostics', filesuffix=output_filesuffix)
1✔
1110
    out_ds.to_netcdf(fp)
1✔
1111
    return out_ds
1✔
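# Usage sketch (illustrative; the filesuffixes below are hypothetical): being
# an entity task, the merge is typically applied after two consecutive runs,
# e.g. a spinup followed by a projection:
#
#     merge_consecutive_run_outputs(gdir,
#                                   input_filesuffix_1='_spinup',
#                                   input_filesuffix_2='_projection',
#                                   output_filesuffix='_merged')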
1112

1113

1114
@global_task(log)
9✔
1115
@compile_to_netcdf(log)
9✔
1116
def compile_run_output(gdirs, path=True, input_filesuffix='',
9✔
1117
                       use_compression=True):
1118
    """Compiles the output of the model runs of several gdirs into one file.
1119

1120
    Parameters
1121
    ----------
1122
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1123
        the glacier directories to process
1124
    path : str
1125
        where to store (default is on the working dir).
1126
        Set to `False` to disable disk storage.
1127
    input_filesuffix : str
1128
        the filesuffix of the files to be compiled
1129
    use_compression : bool
1130
        use zlib compression on the output netCDF files
1131

1132
    Returns
1133
    -------
1134
    ds : :py:class:`xarray.Dataset`
1135
        compiled output
1136
    """
1137

1138
    # Get the dimensions of all this
1139
    rgi_ids = [gd.rgi_id for gd in gdirs]
4✔
1140

1141
    # To find the longest time, we have to open all files unfortunately, we
1142
    # also create a list of all data variables (in case not all files contain
1143
    # the same data variables), and finally we decide on the name of "3d"
1144
    # variables in case we have daily
1145
    time_info = {}
4✔
1146
    time_keys = ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']
4✔
1147
    allowed_data_vars = ['volume_m3', 'volume_bsl_m3', 'volume_bwl_m3',
4✔
1148
                         'volume_m3_min_h',  # only here for back compatibility
1149
                         # as it is a variable in gdirs v1.6 2023.1
1150
                         'area_m2', 'area_m2_min_h', 'length_m', 'calving_m3',
1151
                         'calving_rate_myr', 'off_area',
1152
                         'on_area', 'model_mb', 'is_fixed_geometry_spinup']
1153
    for gi in range(10):
4✔
1154
        allowed_data_vars += [f'terminus_thick_{gi}']
4✔
1155
    # these hydro variables can be _monthly or _daily
1156
    hydro_vars = ['melt_off_glacier', 'melt_on_glacier',
4✔
1157
                  'liq_prcp_off_glacier', 'liq_prcp_on_glacier',
1158
                  'snowfall_off_glacier', 'snowfall_on_glacier',
1159
                  'melt_residual_off_glacier', 'melt_residual_on_glacier',
1160
                  'snow_bucket', 'residual_mb']
1161
    for v in hydro_vars:
4✔
1162
        allowed_data_vars += [v]
4✔
1163
        allowed_data_vars += [v + '_monthly']
4✔
1164
        allowed_data_vars += [v + '_daily']
4✔
1165
    data_vars = {}
4✔
1166
    name_2d_dim = 'month_2d'
4✔
1167
    contains_3d_data = False
4✔
1168
    for gd in gdirs:
4✔
1169
        fp = gd.get_filepath('model_diagnostics', filesuffix=input_filesuffix)
4✔
1170
        try:
4✔
1171
            with ncDataset(fp) as ds:
4✔
1172
                time = ds.variables['time'][:]
4✔
1173
                if 'time' not in time_info:
4✔
1174
                    time_info['time'] = time
4✔
1175
                    for cn in time_keys:
4✔
1176
                        time_info[cn] = ds.variables[cn][:]
4✔
1177
                else:
1178
                    # Here we may need to append or add stuff
1179
                    ot = time_info['time']
4✔
1180
                    if time[0] > ot[-1] or time[-1] < ot[0]:
4✔
1181
                        raise InvalidWorkflowError('Trying to compile output '
×
1182
                                                   'without overlap.')
1183
                    if time[-1] > ot[-1]:
4✔
1184
                        p = np.nonzero(time == ot[-1])[0][0] + 1
×
1185
                        time_info['time'] = np.append(ot, time[p:])
×
1186
                        for cn in time_keys:
×
1187
                            time_info[cn] = np.append(time_info[cn],
×
1188
                                                      ds.variables[cn][p:])
1189
                    if time[0] < ot[0]:
4✔
1190
                        p = np.nonzero(time == ot[0])[0][0]
×
1191
                        time_info['time'] = np.append(time[:p], ot)
×
1192
                        for cn in time_keys:
×
1193
                            time_info[cn] = np.append(ds.variables[cn][:p],
×
1194
                                                      time_info[cn])
1195

1196
                # check if there are new data variables and add them
1197
                for vn in ds.variables:
4✔
1198
                    # exclude time variables
1199
                    if vn in ['month_2d', 'calendar_month_2d',
4✔
1200
                              'hydro_month_2d']:
1201
                        name_2d_dim = 'month_2d'
2✔
1202
                        contains_3d_data = True
2✔
1203
                    elif vn in ['day_2d', 'calendar_day_2d', 'hydro_day_2d']:
4✔
1204
                        name_2d_dim = 'day_2d'
×
1205
                        contains_3d_data = True
×
1206
                    elif vn in allowed_data_vars:
4✔
1207
                        # check if data variable is new
1208
                        if vn not in data_vars.keys():
4✔
1209
                            data_vars[vn] = dict()
4✔
1210
                            data_vars[vn]['dims'] = ds.variables[vn].dimensions
4✔
1211
                            data_vars[vn]['attrs'] = dict()
4✔
1212
                            for attr in ds.variables[vn].ncattrs():
4✔
1213
                                if attr not in ['_FillValue', 'coordinates',
4✔
1214
                                                'dtype']:
1215
                                    data_vars[vn]['attrs'][attr] = getattr(
4✔
1216
                                        ds.variables[vn], attr)
1217
                    elif vn not in ['time'] + time_keys:
4✔
1218
                        # This check has future developments in mind.
1219
                        # If you end here it means the current data variable is
1220
                        # not under the allowed_data_vars OR not under the
1221
                        # defined time dimensions. If it is a new data variable
1222
                        # add it to allowed_data_vars above (also add it to
1223
                        # test_compile_run_output). If it is a new dimension
1224
                        # handle it in the if/elif statements.
1225
                        raise InvalidParamsError(f'The data variable "{vn}" '
×
1226
                                                 'is not known. Is it new or '
1227
                                                 'is it a new dimension? '
1228
                                                 'Check comment above this '
1229
                                                 'raise for more info!')
1230

1231
            # If this worked, keep it as template
1232
            ppath = fp
4✔
1233
        except FileNotFoundError:
×
1234
            pass
×
1235

1236
    if 'time' not in time_info:
4✔
1237
        raise RuntimeError('Found no valid glaciers!')
×
1238

1239
    # OK found it, open it and prepare the output
1240
    with xr.open_dataset(ppath) as ds_diag:
4✔
1241

1242
        # Prepare output
1243
        ds = xr.Dataset()
4✔
1244

1245
        # Global attributes
1246
        ds.attrs['description'] = 'OGGM model output'
4✔
1247
        ds.attrs['oggm_version'] = __version__
4✔
1248
        ds.attrs['calendar'] = '365-day no leap'
4✔
1249
        ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
4✔
1250

1251
        # Copy coordinates
1252
        time = time_info['time']
4✔
1253
        ds.coords['time'] = ('time', time)
4✔
1254
        ds['time'].attrs['description'] = 'Floating year'
4✔
1255
        # New coord
1256
        ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
4✔
1257
        ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
4✔
1258
        # This is just taken from there
1259
        for cn in ['hydro_year', 'hydro_month',
4✔
1260
                   'calendar_year', 'calendar_month']:
1261
            ds.coords[cn] = ('time', time_info[cn])
4✔
1262
            ds[cn].attrs['description'] = ds_diag[cn].attrs['description']
4✔
1263

1264
        # Prepare the 2D variables
1265
        shape = (len(time), len(rgi_ids))
4✔
1266
        out_2d = dict()
4✔
1267
        for vn in data_vars:
4✔
1268
            if name_2d_dim in data_vars[vn]['dims']:
4✔
1269
                continue
2✔
1270
            var = dict()
4✔
1271
            var['data'] = np.full(shape, np.nan)
4✔
1272
            var['attrs'] = data_vars[vn]['attrs']
4✔
1273
            out_2d[vn] = var
4✔
1274

1275
        # 1D Variables
1276
        out_1d = dict()
4✔
1277
        for vn, attrs in [('water_level', {'description': 'Calving water level',
4✔
1278
                                           'units': 'm'}),
1279
                          ('glen_a', {'description': 'Simulation Glen A',
1280
                                      'units': ''}),
1281
                          ('fs', {'description': 'Simulation sliding parameter',
1282
                                  'units': ''}),
1283
                          ]:
1284
            var = dict()
4✔
1285
            var['data'] = np.full(len(rgi_ids), np.nan)
4✔
1286
            var['attrs'] = attrs
4✔
1287
            out_1d[vn] = var
4✔
1288

1289
        # Maybe 3D?
1290
        out_3d = dict()
4✔
1291
        if contains_3d_data:
4✔
1292
            # We have some 3d vars
1293
            month_2d = ds_diag[name_2d_dim]
2✔
1294
            ds.coords[name_2d_dim] = (name_2d_dim, month_2d.data)
2✔
1295
            cn = f'calendar_{name_2d_dim}'
2✔
1296
            ds.coords[cn] = (name_2d_dim, ds_diag[cn].values)
2✔
1297

1298
            shape = (len(time), len(month_2d), len(rgi_ids))
2✔
1299
            for vn in data_vars:
2✔
1300
                if name_2d_dim not in data_vars[vn]['dims']:
2✔
1301
                    continue
2✔
1302
                var = dict()
2✔
1303
                var['data'] = np.full(shape, np.nan)
2✔
1304
                var['attrs'] = data_vars[vn]['attrs']
2✔
1305
                out_3d[vn] = var
2✔
1306

1307
    # Read out
1308
    for i, gdir in enumerate(gdirs):
4✔
1309
        try:
4✔
1310
            ppath = gdir.get_filepath('model_diagnostics',
4✔
1311
                                      filesuffix=input_filesuffix)
1312
            with ncDataset(ppath) as ds_diag:
4✔
1313
                it = ds_diag.variables['time'][:]
4✔
1314
                a = np.nonzero(time == it[0])[0][0]
4✔
1315
                b = np.nonzero(time == it[-1])[0][0] + 1
4✔
1316
                for vn, var in out_2d.items():
4✔
1317
                    # try statement if some data variables not in all files
1318
                    try:
4✔
1319
                        var['data'][a:b, i] = ds_diag.variables[vn][:]
4✔
1320
                    except KeyError:
1✔
1321
                        pass
1✔
1322
                for vn, var in out_3d.items():
4✔
1323
                    # try statement if some data variables not in all files
1324
                    try:
2✔
1325
                        var['data'][a:b, :, i] = ds_diag.variables[vn][:]
2✔
1326
                    except KeyError:
1✔
1327
                        pass
1✔
1328
                for vn, var in out_1d.items():
4✔
1329
                    var['data'][i] = ds_diag.getncattr(vn)
4✔
1330
        except FileNotFoundError:
×
1331
            pass
×
1332

1333
    # To xarray
1334
    for vn, var in out_2d.items():
4✔
1335
        # Backwards compatibility - to remove one day...
1336
        for r in ['_m3', '_m2', '_myr', '_m']:
4✔
1337
            # Order matters
1338
            vn = regexp.sub(r + '$', '', vn)
4✔
1339
        ds[vn] = (('time', 'rgi_id'), var['data'])
4✔
1340
        ds[vn].attrs = var['attrs']
4✔
1341
    for vn, var in out_3d.items():
4✔
1342
        ds[vn] = (('time', name_2d_dim, 'rgi_id'), var['data'])
2✔
1343
        ds[vn].attrs = var['attrs']
2✔
1344
    for vn, var in out_1d.items():
4✔
1345
        ds[vn] = (('rgi_id', ), var['data'])
4✔
1346
        ds[vn].attrs = var['attrs']
4✔
1347

1348
    # To file?
1349
    if path:
4✔
1350
        enc_var = {'dtype': 'float32'}
4✔
1351
        if use_compression:
4✔
1352
            enc_var['complevel'] = 5
4✔
1353
            enc_var['zlib'] = True
4✔
1354
        encoding = {v: enc_var for v in ds.data_vars}
4✔
1355
        ds.to_netcdf(path, encoding=encoding)
4✔
1356

1357
    return ds
4✔
1358

1359

1360
@global_task(log)
9✔
1361
@compile_to_netcdf(log)
9✔
1362
def compile_climate_input(gdirs, path=True, filename='climate_historical',
9✔
1363
                          input_filesuffix='', use_compression=True):
1364
    """Merge the climate input files in the glacier directories into one file.
1365

1366
    Parameters
1367
    ----------
1368
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1369
        the glacier directories to process
1370
    path : str
1371
        where to store (default is on the working dir).
1372
        Set to `False` to disable disk storage.
1373
    filename : str
1374
        BASENAME of the climate input files
1375
    input_filesuffix : str
1376
        the filesuffix of the files to be compiled
1377
    use_compression : bool
1378
        use zlib compression on the output netCDF files
1379

1380
    Returns
1381
    -------
1382
    ds : :py:class:`xarray.Dataset`
1383
        compiled climate data
1384
    """
1385

1386
    # Get the dimensions of all this
1387
    rgi_ids = [gd.rgi_id for gd in gdirs]
1✔
1388

1389
    # The first gdir might have blown up, try some others
1390
    i = 0
1✔
1391
    while True:
1✔
1392
        if i >= len(gdirs):
1✔
1393
            raise RuntimeError('Found no valid glaciers!')
×
1394
        try:
1✔
1395
            pgdir = gdirs[i]
1✔
1396
            ppath = pgdir.get_filepath(filename=filename,
1✔
1397
                                       filesuffix=input_filesuffix)
1398
            with xr.open_dataset(ppath) as ds_clim:
1✔
1399
                ds_clim.time.values
1✔
1400
            # If this worked, we have a valid gdir
1401
            break
1✔
1402
        except BaseException:
×
1403
            i += 1
×
1404

1405
    with xr.open_dataset(ppath) as ds_clim:
1✔
1406
        cyrs = ds_clim['time.year']
1✔
1407
        cmonths = ds_clim['time.month']
1✔
1408
        sm = cfg.PARAMS['hydro_month_' + pgdir.hemisphere]
1✔
1409
        hyrs, hmonths = calendardate_to_hydrodate(cyrs, cmonths, start_month=sm)
1✔
1410
        time = date_to_floatyear(cyrs, cmonths)
1✔
1411

1412
    # Prepare output
1413
    ds = xr.Dataset()
1✔
1414

1415
    # Global attributes
1416
    ds.attrs['description'] = 'OGGM model output'
1✔
1417
    ds.attrs['oggm_version'] = __version__
1✔
1418
    ds.attrs['calendar'] = '365-day no leap'
1✔
1419
    ds.attrs['creation_date'] = strftime("%Y-%m-%d %H:%M:%S", gmtime())
1✔
1420

1421
    # Coordinates
1422
    ds.coords['time'] = ('time', time)
1✔
1423
    ds.coords['rgi_id'] = ('rgi_id', rgi_ids)
1✔
1424
    ds.coords['calendar_year'] = ('time', cyrs.data)
1✔
1425
    ds.coords['calendar_month'] = ('time', cmonths.data)
1✔
1426
    ds.coords['hydro_year'] = ('time', hyrs)
1✔
1427
    ds.coords['hydro_month'] = ('time', hmonths)
1✔
1428
    ds['time'].attrs['description'] = 'Floating year'
1✔
1429
    ds['rgi_id'].attrs['description'] = 'RGI glacier identifier'
1✔
1430
    ds['hydro_year'].attrs['description'] = 'Hydrological year'
1✔
1431
    ds['hydro_month'].attrs['description'] = 'Hydrological month'
1✔
1432
    ds['calendar_year'].attrs['description'] = 'Calendar year'
1✔
1433
    ds['calendar_month'].attrs['description'] = 'Calendar month'
1✔
1434

1435
    shape = (len(time), len(rgi_ids))
1✔
1436
    temp = np.zeros(shape) * np.nan
1✔
1437
    prcp = np.zeros(shape) * np.nan
1✔
1438

1439
    ref_hgt = np.zeros(len(rgi_ids)) * np.nan
1✔
1440
    ref_pix_lon = np.zeros(len(rgi_ids)) * np.nan
1✔
1441
    ref_pix_lat = np.zeros(len(rgi_ids)) * np.nan
1✔
1442

1443
    for i, gdir in enumerate(gdirs):
1✔
1444
        try:
1✔
1445
            ppath = gdir.get_filepath(filename=filename,
1✔
1446
                                      filesuffix=input_filesuffix)
1447
            with xr.open_dataset(ppath) as ds_clim:
1✔
1448
                prcp[:, i] = ds_clim.prcp.values
1✔
1449
                temp[:, i] = ds_clim.temp.values
1✔
1450
                ref_hgt[i] = ds_clim.ref_hgt
1✔
1451
                ref_pix_lon[i] = ds_clim.ref_pix_lon
1✔
1452
                ref_pix_lat[i] = ds_clim.ref_pix_lat
1✔
1453
        except BaseException:
×
1454
            pass
×
1455

1456
    ds['temp'] = (('time', 'rgi_id'), temp)
1✔
1457
    ds['temp'].attrs['units'] = 'DegC'
1✔
1458
    ds['temp'].attrs['description'] = '2m Temperature at height ref_hgt'
1✔
1459
    ds['prcp'] = (('time', 'rgi_id'), prcp)
1✔
1460
    ds['prcp'].attrs['units'] = 'kg m-2'
1✔
1461
    ds['prcp'].attrs['description'] = 'total monthly precipitation amount'
1✔
1462
    ds['ref_hgt'] = ('rgi_id', ref_hgt)
1✔
1463
    ds['ref_hgt'].attrs['units'] = 'm'
1✔
1464
    ds['ref_hgt'].attrs['description'] = 'reference height'
1✔
1465
    ds['ref_pix_lon'] = ('rgi_id', ref_pix_lon)
1✔
1466
    ds['ref_pix_lon'].attrs['description'] = 'longitude'
1✔
1467
    ds['ref_pix_lat'] = ('rgi_id', ref_pix_lat)
1✔
1468
    ds['ref_pix_lat'].attrs['description'] = 'latitude'
1✔
1469

1470
    if path:
1✔
1471
        enc_var = {'dtype': 'float32'}
1✔
1472
        if use_compression:
1✔
1473
            enc_var['complevel'] = 5
1✔
1474
            enc_var['zlib'] = True
1✔
1475
        vars = ['temp', 'prcp']
1✔
1476
        encoding = {v: enc_var for v in vars}
1✔
1477
        ds.to_netcdf(path, encoding=encoding)
1✔
1478
    return ds
1✔
1479
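
# Editor's note: the sketch below is an illustration only (not part of the
# OGGM API). It assumes `gdirs` is a list of prepared GlacierDirectory
# objects; it reuses the module-level imports.
def _example_inspect_compiled_climate(gdirs):
    # compile without writing to disk
    ds = compile_climate_input(gdirs, path=False)
    # temp and prcp have dims (time, rgi_id): select one glacier by its RGI id
    temp = ds.temp.sel(rgi_id=gdirs[0].rgi_id)
    # yearly precipitation totals, grouped on the calendar_year coordinate
    prcp_yr = ds.prcp.sel(rgi_id=gdirs[0].rgi_id).groupby('calendar_year').sum()
    return temp, prcp_yr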

1480

1481
@global_task(log)
9✔
1482
def compile_task_log(gdirs, task_names=[], filesuffix='', path=True,
9✔
1483
                     append=True):
1484
    """Gathers the log output for the selected task(s)
1485

1486
    Parameters
1487
    ----------
1488
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1489
        the glacier directories to process
1490
    task_names : list of str
1491
        The tasks to check for
1492
    filesuffix : str
1493
        add suffix to output file
1494
    path:
1495
        Set to `True` in order to store the info in the working directory
1496
        Set to a path to store the file to your chosen location
1497
        Set to `False` to omit disk storage
1498
    append:
1499
        If a task log file already exists in the working directory, the new
1500
        logs will be added to the existing file
1501

1502
    Returns
1503
    -------
1504
    out : :py:class:`pandas.DataFrame`
1505
        log output
1506
    """
1507

1508
    out_df = []
3✔
1509
    for gdir in gdirs:
3✔
1510
        d = OrderedDict()
3✔
1511
        d['rgi_id'] = gdir.rgi_id
3✔
1512
        for task_name in task_names:
3✔
1513
            ts = gdir.get_task_status(task_name)
3✔
1514
            if ts is None:
3✔
1515
                ts = ''
1✔
1516
            d[task_name] = ts.replace(',', ' ')
3✔
1517
        out_df.append(d)
3✔
1518

1519
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
1520
    if path:
3✔
1521
        if path is True:
3✔
1522
            path = os.path.join(cfg.PATHS['working_dir'],
3✔
1523
                                'task_log' + filesuffix + '.csv')
1524
        if os.path.exists(path) and append:
3✔
1525
            odf = pd.read_csv(path, index_col=0)
1✔
1526
            out = odf.join(out, rsuffix='_n')
1✔
1527
        out.to_csv(path)
3✔
1528
    return out
3✔
1529
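
# Editor's note: illustration only (not part of the OGGM API). It shows how
# the compiled task log could be used to list glaciers where a task did not
# succeed; the 'SUCCESS' string below is an assumption about the stored status.
def _example_failed_glaciers(gdirs, task_name):
    log_df = compile_task_log(gdirs, task_names=[task_name], path=False)
    return log_df.loc[log_df[task_name] != 'SUCCESS'].index.tolist()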

1530

1531
@global_task(log)
9✔
1532
def compile_task_time(gdirs, task_names=[], filesuffix='', path=True,
9✔
1533
                      append=True):
1534
    """Gathers the time needed for the selected task(s) to run
1535

1536
    Parameters
1537
    ----------
1538
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1539
        the glacier directories to process
1540
    task_names : list of str
1541
        The tasks to check for
1542
    filesuffix : str
1543
        add suffix to output file
1544
    path:
1545
        Set to `True` in order to store the info in the working directory
1546
        Set to a path to store the file to your chosen location
1547
        Set to `False` to omit disk storage
1548
    append:
1549
        If a task time file already exists in the working directory, the new
1550
        timings will be added to the existing file
1551

1552
    Returns
1553
    -------
1554
    out : :py:class:`pandas.DataFrame`
1555
        log output
1556
    """
1557

1558
    out_df = []
1✔
1559
    for gdir in gdirs:
1✔
1560
        d = OrderedDict()
1✔
1561
        d['rgi_id'] = gdir.rgi_id
1✔
1562
        for task_name in task_names:
1✔
1563
            d[task_name] = gdir.get_task_time(task_name)
1✔
1564
        out_df.append(d)
1✔
1565

1566
    out = pd.DataFrame(out_df).set_index('rgi_id')
1✔
1567
    if path:
1✔
1568
        if path is True:
1✔
1569
            path = os.path.join(cfg.PATHS['working_dir'],
1✔
1570
                                'task_time' + filesuffix + '.csv')
1571
        if os.path.exists(path) and append:
1✔
1572
            odf = pd.read_csv(path, index_col=0)
1✔
1573
            out = odf.join(out, rsuffix='_n')
1✔
1574
        out.to_csv(path)
1✔
1575
    return out
1✔
1576

1577

1578
@entity_task(log)
9✔
1579
def glacier_statistics(gdir, settings_filesuffix='',
9✔
1580
                       inversion_only=False, apply_func=None):
1581
    """Gather as much statistics as possible about this glacier.
1582

1583
    It can be used for result diagnostics and other analyses. If the data
1584
    necessary for a statistic is not available (e.g. flowline length) it
1585
    will simply be ignored.
1586

1587
    Parameters
1588
    ----------
1589
    inversion_only : bool
1590
        if one wants to summarize the inversion output only (including calving)
1591
    apply_func : function
1592
        if one wants to summarize further information about a glacier, set
1593
        this kwarg to a function that accepts a glacier directory as first
1594
        positional argument, and the directory to fill in with data as
1595
        second argument. The directory should only store scalar values (strings,
1596
        float, int)
1597
    """
1598

1599
    d = OrderedDict()
6✔
1600

1601
    # Easy stats - this should always be possible
1602
    d['rgi_id'] = gdir.rgi_id
6✔
1603
    d['rgi_region'] = gdir.rgi_region
6✔
1604
    d['rgi_subregion'] = gdir.rgi_subregion
6✔
1605
    d['name'] = gdir.name
6✔
1606
    d['cenlon'] = gdir.cenlon
6✔
1607
    d['cenlat'] = gdir.cenlat
6✔
1608
    d['rgi_area_km2'] = gdir.rgi_area_km2
6✔
1609
    d['rgi_year'] = gdir.rgi_date
6✔
1610
    d['glacier_type'] = gdir.glacier_type
6✔
1611
    d['terminus_type'] = gdir.terminus_type
6✔
1612
    d['is_tidewater'] = gdir.is_tidewater
6✔
1613
    d['status'] = gdir.status
6✔
1614

1615
    # The rest is less certain. We put these in a try block and see
1616
    # We're good with any error - we store the dict anyway below
1617
    # TODO: should be done with more preselected errors
1618
    with warnings.catch_warnings():
6✔
1619
        warnings.filterwarnings("ignore", category=RuntimeWarning)
6✔
1620

1621
        try:
6✔
1622
            # Grid stuff
1623
            d['grid_dx'] = gdir.grid.dx
6✔
1624
            d['grid_nx'] = gdir.grid.nx
6✔
1625
            d['grid_ny'] = gdir.grid.ny
6✔
1626
        except BaseException:
1✔
1627
            pass
1✔
1628

1629
        try:
6✔
1630
            # Geom stuff
1631
            outline = gdir.read_shapefile('outlines')
6✔
1632
            d['geometry_type'] = outline.type.iloc[0]
6✔
1633
            d['geometry_is_valid'] = outline.is_valid.iloc[0]
6✔
1634
            d['geometry_area_km2'] = outline.to_crs({'proj': 'cea'}).area.iloc[0] * 1e-6
6✔
1635
        except BaseException:
×
1636
            pass
×
1637

1638
        try:
6✔
1639
            # Inversion
1640
            if gdir.has_file('inversion_output'):
6✔
1641
                vol = []
5✔
1642
                vol_bsl = []
5✔
1643
                vol_bwl = []
5✔
1644
                cl = gdir.read_pickle('inversion_output')
5✔
1645
                for c in cl:
5✔
1646
                    vol.extend(c['volume'])
5✔
1647
                    vol_bsl.extend(c.get('volume_bsl', [0]))
5✔
1648
                    vol_bwl.extend(c.get('volume_bwl', [0]))
5✔
1649
                d['inv_volume_km3'] = np.nansum(vol) * 1e-9
5✔
1650
                area = gdir.rgi_area_km2
5✔
1651
                d['vas_volume_km3'] = 0.034 * (area ** 1.375)
5✔
1652
                # BSL / BWL
1653
                d['inv_volume_bsl_km3'] = np.nansum(vol_bsl) * 1e-9
5✔
1654
                d['inv_volume_bwl_km3'] = np.nansum(vol_bwl) * 1e-9
5✔
1655
        except BaseException:
×
1656
            pass
×
1657

1658
        try:
6✔
1659
            # Diagnostics
1660
            diags = gdir.get_diagnostics()
6✔
1661
            for k, v in diags.items():
6✔
1662
                d[k] = v
6✔
1663
        except BaseException:
×
1664
            pass
×
1665

1666
        if inversion_only:
6✔
1667
            return d
×
1668

1669
        try:
6✔
1670
            # Error log
1671
            errlog = gdir.get_error_log()
6✔
1672
            if errlog is not None:
6✔
1673
                d['error_task'] = errlog.split(';')[-2]
3✔
1674
                d['error_msg'] = errlog.split(';')[-1]
3✔
1675
            else:
1676
                d['error_task'] = None
6✔
1677
                d['error_msg'] = None
6✔
1678
        except BaseException:
×
1679
            pass
×
1680

1681
        try:
6✔
1682
            # Masks related stuff
1683
            fpath = gdir.get_filepath('gridded_data')
6✔
1684
            with ncDataset(fpath) as nc:
6✔
1685
                mask = nc.variables['glacier_mask'][:] == 1
6✔
1686
                topo = nc.variables['topo'][:][mask]
6✔
1687
            d['dem_mean_elev'] = np.mean(topo)
6✔
1688
            d['dem_med_elev'] = np.median(topo)
6✔
1689
            d['dem_min_elev'] = np.min(topo)
6✔
1690
            d['dem_max_elev'] = np.max(topo)
6✔
1691
        except BaseException:
2✔
1692
            pass
2✔
1693

1694
        try:
6✔
1695
            # Ext related stuff
1696
            fpath = gdir.get_filepath('gridded_data')
6✔
1697
            with ncDataset(fpath) as nc:
6✔
1698
                ext = nc.variables['glacier_ext'][:] == 1
6✔
1699
                mask = nc.variables['glacier_mask'][:] == 1
6✔
1700
                topo = nc.variables['topo'][:]
6✔
1701
            d['dem_max_elev_on_ext'] = np.max(topo[ext])
6✔
1702
            d['dem_min_elev_on_ext'] = np.min(topo[ext])
6✔
1703
            a = np.sum(mask & (topo > d['dem_max_elev_on_ext']))
6✔
1704
            d['dem_perc_area_above_max_elev_on_ext'] = a / np.sum(mask)
6✔
1705
            # Terminus loc
1706
            j, i = np.nonzero((topo[ext].min() == topo) & ext)
6✔
1707
            lon, lat = gdir.grid.ij_to_crs(i[0], j[0], crs=salem.wgs84)
6✔
1708
            d['terminus_lon'] = lon
6✔
1709
            d['terminus_lat'] = lat
6✔
1710
        except BaseException:
2✔
1711
            pass
2✔
1712

1713
        try:
6✔
1714
            # Centerlines
1715
            cls = gdir.read_pickle('centerlines')
6✔
1716
            longest = 0.
6✔
1717
            for cl in cls:
6✔
1718
                longest = np.max([longest, cl.dis_on_line[-1]])
6✔
1719
            d['n_centerlines'] = len(cls)
6✔
1720
            d['longest_centerline_km'] = longest * gdir.grid.dx / 1000.
6✔
1721
        except BaseException:
2✔
1722
            pass
2✔
1723

1724
        try:
6✔
1725
            # Flowline related stuff
1726
            h = np.array([])
6✔
1727
            widths = np.array([])
6✔
1728
            slope = np.array([])
6✔
1729
            fls = gdir.read_pickle('inversion_flowlines')
6✔
1730
            dx = fls[0].dx * gdir.grid.dx
6✔
1731
            for fl in fls:
6✔
1732
                hgt = fl.surface_h
6✔
1733
                h = np.append(h, hgt)
6✔
1734
                widths = np.append(widths, fl.widths * gdir.grid.dx)
6✔
1735
                slope = np.append(slope, np.arctan(-np.gradient(hgt, dx)))
5✔
1736
                length = len(hgt) * dx
5✔
1737
            d['main_flowline_length'] = length
5✔
1738
            d['inv_flowline_glacier_area'] = np.sum(widths * dx)
5✔
1739
            d['flowline_mean_elev'] = np.average(h, weights=widths)
5✔
1740
            d['flowline_max_elev'] = np.max(h)
5✔
1741
            d['flowline_min_elev'] = np.min(h)
5✔
1742
            d['flowline_avg_slope'] = np.mean(slope)
5✔
1743
            d['flowline_avg_width'] = np.mean(widths)
5✔
1744
            d['flowline_last_width'] = fls[-1].widths[-1] * gdir.grid.dx
5✔
1745
            d['flowline_last_5_widths'] = np.mean(fls[-1].widths[-5:] *
5✔
1746
                                                  gdir.grid.dx)
1747
        except BaseException:
2✔
1748
            pass
2✔
1749

1750
        try:
6✔
1751
            # climate
1752
            info = gdir.get_climate_info()
6✔
1753
            for k, v in info.items():
6✔
1754
                d[k] = v
6✔
1755
        except BaseException:
×
1756
            pass
×
1757

1758
        try:
6✔
1759
            # MB calib
1760
            for k in ['rgi_id', 'bias', 'melt_f', 'prcp_fac', 'temp_bias',
6✔
1761
                      'reference_mb', 'reference_mb_err', 'reference_period',
1762
                      'mb_global_params', 'baseline_climate_source']:
1763
                v = gdir.settings[k]
6✔
1764
                if np.isscalar(v):
5✔
1765
                    d[k] = v
5✔
1766
                else:
1767
                    for k2, v2 in v.items():
5✔
1768
                        d[k2] = v2
4✔
1769
        except BaseException:
3✔
1770
            pass
3✔
1771

1772
        if apply_func:
6✔
1773
            # User defined statistics
1774
            try:
3✔
1775
                apply_func(gdir, d)
3✔
1776
            except BaseException:
×
1777
                pass
×
1778

1779
    return d
6✔
1780

1781

1782
@global_task(log)
9✔
1783
def compile_glacier_statistics(gdirs, filesuffix='', path=True,
9✔
1784
                               inversion_only=False, apply_func=None):
1785
    """Gather as much statistics as possible about a list of glaciers.
1786

1787
    It can be used for result diagnostics and other analyses. If the data
1788
    necessary for a statistic is not available (e.g. flowline length) it
1789
    will simply be ignored.
1790

1791
    Parameters
1792
    ----------
1793
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1794
        the glacier directories to process
1795
    filesuffix : str
1796
        add suffix to output file
1797
    path : str, bool
1798
        Set to "True" in order to store the info in the working directory
1799
        Set to a path to store the file to your chosen location
1800
    inversion_only : bool
1801
        if one wants to summarize the inversion output only (including calving)
1802
    apply_func : function
1803
        if one wants to summarize further information about a glacier, set
1804
        this kwarg to a function that accepts a glacier directory as first
1805
        positional argument, and the directory to fill in with data as
1806
        second argument. The directory should only store scalar values (strings,
1807
        float, int).
1808
        !Careful! For multiprocessing, the function cannot be located at the
1809
        top level, i.e. you may need to import it from a module for this to work,
1810
        or from a dummy class (https://stackoverflow.com/questions/8804830)
1811
    """
1812
    from oggm.workflow import execute_entity_task
6✔
1813

1814
    out_df = execute_entity_task(glacier_statistics, gdirs,
6✔
1815
                                 apply_func=apply_func,
1816
                                 inversion_only=inversion_only)
1817

1818
    out = pd.DataFrame(out_df).set_index('rgi_id')
6✔
1819

1820
    if path:
6✔
1821
        if path is True:
6✔
1822
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
5✔
1823
                                    ('glacier_statistics' +
1824
                                     filesuffix + '.csv')))
1825
        else:
1826
            out.to_csv(path)
2✔
1827
    return out
6✔
1828

1829

1830
@entity_task(log)
9✔
1831
def _write_fl_diagnostics(gdir, input_filesuffix='', folder_map=None):
9✔
1832
    """Copy the flowline diagnostics file to the folder in folder_map.
1833
    """
1834
    try:
1✔
1835
        src_fp = gdir.get_filepath('fl_diagnostics', filesuffix=input_filesuffix)
1✔
1836
        dst_folder = folder_map[gdir.rgi_id]
1✔
1837
        original_name = os.path.basename(src_fp)
1✔
1838
        dst_filename = f"{gdir.rgi_id}_{original_name}"
1✔
1839
        dst_fp = os.path.join(dst_folder, dst_filename)
1✔
1840
        shutil.copy(src_fp, dst_fp)
1✔
1841
    except BaseException:
×
1842
        pass
×
1843

1844

1845
@global_task(log)
9✔
1846
def compile_fl_diagnostics(gdirs, *,
9✔
1847
                           path=True,
1848
                           group_size=1000,
1849
                           input_filesuffix='',
1850
                           compress=True,
1851
                           delete_folders=False):
1852
    """Write the flowline diagnostics to batches of tar files.
1853

1854
    This is mostly useful for people not using OGGM.
1855

1856
    Parameters
1857
    ----------
1858
    gdirs:
1859
        the list of GlacierDir to process.
1860
    path: str or bool
1861
        Set to "True" in order to store the files in the working directory
1862
        Set to a str path to store the files to your chosen location
1863
        Must be a path to a dir
1864
    group_size : int
1865
        The number of glaciers per tarfile
1866
    input_filesuffix : str
1867
        the input filesuffix to use for the fl_diagnostics files
1868
        (e.g. '_historical')
1869
    compress : bool
1870
        bundle each batch folder into a compressed tar file
1871
    delete_folders : bool
1872
        also delete the tarred folders after compression
1873
    """
1874
    from oggm.workflow import execute_entity_task
1✔
1875

1876
    if path is True:
1✔
1877
        path = os.path.join(cfg.PATHS['working_dir'],
×
1878
                            'fl_diagnostics' + input_filesuffix)
1879

1880
    # Assign each glacier to a batch folder based on its index
1881
    mkdir(path)
1✔
1882
    folder_map = {}
1✔
1883
    for idx, gdir in enumerate(gdirs):
1✔
1884
        batch_idx = (idx // group_size) * group_size
1✔
1885
        batch_name = f"RGI{gdir.rgi_version}-{gdir.rgi_region}."
1✔
1886
        batch_name += f'{batch_idx:05d}'[:2]
1✔
1887
        batch_dir = os.path.join(path, batch_name)
1✔
1888
        mkdir(batch_dir)
1✔
1889
        folder_map[gdir.rgi_id] = batch_dir
1✔
1890

1891
    log.workflow('compile_fl_diagnostics to {} ...'.format(path))
1✔
1892

1893
    execute_entity_task(_write_fl_diagnostics, gdirs,
1✔
1894
                        input_filesuffix=input_filesuffix,
1895
                        folder_map=folder_map)
1896

1897
    if compress:
1✔
1898
        # Get unique batch folders
1899
        batch_folders = set(folder_map.values())
1✔
1900
        for batch_folder in sorted(batch_folders):
1✔
1901
            batch_name = os.path.basename(batch_folder)
1✔
1902
            tar_path = os.path.join(path, f"{batch_name}.tar.gz")
1✔
1903
            with tarfile.open(tar_path, "w:gz") as tar:
1✔
1904
                tar.add(batch_folder, arcname=batch_name)
1✔
1905

1906
            if delete_folders:
1✔
1907
                shutil.rmtree(batch_folder)
×
1908
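
# Editor's note: illustration only (not part of the OGGM API). A hedged call
# example for the batching above; '_historical' is the filesuffix assumed to
# have been used for the dynamical run.
def _example_export_fl_diagnostics(gdirs, out_dir):
    compile_fl_diagnostics(gdirs, path=out_dir, group_size=1000,
                           input_filesuffix='_historical',
                           compress=True, delete_folders=True)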

1909

1910
@entity_task(log)
9✔
1911
def read_glacier_hypsometry(gdir):
9✔
1912
    """Utility function to read the glacier hypsometry in the folder.
1913

1914
    Parameters
1915
    ----------
1916
    gdir :  :py:class:`oggm.GlacierDirectory` object
1917
        the glacier directory to process
1918

1919
    Returns
1920
    -------
1921
    the dataframe
1922
    """
1923
    try:
×
1924
        out = pd.read_csv(gdir.get_filepath('hypsometry')).iloc[0]
×
1925
    except BaseException:
×
1926
        out = pd.Series({'rgi_id': gdir.rgi_id})
×
1927
    return out
×
1928

1929

1930
@global_task(log)
9✔
1931
def compile_glacier_hypsometry(gdirs, filesuffix='', path=True,
9✔
1932
                               add_column=None):
1933
    """Gather as much statistics as possible about a list of glaciers.
1934

1935
    It can be used for result diagnostics and other analyses. If the
1936
    hypsometry data of a glacier is not available, it
1937
    will simply be ignored.
1938

1939
    Parameters
1940
    ----------
1941
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1942
        the glacier directories to process
1943
    filesuffix : str
1944
        add suffix to output file
1945
    path : str, bool
1946
        Set to "True" in order to store the info in the working directory
1947
        Set to a path to store the file to your chosen location
1948
    add_column : tuple
1949
        an optional (column name, value) pair to add to the compiled dataframe
1950
    """
1951
    from oggm.workflow import execute_entity_task
×
1952

1953
    out_df = execute_entity_task(read_glacier_hypsometry, gdirs)
×
1954

1955
    out = pd.DataFrame(out_df).set_index('rgi_id')
×
1956
    if add_column is not None:
×
1957
        out[add_column[0]] = add_column[1]
×
1958
    if path:
×
1959
        if path is True:
×
1960
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
×
1961
                                    ('glacier_hypsometry' +
1962
                                     filesuffix + '.csv')))
1963
        else:
1964
            out.to_csv(path)
×
1965
    return out
×
1966

1967

1968
@global_task(log)
9✔
1969
def compile_fixed_geometry_mass_balance(gdirs, settings_filesuffix='',
9✔
1970
                                        filesuffix='',
1971
                                        path=True, csv=False,
1972
                                        use_inversion_flowlines=True,
1973
                                        ys=None, ye=None, years=None,
1974
                                        climate_filename='climate_historical',
1975
                                        climate_input_filesuffix='',
1976
                                        temperature_bias=None,
1977
                                        precipitation_factor=None):
1978

1979
    """Compiles a table of specific mass balance timeseries for all glaciers.
1980

1981
    The file is stored in an HDF file (not CSV) by default. Use pd.read_hdf
1982
    to open it.
1983

1984
    Parameters
1985
    ----------
1986
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
1987
        the glacier directories to process
1988
    settings_filesuffix: str
1989
        You can use a different set of settings by providing a filesuffix. This
1990
        is useful for sensitivity experiments.
1991
    filesuffix : str
1992
        add suffix to output file
1993
    path : str, bool
1994
        Set to "True" in order to store the info in the working directory
1995
        Set to a path to store the file to your chosen location (file
1996
        extension matters)
1997
    csv : bool
1998
        Set to store the data in csv instead of hdf.
1999
    use_inversion_flowlines : bool
2000
        whether to use the inversion flowlines or the model flowlines
2001
    ys : int
2002
        start year of the model run (default: from the climate file)
2003
        date)
2004
    ye : int
2005
        end year of the model run (default: from the climate file)
2006
    years : array of ints
2007
        override ys and ye with the years of your choice
2008
    climate_filename : str
2009
        name of the climate file, e.g. 'climate_historical' (default) or
2010
        'gcm_data'
2011
    climate_input_filesuffix: str
2012
        filesuffix for the input climate file
2013
    temperature_bias : float
2014
        add a bias to the temperature timeseries
2015
    precipitation_factor: float
2016
        multiply the precipitation time series by this factor
2017
        default is None, which means that the precipitation factor from the
2018
        calibration (cfg.PARAMS['prcp_fac']) is applied
2019
    """
2020

2021
    from oggm.workflow import execute_entity_task
3✔
2022
    from oggm.core.massbalance import fixed_geometry_mass_balance
3✔
2023

2024
    out_df = execute_entity_task(fixed_geometry_mass_balance, gdirs,
3✔
2025
                                 settings_filesuffix=settings_filesuffix,
2026
                                 use_inversion_flowlines=use_inversion_flowlines,
2027
                                 ys=ys, ye=ye, years=years, climate_filename=climate_filename,
2028
                                 climate_input_filesuffix=climate_input_filesuffix,
2029
                                 temperature_bias=temperature_bias,
2030
                                 precipitation_factor=precipitation_factor)
2031

2032
    for idx, s in enumerate(out_df):
3✔
2033
        if s is None:
3✔
2034
            out_df[idx] = pd.Series(np.nan)
×
2035

2036
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
3✔
2037
    out = out.dropna(axis=0, how='all')
3✔
2038

2039
    if path:
3✔
2040
        if path is True:
3✔
2041
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
2042
                                 'fixed_geometry_mass_balance' + filesuffix)
2043
            if csv:
1✔
2044
                out.to_csv(fpath + '.csv')
×
2045
            else:
2046
                out.to_hdf(fpath + '.hdf', key='df')
1✔
2047
        else:
2048
            ext = os.path.splitext(path)[-1]
2✔
2049
            if ext.lower() == '.csv':
2✔
2050
                out.to_csv(path)
2✔
2051
            elif ext.lower() == '.hdf':
×
2052
                out.to_hdf(path, key='df')
×
2053
    return out
3✔
2054
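
# Editor's note: illustration only (not part of the OGGM API). By default the
# compiled mass balance is written as HDF in the working directory; the file
# name below follows the pattern used above.
def _example_read_fixed_geometry_mb(filesuffix=''):
    fpath = os.path.join(cfg.PATHS['working_dir'],
                         'fixed_geometry_mass_balance' + filesuffix + '.hdf')
    # columns are RGI ids, the index is the year
    return pd.read_hdf(fpath)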

2055

2056
@global_task(log)
9✔
2057
def compile_ela(gdirs, settings_filesuffix='', filesuffix='',
9✔
2058
                path=True, csv=False, ys=None, ye=None,
2059
                years=None, climate_filename='climate_historical', temperature_bias=None,
2060
                precipitation_factor=None, climate_input_filesuffix='',
2061
                mb_model_class=None):
2062
    """Compiles a table of ELA timeseries for all glaciers for a given years,
2063
    using the mb_model_class (default MonthlyTIModel).
2064

2065
    The file is stored in an HDF file (not CSV) by default. Use pd.read_hdf
2066
    to open it.
2067

2068
    Parameters
2069
    ----------
2070
    gdirs : list of :py:class:`oggm.GlacierDirectory` objects
2071
        the glacier directories to process
2072
    settings_filesuffix: str
2073
        You can use a different set of settings by providing a filesuffix. This
2074
        is useful for sensitivity experiments.
2075
    filesuffix : str
2076
        add suffix to output file
2077
    path : str, bool
2078
        Set to "True" in order to store the info in the working directory
2079
        Set to a path to store the file to your chosen location (file
2080
        extension matters)
2081
    csv: bool
2082
        Set to store the data in csv instead of hdf.
2083
    ys : int
2084
        start year
2085
    ye : int
2086
        end year
2087
    years : array of ints
2088
        override ys and ye with the years of your choice
2089
    climate_filename : str
2090
        name of the climate file, e.g. 'climate_historical' (default) or
2091
        'gcm_data'
2092
    climate_input_filesuffix : str
2093
        filesuffix for the input climate file
2094
    temperature_bias : float
2095
        add a bias to the temperature timeseries
2096
    precipitation_factor: float
2097
        multiply the precipitation time series by this factor
2098
        default is None, which means that the precipitation factor from the
2099
        calibration (cfg.PARAMS['prcp_fac']) is applied
2100
    mb_model_class : MassBalanceModel class
2101
        the MassBalanceModel class to use, default is MonthlyTIModel
2102
    """
2103
    from oggm.workflow import execute_entity_task
1✔
2104
    from oggm.core.massbalance import compute_ela, MonthlyTIModel
1✔
2105

2106
    if mb_model_class is None:
1✔
2107
        mb_model_class = MonthlyTIModel
1✔
2108

2109
    out_df = execute_entity_task(compute_ela, gdirs,
1✔
2110
                                 settings_filesuffix=settings_filesuffix,
2111
                                 ys=ys, ye=ye, years=years,
2112
                                 climate_filename=climate_filename,
2113
                                 climate_input_filesuffix=climate_input_filesuffix,
2114
                                 temperature_bias=temperature_bias,
2115
                                 precipitation_factor=precipitation_factor,
2116
                                 mb_model_class=mb_model_class)
2117

2118
    for idx, s in enumerate(out_df):
1✔
2119
        if s is None:
1✔
2120
            out_df[idx] = pd.Series(np.nan)
×
2121

2122
    out = pd.concat(out_df, axis=1, keys=[gd.rgi_id for gd in gdirs])
1✔
2123
    out = out.dropna(axis=0, how='all')
1✔
2124

2125
    if path:
1✔
2126
        if path is True:
1✔
2127
            fpath = os.path.join(cfg.PATHS['working_dir'],
1✔
2128
                                 'ELA' + filesuffix)
2129
            if csv:
1✔
2130
                out.to_csv(fpath + '.csv')
1✔
2131
            else:
2132
                out.to_hdf(fpath + '.hdf', key='df')
1✔
2133
        else:
2134
            ext = os.path.splitext(path)[-1]
×
2135
            if ext.lower() == '.csv':
×
2136
                out.to_csv(path)
×
2137
            elif ext.lower() == '.hdf':
×
2138
                out.to_hdf(path, key='df')
×
2139
    return out
1✔
2140
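
# Editor's note: illustration only (not part of the OGGM API). Compute ELA
# time series for a fixed period and store them as CSV; `gdirs` is assumed to
# be a list of calibrated GlacierDirectory objects.
def _example_compile_ela(gdirs):
    return compile_ela(gdirs, ys=1980, ye=2019, csv=True)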

2141

2142
@entity_task(log)
9✔
2143
def climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
2144
                       input_filesuffix=''):
2145
    """Gather as much statistics as possible about this glacier.
2146

2147
    It can be used for result diagnostics and other analyses. If the data
2148
    necessary for a statistic is not available (e.g. flowline length) it
2149
    will simply be ignored.
2150

2151
    Important note: the climate is extracted from the mass-balance model and
2152
    is therefore "corrected" according to the mass-balance calibration scheme
2153
    (e.g. the precipitation factor and the temp bias correction). For more
2154
    flexible information about the raw climate data, use `compile_climate_input`
2155
    or `raw_climate_statistics`.
2156

2157
    Parameters
2158
    ----------
2159
    add_climate_period : int or list of ints
2160
        compile climate statistics for the halfsize*2 + 1 yrs period
2161
        around the selected date.
2162
    halfsize : int
2163
        the half size of the window
2164
    """
2165
    from oggm.core.massbalance import (ConstantMassBalance,
3✔
2166
                                       MultipleFlowlineMassBalance)
2167

2168
    d = OrderedDict()
3✔
2169

2170
    # Easy stats - this should always be possible
2171
    d['rgi_id'] = gdir.rgi_id
3✔
2172
    d['rgi_region'] = gdir.rgi_region
3✔
2173
    d['rgi_subregion'] = gdir.rgi_subregion
3✔
2174
    d['name'] = gdir.name
3✔
2175
    d['cenlon'] = gdir.cenlon
3✔
2176
    d['cenlat'] = gdir.cenlat
3✔
2177
    d['rgi_area_km2'] = gdir.rgi_area_km2
3✔
2178
    d['glacier_type'] = gdir.glacier_type
3✔
2179
    d['terminus_type'] = gdir.terminus_type
3✔
2180
    d['status'] = gdir.status
3✔
2181

2182
    # The rest is less certain
2183
    with warnings.catch_warnings():
3✔
2184
        warnings.filterwarnings("ignore", category=RuntimeWarning)
3✔
2185
        try:
3✔
2186
            # Flowline related stuff
2187
            h = np.array([])
3✔
2188
            widths = np.array([])
3✔
2189
            fls = gdir.read_pickle('inversion_flowlines')
3✔
2190
            dx = fls[0].dx * gdir.grid.dx
3✔
2191
            for fl in fls:
3✔
2192
                hgt = fl.surface_h
3✔
2193
                h = np.append(h, hgt)
3✔
2194
                widths = np.append(widths, fl.widths * dx)
3✔
2195
            d['flowline_mean_elev'] = np.average(h, weights=widths)
3✔
2196
            d['flowline_max_elev'] = np.max(h)
3✔
2197
            d['flowline_min_elev'] = np.min(h)
3✔
2198
        except BaseException:
1✔
2199
            pass
1✔
2200

2201
        # Climate and MB at specified dates
2202
        add_climate_period = tolist(add_climate_period)
3✔
2203
        for y0 in add_climate_period:
3✔
2204
            try:
3✔
2205
                fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
3✔
2206
                mbcl = ConstantMassBalance
3✔
2207
                mbmod = MultipleFlowlineMassBalance(gdir, mb_model_class=mbcl,
3✔
2208
                                                    y0=y0, halfsize=halfsize,
2209
                                                    use_inversion_flowlines=True,
2210
                                                    input_filesuffix=input_filesuffix)
2211
                h, w, mbh = mbmod.get_annual_mb_on_flowlines()
3✔
2212
                mbh = mbh * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density']
3✔
2213
                pacc = np.where(mbh >= 0)
3✔
2214
                pab = np.where(mbh < 0)
3✔
2215
                d[fs + '_aar'] = np.sum(w[pacc]) / np.sum(w)
3✔
2216
                try:
3✔
2217
                    # Try to get the slope
2218
                    mb_slope, _, _, _, _ = stats.linregress(h[pab], mbh[pab])
3✔
2219
                    d[fs + '_mb_grad'] = mb_slope
3✔
2220
                except BaseException:
×
2221
                    # we don't mind if something goes wrong
2222
                    d[fs + '_mb_grad'] = np.nan
×
2223
                d[fs + '_ela_h'] = mbmod.get_ela()
3✔
2224
                # Climate
2225
                t, tm, p, ps = mbmod.flowline_mb_models[0].get_annual_climate(
3✔
2226
                    [d[fs + '_ela_h'],
2227
                     d['flowline_mean_elev'],
2228
                     d['flowline_max_elev'],
2229
                     d['flowline_min_elev']])
2230
                for n, v in zip(['temp', 'tempmelt', 'prcpsol'], [t, tm, ps]):
3✔
2231
                    d[fs + '_avg_' + n + '_ela_h'] = v[0]
3✔
2232
                    d[fs + '_avg_' + n + '_mean_elev'] = v[1]
3✔
2233
                    d[fs + '_avg_' + n + '_max_elev'] = v[2]
3✔
2234
                    d[fs + '_avg_' + n + '_min_elev'] = v[3]
3✔
2235
                d[fs + '_avg_prcp'] = p[0]
3✔
2236
            except BaseException:
1✔
2237
                pass
1✔
2238

2239
    return d
3✔
2240

2241

2242
@entity_task(log)
9✔
2243
def raw_climate_statistics(gdir, add_climate_period=1995, halfsize=15,
9✔
2244
                           input_filesuffix=''):
2245
    """Gather as much statistics as possible about this glacier.
2246

2247
    This is like "climate_statistics" but without relying on the
2248
    mass-balance model, i.e. closer to the actual (uncorrected) data.
2249

2250
    Parameters
2251
    ----------
2252
    add_climate_period : int or list of ints
2253
        compile climate statistics for the halfsize*2 + 1 yrs period around the selected
2254
        date.
2255
    """
2256
    d = OrderedDict()
1✔
2257
    # Easy stats - this should always be possible
2258
    d['rgi_id'] = gdir.rgi_id
1✔
2259
    d['rgi_region'] = gdir.rgi_region
1✔
2260
    d['rgi_subregion'] = gdir.rgi_subregion
1✔
2261
    d['name'] = gdir.name
1✔
2262
    d['cenlon'] = gdir.cenlon
1✔
2263
    d['cenlat'] = gdir.cenlat
1✔
2264
    d['rgi_area_km2'] = gdir.rgi_area_km2
1✔
2265
    d['glacier_type'] = gdir.glacier_type
1✔
2266
    d['terminus_type'] = gdir.terminus_type
1✔
2267
    d['status'] = gdir.status
1✔
2268

2269
    # The rest is less certain
2270
    with warnings.catch_warnings():
1✔
2271
        warnings.filterwarnings("ignore", category=RuntimeWarning)
1✔
2272

2273
        # Climate and MB at specified dates
2274
        add_climate_period = tolist(add_climate_period)
1✔
2275

2276
        # get non-corrected winter daily mean prcp (kg m-2 day-1) for
2277
        # the chosen time period
2278
        for y0 in add_climate_period:
1✔
2279
            fs = '{}-{}'.format(y0 - halfsize, y0 + halfsize)
1✔
2280
            try:
1✔
2281
                # get non-corrected winter daily mean prcp (kg m-2 day-1)
2282
                # it is easier to get this directly from the raw climate files
2283
                fp = gdir.get_filepath('climate_historical',
1✔
2284
                                       filesuffix=input_filesuffix)
2285
                with xr.open_dataset(fp).prcp as ds_pr:
1✔
2286
                    # just select winter months
2287
                    if gdir.hemisphere == 'nh':
1✔
2288
                        m_winter = [10, 11, 12, 1, 2, 3, 4]
1✔
2289
                    else:
2290
                        m_winter = [4, 5, 6, 7, 8, 9, 10]
×
2291
                    ds_pr_winter = ds_pr.where(ds_pr['time.month'].isin(m_winter), drop=True)
1✔
2292
                    # select the correct year time period
2293
                    ds_pr_winter = ds_pr_winter.sel(time=slice(f'{fs[:4]}-01-01',
1✔
2294
                                                               f'{fs[-4:]}-12-01'))
2295
                    # check if we have the full time period
2296
                    n_years = int(fs[-4:]) - int(fs[:4]) + 1
1✔
2297
                    assert len(ds_pr_winter.time) == n_years * 7, 'chosen time-span invalid'
1✔
2298
                    ds_d_pr_winter_mean = (ds_pr_winter / ds_pr_winter.time.dt.daysinmonth).mean()
1✔
2299
                    d[f'{fs}_uncorrected_winter_daily_mean_prcp'] = ds_d_pr_winter_mean.values
1✔
2300
            except BaseException:
1✔
2301
                pass
1✔
2302
    return d
1✔
2303

2304

2305
@global_task(log)
9✔
2306
def compile_climate_statistics(gdirs, filesuffix='', path=True,
9✔
2307
                               add_climate_period=1995,
2308
                               halfsize=15,
2309
                               add_raw_climate_statistics=False,
2310
                               input_filesuffix=''):
2311
    """Gather as much statistics as possible about a list of glaciers.
2312

2313
    It can be used for result diagnostics and other analyses. If the data
2314
    necessary for a statistic is not available (e.g. flowline length) it
2315
    will simply be ignored.
2316

2317
    Parameters
2318
    ----------
2319
    gdirs: the list of GlacierDir to process.
2320
    filesuffix : str
2321
        add suffix to output file
2322
    path : str, bool
2323
        Set to "True" in order to store the info in the working directory
2324
        Set to a path to store the file to your chosen location
2325
    add_climate_period : int or list of ints
2326
        compile climate statistics for the halfsize*2 + 1 yrs period around the selected
2327
        date.
2328
    input_filesuffix : str
2329
        filesuffix of the used climate_historical file, default is no filesuffix
2330
    """
2331
    from oggm.workflow import execute_entity_task
3✔
2332

2333
    out_df = execute_entity_task(climate_statistics, gdirs,
3✔
2334
                                 add_climate_period=add_climate_period,
2335
                                 halfsize=halfsize,
2336
                                 input_filesuffix=input_filesuffix)
2337
    out = pd.DataFrame(out_df).set_index('rgi_id')
3✔
2338

2339
    if add_raw_climate_statistics:
3✔
2340
        out_df = execute_entity_task(raw_climate_statistics, gdirs,
1✔
2341
                                     add_climate_period=add_climate_period,
2342
                                     halfsize=halfsize,
2343
                                     input_filesuffix=input_filesuffix)
2344
        out = out.merge(pd.DataFrame(out_df).set_index('rgi_id'))
1✔
2345

2346
    if path:
3✔
2347
        if path is True:
3✔
2348
            out.to_csv(os.path.join(cfg.PATHS['working_dir'],
3✔
2349
                                    ('climate_statistics' +
2350
                                     filesuffix + '.csv')))
2351
        else:
2352
            out.to_csv(path)
1✔
2353
    return out
3✔
2354
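
# Editor's note: illustration only (not part of the OGGM API). Compile the
# corrected and raw climate statistics for two 31-yr periods (halfsize=15
# around 1965 and 1995).
def _example_compile_climate_stats(gdirs):
    return compile_climate_statistics(gdirs, path=False,
                                      add_climate_period=[1965, 1995],
                                      halfsize=15,
                                      add_raw_climate_statistics=True)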

2355

2356
def extend_past_climate_run(past_run_file=None,
9✔
2357
                            fixed_geometry_mb_file=None,
2358
                            glacier_statistics_file=None,
2359
                            path=False,
2360
                            use_compression=True):
2361
    """Utility function to extend past MB runs prior to the RGI date.
2362

2363
    We use a fixed geometry (and a fixed calving rate) for all dates prior
2364
    to the RGI date.
2365

2366
    This is not parallelized, i.e. it can be a bit slow.
2367

2368
    Parameters
2369
    ----------
2370
    past_run_file : str
2371
        path to the historical run (nc)
2372
    fixed_geometry_mb_file : str
2373
        path to the MB file (csv)
2374
    glacier_statistics_file : str
2375
        path to the glacier stats file (csv)
2376
    path : str
2377
        where to store the file
2378
    use_compression : bool
        use zlib compression on the output netCDF file
2379

2380
    Returns
2381
    -------
2382
    the extended dataset
2383
    """
2384

2385
    log.workflow('Applying extend_past_climate_run on '
2✔
2386
                 '{}'.format(past_run_file))
2387

2388
    fixed_geometry_mb_df = pd.read_csv(fixed_geometry_mb_file, index_col=0,
2✔
2389
                                       low_memory=False)
2390
    stats_df = pd.read_csv(glacier_statistics_file, index_col=0,
2✔
2391
                           low_memory=False)
2392

2393
    with xr.open_dataset(past_run_file) as past_ds:
2✔
2394

2395
        # We need at least area and vol to do something
2396
        if 'volume' not in past_ds.data_vars or 'area' not in past_ds.data_vars:
2✔
2397
            raise InvalidWorkflowError('Need both volume and area to proceed')
×
2398

2399
        y0_run = int(past_ds.time[0])
2✔
2400
        y1_run = int(past_ds.time[-1])
2✔
2401
        if (y1_run - y0_run + 1) != len(past_ds.time):
2✔
2402
            raise NotImplementedError('Currently only supports annual outputs')
2403
        y0_clim = int(fixed_geometry_mb_df.index[0])
2✔
2404
        y1_clim = int(fixed_geometry_mb_df.index[-1])
2✔
2405
        if y0_clim > y0_run or y1_clim < y0_run:
2✔
2406
            raise InvalidWorkflowError('Dates do not match.')
×
2407
        if y1_clim != y1_run - 1:
2✔
2408
            raise InvalidWorkflowError('Dates do not match.')
×
2409
        if len(past_ds.rgi_id) != len(fixed_geometry_mb_df.columns):
2✔
2410
            # This might happen if we are testing on new directories
2411
            fixed_geometry_mb_df = fixed_geometry_mb_df[past_ds.rgi_id]
×
2412
        if len(past_ds.rgi_id) != len(stats_df.index):
2✔
2413
            stats_df = stats_df.loc[past_ds.rgi_id]
×
2414

2415
        # Make sure we agree on order
2416
        df = fixed_geometry_mb_df[past_ds.rgi_id]
2✔
2417

2418
        # Output data
2419
        years = np.arange(y0_clim, y1_run+1)
2✔
2420
        ods = past_ds.reindex({'time': years})
2✔
2421

2422
        # Time
2423
        ods['hydro_year'].data[:] = years
2✔
2424
        ods['hydro_month'].data[:] = ods['hydro_month'][-1].item()
2✔
2425
        ods['calendar_year'].data[:] = years
2✔
2426
        ods['calendar_month'].data[:] = ods['calendar_month'][-1].item()
2✔
2427
        for vn in ['hydro_year', 'hydro_month', 'calendar_year', 'calendar_month']:
2✔
2428
            ods[vn] = ods[vn].astype(int)
2✔
2429

2430
        # New vars
2431
        for vn in ['volume', 'volume_m3_min_h', 'volume_bsl', 'volume_bwl',
2✔
2432
                   'area', 'area_m2_min_h', 'length', 'calving', 'calving_rate']:
2433
            if vn in ods.data_vars:
2✔
2434
                ods[vn + '_ext'] = ods[vn].copy(deep=True)
2✔
2435
                ods[vn + '_ext'].attrs['description'] += ' (extended with MB data)'
2✔
2436

2437
        vn = 'volume_fixed_geom_ext'
2✔
2438
        ods[vn] = ods['volume'].copy(deep=True)
2✔
2439
        ods[vn].attrs['description'] += ' (replaced with fixed geom data)'
2✔
2440

2441
        rho = cfg.PARAMS['ice_density']
2✔
2442
        # Loop over the ids
2443
        for i, rid in enumerate(ods.rgi_id.data):
2✔
2444
            # Both do not need to be the same length, but they need to start at the same year
2445
            mb_ts = df.values[:, i]
2✔
2446
            orig_vol_ts = ods.volume_ext.data[:, i]
2✔
2447
            if not (np.isfinite(mb_ts[-1]) and np.isfinite(orig_vol_ts[-1])):
2✔
2448
                # Not a valid glacier
2449
                continue
×
2450
            if np.isfinite(orig_vol_ts[0]):
2✔
2451
                # Nothing to extend, really
2452
                continue
×
2453

2454
            # First valid id
2455
            fid = np.argmax(np.isfinite(orig_vol_ts))
2✔
2456

2457
            # Add calving to the mix
2458
            try:
2✔
2459
                calv_flux = stats_df.loc[rid, 'calving_flux'] * 1e9
2✔
2460
                calv_rate = stats_df.loc[rid, 'calving_rate_myr']
×
2461
            except KeyError:
2✔
2462
                calv_flux = 0
2✔
2463
                calv_rate = 0
2✔
2464
            if not np.isfinite(calv_flux):
2✔
2465
                calv_flux = 0
×
2466
            if not np.isfinite(calv_rate):
2✔
2467
                calv_rate = 0
×
2468

2469
            # Fill area and length which stays constant before date
2470
            orig_area_ts = ods.area_ext.data[:, i]
2✔
2471
            orig_area_ts[:fid] = orig_area_ts[fid]
2✔
2472

2473
            # We convert SMB to volume
2474
            mb_vol_ts = (mb_ts / rho * orig_area_ts[fid] - calv_flux).cumsum()
2✔
2475
            calv_ts = (mb_ts * 0 + calv_flux).cumsum()
2✔
2476

2477
            # The -1 is because the volume change is known at end of year
2478
            mb_vol_ts = mb_vol_ts + orig_vol_ts[fid] - mb_vol_ts[fid-1]
2✔
2479

2480
            # Now back to netcdf
2481
            ods.volume_fixed_geom_ext.data[1:, i] = mb_vol_ts
2✔
2482
            ods.volume_ext.data[1:fid, i] = mb_vol_ts[0:fid-1]
2✔
2483
            ods.area_ext.data[:, i] = orig_area_ts
2✔
2484

2485
            # Optional variables
2486
            if 'length' in ods.data_vars:
2✔
2487
                orig_length_ts = ods.length_ext.data[:, i]
2✔
2488
                orig_length_ts[:fid] = orig_length_ts[fid]
2✔
2489
                ods.length_ext.data[:, i] = orig_length_ts
2✔
2490

2491
            if 'calving' in ods.data_vars:
2✔
2492
                orig_calv_ts = ods.calving_ext.data[:, i]
1✔
2493
                # The -1 is because the volume change is known at end of year
2494
                calv_ts = calv_ts + orig_calv_ts[fid] - calv_ts[fid-1]
1✔
2495
                ods.calving_ext.data[1:fid, i] = calv_ts[0:fid-1]
1✔
2496

2497
            if 'calving_rate' in ods.data_vars:
2✔
2498
                orig_calv_rate_ts = ods.calving_rate_ext.data[:, i]
1✔
2499
                # +1 because calving rate at year 0 is unknown from the dyns model
2500
                orig_calv_rate_ts[:fid+1] = calv_rate
1✔
2501
                ods.calving_rate_ext.data[:, i] = orig_calv_rate_ts
1✔
2502

2503
            # Extend vol bsl by assuming that % stays constant
2504
            if 'volume_bsl' in ods.data_vars:
2✔
2505
                bsl = ods.volume_bsl.data[fid, i] / ods.volume.data[fid, i]
1✔
2506
                ods.volume_bsl_ext.data[:fid, i] = bsl * ods.volume_ext.data[:fid, i]
1✔
2507
            if 'volume_bwl' in ods.data_vars:
2✔
2508
                bwl = ods.volume_bwl.data[fid, i] / ods.volume.data[fid, i]
1✔
2509
                ods.volume_bwl_ext.data[:fid, i] = bwl * ods.volume_ext.data[:fid, i]
1✔
2510

2511
        # Remove old vars
2512
        for vn in list(ods.data_vars):
2✔
2513
            if '_ext' not in vn and 'time' in ods[vn].dims:
2✔
2514
                del ods[vn]
2✔
2515

2516
        # Rename vars to their old names
2517
        ods = ods.rename(dict((o, o.replace('_ext', ''))
2✔
2518
                              for o in ods.data_vars))
2519

2520
        # Remove t0 (which is nan)
2521
        ods = ods.isel(time=slice(1, None))
2✔
2522

2523
        # To file?
2524
        if path:
2✔
2525
            enc_var = {'dtype': 'float32'}
2✔
2526
            if use_compression:
2✔
2527
                enc_var['complevel'] = 5
2✔
2528
                enc_var['zlib'] = True
2✔
2529
            encoding = {v: enc_var for v in ods.data_vars}
2✔
2530
            ods.to_netcdf(path, encoding=encoding)
2✔
2531

2532
    return ods
2✔
2533
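
# Editor's note: illustration only (not part of the OGGM API). Typical inputs
# are the files written by compile_run_output (historical run),
# compile_fixed_geometry_mass_balance (as CSV) and compile_glacier_statistics.
# The file names below are assumptions for the sketch.
def _example_extend_past_run(wdir):
    return extend_past_climate_run(
        past_run_file=os.path.join(wdir, 'run_output_historical.nc'),
        fixed_geometry_mb_file=os.path.join(wdir, 'fixed_geometry_mass_balance.csv'),
        glacier_statistics_file=os.path.join(wdir, 'glacier_statistics.csv'),
        path=os.path.join(wdir, 'run_output_extended.nc'))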

2534

2535
def idealized_gdir(surface_h, widths_m, map_dx, flowline_dx=1,
9✔
2536
                   base_dir=None, reset=False):
2537
    """Creates a glacier directory with flowline input data only.
2538

2539
    This is useful for testing, or for idealized experiments.
2540

2541
    Parameters
2542
    ----------
2543
    surface_h : ndarray
2544
        the surface elevation of the flowline's grid points (in m).
2545
    widths_m : ndarray
2546
        the widths of the flowline's grid points (in m).
2547
    map_dx : float
2548
        the grid spacing (in m)
2549
    flowline_dx : int
2550
        the flowline grid spacing (in units of map_dx, often it should be 1)
2551
    base_dir : str
2552
        path to the directory where to open the directory.
2553
        Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2554
    reset : bool, default=False
2555
        empties the directory at construction
2556

2557
    Returns
2558
    -------
2559
    a GlacierDirectory instance
2560
    """
2561

2562
    from oggm.core.centerlines import Centerline
1✔
2563

2564
    # Area from geometry
2565
    area_km2 = np.sum(widths_m * map_dx * flowline_dx) * 1e-6
1✔
2566

2567
    # Dummy entity - should probably also change the geometry
2568
    entity = gpd.read_file(get_demo_file('Hintereisferner_RGI5.shp')).iloc[0]
1✔
2569
    entity.Area = area_km2
1✔
2570
    entity.CenLat = 0
1✔
2571
    entity.CenLon = 0
1✔
2572
    entity.Name = ''
1✔
2573
    entity.RGIId = 'RGI50-00.00000'
1✔
2574
    entity.O1Region = '00'
1✔
2575
    entity.O2Region = '0'
1✔
2576
    gdir = GlacierDirectory(entity, base_dir=base_dir, reset=reset)
1✔
2577
    gdir.write_shapefile(gpd.GeoDataFrame([entity], crs='EPSG:4326'), 'outlines')
1✔
2578

2579
    # Idealized flowline
2580
    coords = np.arange(0, len(surface_h) - 0.5, 1)
1✔
2581
    line = shpg.LineString(np.vstack([coords, coords * 0.]).T)
1✔
2582
    fl = Centerline(line, dx=flowline_dx, surface_h=surface_h, map_dx=map_dx)
1✔
2583
    fl.widths = widths_m / map_dx
1✔
2584
    fl.is_rectangular = np.ones(fl.nx).astype(bool)
1✔
2585
    gdir.write_pickle([fl], 'inversion_flowlines')
1✔
2586

2587
    # Idealized map
2588
    grid = salem.Grid(nxny=(1, 1), dxdy=(map_dx, map_dx), x0y0=(0, 0))
1✔
2589
    grid.to_json(gdir.get_filepath('glacier_grid'))
1✔
2590

2591
    return gdir
1✔
2592

2593

2594
def _back_up_retry(func, exceptions, max_count=5):
9✔
2595
    """Re-Try an action up to max_count times.
2596
    """
2597

2598
    count = 0
4✔
2599
    while count < max_count:
4✔
2600
        try:
4✔
2601
            if count > 0:
4✔
2602
                time.sleep(random.uniform(0.05, 0.1))
×
2603
            return func()
4✔
2604
        except exceptions:
1✔
2605
            count += 1
×
2606
            if count >= max_count:
×
2607
                raise
×
2608

2609

2610
def _robust_extract(to_dir, *args, **kwargs):
9✔
2611
    """For some obscure reason this operation randomly fails.
2612

2613
    Try to make it more robust.
2614
    """
2615

2616
    def func():
4✔
2617
        with tarfile.open(*args, **kwargs) as tf:
4✔
2618
            if not len(tf.getnames()):
4✔
2619
                raise RuntimeError("Empty tarfile")
×
2620
            tf.extractall(os.path.dirname(to_dir))
4✔
2621

2622
    _back_up_retry(func, FileExistsError)
4✔
2623

2624

2625
def robust_tar_extract(from_tar, to_dir, delete_tar=False):
9✔
2626
    """Extract a tar file - also checks for a "tar in tar" situation"""
2627

2628
    if os.path.isfile(from_tar):
4✔
2629
        _robust_extract(to_dir, from_tar, 'r')
1✔
2630
    else:
2631
        # maybe a tar in tar
2632
        base_tar = os.path.dirname(from_tar) + '.tar'
4✔
2633
        if not os.path.isfile(base_tar):
4✔
2634
            raise FileNotFoundError('Could not find a tarfile with path: '
×
2635
                                    '{}'.format(from_tar))
2636
        if delete_tar:
4✔
2637
            raise InvalidParamsError('Cannot delete tar in tar.')
×
2638
        # Open the tar
2639
        bname = os.path.basename(from_tar)
4✔
2640
        dirbname = os.path.basename(os.path.dirname(from_tar))
4✔
2641

2642
        def func():
4✔
2643
            with tarfile.open(base_tar, 'r') as tf:
4✔
2644
                i_from_tar = tf.getmember(os.path.join(dirbname, bname))
4✔
2645
                with tf.extractfile(i_from_tar) as fileobj:
4✔
2646
                    _robust_extract(to_dir, fileobj=fileobj)
4✔
2647

2648
        _back_up_retry(func, RuntimeError)
4✔
2649

2650
    if delete_tar:
4✔
2651
        os.remove(from_tar)
1✔
2652
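
# Editor's note: illustration only (not part of the OGGM API). Extract one
# glacier directory from per-glacier tar storage before opening it; the
# directory layout below is an assumption for the sketch.
def _example_extract_gdir_tar(base_dir, rgi_id):
    from_tar = os.path.join(base_dir, rgi_id[:8], rgi_id[:11], rgi_id + '.tar.gz')
    to_dir = os.path.join(base_dir, rgi_id[:8], rgi_id[:11], rgi_id)
    robust_tar_extract(from_tar, to_dir, delete_tar=False)
    return to_dir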

2653

2654
class GlacierDirectory(object):
9✔
2655
    """Organizes read and write access to the glacier's files.
2656

2657
    It handles a glacier directory created in a base directory (default
2658
    is the "per_glacier" folder in the working directory). The role of a
2659
    GlacierDirectory is to give access to file paths and to I/O operations.
2660
    The user should not care about *where* the files are
2661
    located, but should know their name (see :ref:`basenames`).
2662

2663
    If the directory does not exist, it will be created.
2664

2665
    See :ref:`glacierdir` for more information.
2666

2667
    Attributes
2668
    ----------
2669
    dir : str
2670
        path to the directory
2671
    base_dir : str
2672
        path to the base directory
2673
    rgi_id : str
2674
        The glacier's RGI identifier
2675
    glims_id : str
2676
        The glacier's GLIMS identifier (when available)
2677
    rgi_area_km2 : float
2678
        The glacier's RGI area (km2)
2679
    cenlon, cenlat : float
2680
        The glacier centerpoint's lon/lat
2681
    rgi_date : int
2682
        The RGI's BGNDATE year attribute if available. Otherwise, defaults to
2683
        the median year for the RGI region
2684
    rgi_region : str
2685
        The RGI region ID
2686
    rgi_subregion : str
2687
        The RGI subregion ID
2688
    rgi_version : str
2689
        The RGI version name
2690
    rgi_region_name : str
2691
        The RGI region name
2692
    rgi_subregion_name : str
2693
        The RGI subregion name
2694
    name : str
2695
        The RGI glacier name (if available)
2696
    hemisphere : str
2697
        `nh` or `sh`
2698
    glacier_type : str
2699
        The RGI glacier type ('Glacier', 'Ice cap', 'Perennial snowfield',
2700
        'Seasonal snowfield')
2701
    terminus_type : str
2702
        The RGI terminus type ('Land-terminating', 'Marine-terminating',
2703
        'Lake-terminating', 'Dry calving', 'Regenerated', 'Shelf-terminating')
2704
    is_tidewater : bool
2705
        Is the glacier a calving glacier?
2706
    is_lake_terminating : bool
2707
        Is the glacier a lake terminating glacier?
2708
    is_nominal : bool
2709
        Is the glacier an RGI nominal glacier?
2710
    is_icecap : bool
2711
        Is the glacier an ice cap?
2712
    extent_ll : list
2713
        Extent of the glacier in lon/lat
2714
    logfile : str
2715
        Path to the log file (txt)
2716
    inversion_calving_rate : float
2717
        Calving rate used for the inversion
2718
    grid
2719
    dem_info
2720
    dem_daterange
2721
    intersects_ids
2722
    rgi_area_m2
2723
    rgi_area_km2
2724
    """
2725

2726
    def __init__(self, rgi_entity, base_dir=None, reset=False,
9✔
2727
                 from_tar=False, delete_tar=False, settings_filesuffix='',
2728
                 observations_filesuffix=''):
2729
        """Creates a new directory or opens an existing one.
2730

2731
        Parameters
2732
        ----------
2733
        rgi_entity : a ``geopandas.GeoSeries`` or str
2734
            glacier entity read from the shapefile (or a valid RGI ID if the
2735
            directory exists)
2736
        base_dir : str
2737
            path to the directory where to open the directory.
2738
            Defaults to `cfg.PATHS['working_dir'] + /per_glacier/`
2739
        reset : bool, default=False
2740
            empties the directory at construction (careful!)
2741
        from_tar : str or bool, default=False
2742
            path to a tar file to extract the gdir data from. If set to `True`,
2743
            will check for a tar file at the expected location in `base_dir`.
2744
        delete_tar : bool, default=False
2745
            delete the original tar file after extraction.
2746
        settings_filesuffix : str, default=''
2747
            a filesuffix for a settings file to use
2748
        observations_filesuffix : str, default=''
2749
            a filesuffix for an observations file to use
2750
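
        Examples
        --------
        A minimal sketch (the RGI id and working directory below are
        illustrative assumptions, not values taken from this file):

        >>> from oggm import cfg, utils
        >>> cfg.initialize()
        >>> cfg.PATHS['working_dir'] = utils.gettempdir('gdir_example')
        >>> cfg.PARAMS['use_intersects'] = False  # no intersects file in this sketch
        >>> entity = utils.get_rgi_glacier_entities(['RGI60-11.00897']).iloc[0]
        >>> gdir = GlacierDirectory(entity)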
        """
2751

2752
        if base_dir is None:
7✔
2753
            if not cfg.PATHS.get('working_dir', None):
7✔
2754
                raise ValueError("Need a valid PATHS['working_dir']!")
×
2755
            base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
7✔
2756

2757
        # RGI IDs are also valid entries
2758
        if isinstance(rgi_entity, str):
7✔
2759
            # Get the meta from the shape file directly
2760
            if from_tar:
5✔
2761
                _dir = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
4✔
2762
                                    rgi_entity)
2763
                # Avoid bad surprises
2764
                if os.path.exists(_dir):
4✔
2765
                    shutil.rmtree(_dir)
4✔
2766
                if from_tar is True:
4✔
2767
                    from_tar = _dir + '.tar.gz'
×
2768
                robust_tar_extract(from_tar, _dir, delete_tar=delete_tar)
4✔
2769
                from_tar = False  # to not re-unpack later below
4✔
2770
                _shp = os.path.join(_dir, 'outlines.shp')
4✔
2771
            else:
2772
                _shp = os.path.join(base_dir, rgi_entity[:-6], rgi_entity[:-3],
5✔
2773
                                    rgi_entity, 'outlines.shp')
2774
            rgi_entity = self._read_shapefile_from_path(_shp)
5✔
2775
            crs = salem.check_crs(rgi_entity.crs)
5✔
2776
            rgi_entity = rgi_entity.iloc[0]
5✔
2777
            g = rgi_entity['geometry']
5✔
2778
            xx, yy = salem.transform_proj(crs, salem.wgs84,
5✔
2779
                                          [g.bounds[0], g.bounds[2]],
2780
                                          [g.bounds[1], g.bounds[3]])
2781
            write_shp = False
5✔
2782
        else:
2783
            g = rgi_entity['geometry']
7✔
2784
            xx, yy = ([g.bounds[0], g.bounds[2]],
7✔
2785
                      [g.bounds[1], g.bounds[3]])
2786
            write_shp = True
7✔
2787

2788
        # Extent of the glacier in lon/lat
2789
        self.extent_ll = [xx, yy]
7✔
2790

2791
        is_rgi7 = False
7✔
2792
        is_glacier_complex = False
7✔
2793
        try:
7✔
2794
            self.rgi_id = rgi_entity.rgi_id
7✔
2795
            is_rgi7 = True
2✔
2796
            try:
2✔
2797
                self.glims_id = rgi_entity.glims_id
2✔
2798
            except AttributeError:
2✔
2799
                # Complex product
2800
                self.glims_id = ''
2✔
2801
                is_glacier_complex = True
2✔
2802
        except AttributeError:
7✔
2803
            # RGI V6
2804
            self.rgi_id = rgi_entity.RGIId
7✔
2805
            self.glims_id = rgi_entity.GLIMSId
7✔
2806

2807
        # Root directory
2808
        self.base_dir = os.path.normpath(base_dir)
7✔
2809
        self.dir = os.path.join(self.base_dir, self.rgi_id[:-6],
7✔
2810
                                self.rgi_id[:-3], self.rgi_id)
2811

2812
        # Do we have to extract the files first?
2813
        if (reset or from_tar) and os.path.exists(self.dir):
7✔
2814
            shutil.rmtree(self.dir)
6✔
2815

2816
        if from_tar:
7✔
2817
            if from_tar is True:
1✔
2818
                from_tar = self.dir + '.tar.gz'
1✔
2819
            robust_tar_extract(from_tar, self.dir, delete_tar=delete_tar)
1✔
2820
            write_shp = False
1✔
2821
        else:
2822
            mkdir(self.dir)
7✔
2823

2824
        if not os.path.isdir(self.dir):
7✔
2825
            raise RuntimeError('GlacierDirectory %s does not exist!' % self.dir)
×
2826

2827
        # define the initial settings for this gdir
2828
        self._settings_filesuffix = settings_filesuffix
7✔
2829
        self.settings = self.get_settings(settings_filesuffix=settings_filesuffix)
7✔
2830

2831
        # define the initial observations for this gdir
2832
        self._observations_filesuffix = observations_filesuffix
7✔
2833
        self.observations = self.get_observations(
7✔
2834
            observations_filesuffix=observations_filesuffix)
2835

2836
        # Do we want to use the RGI center point or ours?
2837
        if self.settings['use_rgi_area']:
7✔
2838
            if is_rgi7:
7✔
2839
                self.cenlon = float(rgi_entity.cenlon)
2✔
2840
                self.cenlat = float(rgi_entity.cenlat)
2✔
2841
            else:
2842
                self.cenlon = float(rgi_entity.CenLon)
7✔
2843
                self.cenlat = float(rgi_entity.CenLat)
7✔
2844
        else:
2845
            cenlon, cenlat = rgi_entity.geometry.representative_point().xy
1✔
2846
            self.cenlon = float(cenlon[0])
1✔
2847
            self.cenlat = float(cenlat[0])
1✔
2848

2849
        if is_glacier_complex:
7✔
2850
            rgi_entity['glac_name'] = ''
2✔
2851
            rgi_entity['src_date'] = '2000-01-01 00:00:00'
2✔
2852
            if 'dem_source' not in rgi_entity:
2✔
2853
                rgi_entity['dem_source'] = None
2✔
2854
            rgi_entity['term_type'] = 9
2✔
2855

2856
        if is_rgi7:
7✔
2857
            self.rgi_region = rgi_entity.o1region
2✔
2858
            self.rgi_subregion = rgi_entity.o2region
2✔
2859
            name = rgi_entity.glac_name
2✔
2860
            rgi_datestr = rgi_entity.src_date
2✔
2861
            self.rgi_version = '70G'
2✔
2862
            self.glacier_type = 'Glacier'
2✔
2863
            self.status = 'Glacier'
2✔
2864
            ttkeys = {0: 'Land-terminating',
2✔
2865
                      1: 'Marine-terminating',
2866
                      2: 'Lake-terminating',
2867
                      3: 'Shelf-terminating',
2868
                      9: 'Not assigned',
2869
                      }
2870
            self.terminus_type = ttkeys[int(rgi_entity['term_type'])]
2✔
2871
            if is_glacier_complex:
2✔
2872
                self.rgi_version = '70C'
2✔
2873
                self.glacier_type = 'Glacier complex'
2✔
2874
                self.status = 'Glacier complex'
2✔
2875
            self.rgi_dem_source = rgi_entity.dem_source
2✔
2876
            self.utm_zone = rgi_entity.utm_zone
2✔
2877

2878
            # New attrs
2879
            try:
2✔
2880
                self.rgi_termlon = rgi_entity.termlon
2✔
2881
                self.rgi_termlat = rgi_entity.termlat
2✔
2882
            except AttributeError:
2✔
2883
                pass
2✔
2884
        else:
2885
            self.rgi_region = '{:02d}'.format(int(rgi_entity.O1Region))
7✔
2886
            self.rgi_subregion = f'{self.rgi_region}-{int(rgi_entity.O2Region):02d}'
7✔
2887
            name = rgi_entity.Name
7✔
2888
            rgi_datestr = rgi_entity.BgnDate
7✔
2889

2890
            try:
7✔
2891
                # RGI5
2892
                gtype = rgi_entity.GlacType
7✔
2893
            except AttributeError:
7✔
2894
                # RGI V6
2895
                gtype = [str(rgi_entity.Form), str(rgi_entity.TermType)]
7✔
2896

2897
            try:
7✔
2898
                # RGI5
2899
                gstatus = rgi_entity.RGIFlag[0]
7✔
2900
            except AttributeError:
7✔
2901
                # RGI V6
2902
                gstatus = rgi_entity.Status
7✔
2903

2904
            rgi_version = self.rgi_id.split('-')[0][-2:]
7✔
2905
            if rgi_version not in ['50', '60', '61']:
7✔
2906
                raise RuntimeError('RGI Version not supported: '
×
2907
                                   '{}'.format(rgi_version))
2908
            self.rgi_version = rgi_version
7✔
2909

2910
            # Mechanism to pass DEM source via the RGI entity
2911
            self.rgi_dem_source = rgi_entity.get('dem_source', None)
7✔
2912

2913
            # Read glacier attrs
2914
            gtkeys = {'0': 'Glacier',
7✔
2915
                      '1': 'Ice cap',
2916
                      '2': 'Perennial snowfield',
2917
                      '3': 'Seasonal snowfield',
2918
                      '9': 'Not assigned',
2919
                      }
2920
            ttkeys = {'0': 'Land-terminating',
7✔
2921
                      '1': 'Marine-terminating',
2922
                      '2': 'Lake-terminating',
2923
                      '3': 'Dry calving',
2924
                      '4': 'Regenerated',
2925
                      '5': 'Shelf-terminating',
2926
                      '9': 'Not assigned',
2927
                      }
2928
            stkeys = {'0': 'Glacier or ice cap',
7✔
2929
                      '1': 'Glacier complex',
2930
                      '2': 'Nominal glacier',
2931
                      '9': 'Not assigned',
2932
                      }
2933
            self.glacier_type = gtkeys[gtype[0]]
7✔
2934
            self.terminus_type = ttkeys[gtype[1]]
7✔
2935
            self.status = stkeys['{}'.format(gstatus)]
7✔
2936

2937
        # remove spurious characters and trailing blanks
2938
        self.name = filter_rgi_name(name)
7✔
2939

2940
        # RGI region
2941
        reg_names, subreg_names = parse_rgi_meta(version=self.rgi_version[0])
7✔
2942
        reg_name = reg_names.loc[int(self.rgi_region)]
7✔
2943

2944
        # RGI V6
2945
        if not isinstance(reg_name, str):
7✔
2946
            reg_name = reg_name.values[0]
7✔
2947

2948
        self.rgi_region_name = self.rgi_region + ': ' + reg_name
7✔
2949
        try:
7✔
2950
            subreg_name = subreg_names.loc[self.rgi_subregion]
7✔
2951
            # RGI V6
2952
            if not isinstance(subreg_name, str):
7✔
2953
                subreg_name = subreg_name.values[0]
7✔
2954
            self.rgi_subregion_name = self.rgi_subregion + ': ' + subreg_name
7✔
2955
        except KeyError:
×
2956
            self.rgi_subregion_name = self.rgi_subregion + ': NoName'
×
2957

2958
        # Decide what is a tidewater glacier
2959
        user = cfg.PARAMS['tidewater_type']
7✔
2960
        if user == 1:
7✔
2961
            sel = ['Marine-terminating']
×
2962
        elif user == 2:
7✔
2963
            sel = ['Marine-terminating', 'Shelf-terminating']
7✔
2964
        elif user == 3:
2✔
2965
            sel = ['Marine-terminating', 'Lake-terminating']
×
2966
        elif user == 4:
2✔
2967
            sel = ['Marine-terminating', 'Lake-terminating', 'Shelf-terminating']
2✔
2968
        else:
2969
            raise InvalidParamsError("PARAMS['tidewater_type'] not understood")
×
2970
        self.is_tidewater = self.terminus_type in sel
7✔
2971
        self.is_lake_terminating = self.terminus_type == 'Lake-terminating'
7✔
2972
        self.is_marine_terminating = self.terminus_type == 'Marine-terminating'
7✔
2973
        self.is_shelf_terminating = self.terminus_type == 'Shelf-terminating'
7✔
2974
        self.is_nominal = self.status == 'Nominal glacier'
7✔
2975
        self.inversion_calving_rate = 0.
7✔
2976
        self.is_icecap = self.glacier_type == 'Ice cap'
7✔
2977

2978
        # Hemisphere
2979
        if self.cenlat < 0 or self.rgi_region == '16':
7✔
2980
            self.hemisphere = 'sh'
×
2981
        else:
2982
            self.hemisphere = 'nh'
7✔
2983

2984
        # convert the date
2985
        rgi_date = int(rgi_datestr[0:4])
7✔
2986
        if rgi_date < 0:
7✔
2987
            rgi_date = RGI_DATE[self.rgi_region]
1✔
2988
        self.rgi_date = rgi_date
7✔
2989

2990
        # logging file
2991
        self.logfile = os.path.join(self.dir, 'log.txt')
7✔
2992

2993
        if write_shp:
7✔
2994
            # Write shapefile
2995
            self._reproject_and_write_shapefile(rgi_entity)
7✔
2996

2997
        # Optimization
2998
        self._mbdf = None
7✔
2999
        self._mbprofdf = None
7✔
3000
        self._mbprofdf_cte_dh = None
7✔
3001

3002
    def __repr__(self):
3003

3004
        summary = ['<oggm.GlacierDirectory>']
3005
        summary += ['  RGI id: ' + self.rgi_id]
3006
        summary += ['  Region: ' + self.rgi_region_name]
3007
        summary += ['  Subregion: ' + self.rgi_subregion_name]
3008
        if self.name:
3009
            summary += ['  Name: ' + self.name]
3010
        summary += ['  Glacier type: ' + str(self.glacier_type)]
3011
        summary += ['  Terminus type: ' + str(self.terminus_type)]
3012
        summary += ['  Status: ' + str(self.status)]
3013
        summary += ['  Area: ' + str(self.rgi_area_km2) + ' km2']
3014
        summary += ['  Lon, Lat: (' + str(self.cenlon) + ', ' +
3015
                    str(self.cenlat) + ')']
3016
        if os.path.isfile(self.get_filepath('glacier_grid')):
3017
            summary += ['  Grid (nx, ny): (' + str(self.grid.nx) + ', ' +
3018
                        str(self.grid.ny) + ')']
3019
            summary += ['  Grid (dx, dy): (' + str(self.grid.dx) + ', ' +
3020
                        str(self.grid.dy) + ')']
3021
        return '\n'.join(summary) + '\n'
3022

3023
    def _reproject_and_write_shapefile(self, entity):
9✔
3024
        # Make a local glacier map
3025
        if cfg.PARAMS['map_proj'] == 'utm':
7✔
3026
            if entity.get('utm_zone', False):
×
3027
                # RGI7 has an utm zone
3028
                proj4_str = {'proj': 'utm', 'zone': entity['utm_zone']}
×
3029
            else:
3030
                # Find it out
3031
                from pyproj.aoi import AreaOfInterest
×
3032
                from pyproj.database import query_utm_crs_info
×
3033
                utm_crs_list = query_utm_crs_info(
×
3034
                    datum_name="WGS 84",
3035
                    area_of_interest=AreaOfInterest(
3036
                        west_lon_degree=self.cenlon,
3037
                        south_lat_degree=self.cenlat,
3038
                        east_lon_degree=self.cenlon,
3039
                        north_lat_degree=self.cenlat,
3040
                    ),
3041
                )
3042
                proj4_str = utm_crs_list[0].code
×
3043
        elif cfg.PARAMS['map_proj'] == 'tmerc':
7✔
3044
            params = dict(name='tmerc', lat_0=0., lon_0=self.cenlon,
7✔
3045
                          k=0.9996, x_0=0, y_0=0, datum='WGS84')
3046
            proj4_str = ("+proj={name} +lat_0={lat_0} +lon_0={lon_0} +k={k} "
7✔
3047
                         "+x_0={x_0} +y_0={y_0} +datum={datum}".format(**params))
3048
        else:
3049
            raise InvalidParamsError("cfg.PARAMS['map_proj'] must be one of "
×
3050
                                     "'tmerc', 'utm'.")
3051
        # Reproject
3052
        proj_in = pyproj.Proj("epsg:4326", preserve_units=True)
7✔
3053
        proj_out = pyproj.Proj(proj4_str, preserve_units=True)
7✔
3054

3055
        # transform geometry to map
3056
        project = partial(transform_proj, proj_in, proj_out)
7✔
3057
        geometry = shp_trafo(project, entity['geometry'])
7✔
3058
        if len(self.rgi_id) == 23 and (not geometry.is_valid or
7✔
3059
                                       type(geometry) != shpg.Polygon):
3060
            # In RGI7 we know that the geometries are valid in the source file,
3061
            # so we have to validate them after projecting them as well
3062
            # Try buffer first
3063
            geometry = geometry.buffer(0)
×
3064
            if not geometry.is_valid:
×
3065
                correct = recursive_valid_polygons([geometry], crs=proj4_str)
×
3066
                if len(correct) != 1:
×
3067
                    raise RuntimeError("Can't correct this geometry")
×
3068
                geometry = correct[0]
×
3069
            if type(geometry) != shpg.Polygon:
×
3070
                raise ValueError(f'{self.rgi_id}: geometry not valid')
×
3071
        elif not cfg.PARAMS['keep_multipolygon_outlines']:
7✔
3072
            geometry = multipolygon_to_polygon(geometry, gdir=self)
7✔
3073

3074
        # Save transformed geometry to disk
3075
        entity = entity.copy()
7✔
3076
        entity['geometry'] = geometry
7✔
3077

3078
        # Do we want to use the RGI area or ours?
3079
        if not cfg.PARAMS['use_rgi_area']:
7✔
3080
            # Update Area
3081
            try:
1✔
3082
                area = geometry.area * 1e-6
1✔
3083
            except:
×
3084
                area = geometry.area_m2 * 1e-6
×
3085
            entity['Area'] = area
1✔
3086

3087
        # Avoid fiona bug: https://github.com/Toblerity/Fiona/issues/365
3088
        for k, s in entity.items():
7✔
3089
            if type(s) in [np.int32, np.int64]:
7✔
3090
                entity[k] = int(s)
7✔
3091
        towrite = gpd.GeoDataFrame(entity).T.set_geometry('geometry')
7✔
3092
        towrite.set_crs(crs=proj4_str, inplace=True, allow_override=True)
7✔
3093

3094
        # Write shapefile
3095
        self.write_shapefile(towrite, 'outlines')
7✔
3096

3097
        # Also transform the intersects if necessary
3098
        gdf = cfg.PARAMS['intersects_gdf']
7✔
3099
        if len(gdf) > 0:
7✔
3100
            try:
6✔
3101
                gdf = gdf.loc[((gdf.RGIId_1 == self.rgi_id) |
6✔
3102
                               (gdf.RGIId_2 == self.rgi_id))]
3103
            except AttributeError:
2✔
3104
                gdf = gdf.loc[((gdf.rgi_g_id_1 == self.rgi_id) |
2✔
3105
                               (gdf.rgi_g_id_2 == self.rgi_id))]
3106

3107
            if len(gdf) > 0:
6✔
3108
                gdf = salem.transform_geopandas(gdf, to_crs=proj_out)
6✔
3109
                if hasattr(gdf.crs, 'srs'):
6✔
3110
                    # salem uses pyproj
3111
                    gdf.set_crs(gdf.crs.srs, allow_override=True, inplace=True)
6✔
3112
                self.write_shapefile(gdf, 'intersects')
6✔
3113
        else:
3114
            # Sanity check
3115
            if cfg.PARAMS['use_intersects'] and not self.rgi_version == '70C':
6✔
3116
                raise InvalidParamsError(
×
3117
                    'You seem to have forgotten to set the '
3118
                    'intersects file for this run. OGGM '
3119
                    'works better with such a file. If you '
3120
                    'know what you are doing, set '
3121
                    "cfg.PARAMS['use_intersects'] = False to "
3122
                    "suppress this error.")
3123

3124
    def grid_from_params(self):
9✔
3125
        """If the glacier_grid.json file is lost, reconstruct it."""
3126
        from oggm.core.gis import glacier_grid_params
1✔
3127
        utm_proj, nx, ny, ulx, uly, dx = glacier_grid_params(self)
1✔
3128
        x0y0 = (ulx+dx/2, uly-dx/2)  # To pixel center coordinates
1✔
3129
        return salem.Grid(proj=utm_proj, nxny=(nx, ny), dxdy=(dx, -dx),
1✔
3130
                          x0y0=x0y0)
3131

3132
    @lazy_property
9✔
3133
    def grid(self):
9✔
3134
        """A ``salem.Grid`` handling the georeferencing of the local grid"""
3135
        try:
7✔
3136
            return salem.Grid.from_json(self.get_filepath('glacier_grid'))
7✔
3137
        except FileNotFoundError:
1✔
3138
            raise InvalidWorkflowError('This glacier directory seems to '
1✔
3139
                                       'have lost its glacier_grid.json file. '
3140
                                       'Use .grid_from_params(), but make sure '
3141
                                       'that the PARAMS are the ones you '
3142
                                       'want.')
3143

3144
    @lazy_property
9✔
3145
    def rgi_area_km2(self):
9✔
3146
        """The glacier's RGI area (km2)."""
3147
        try:
7✔
3148
            _area = self.read_shapefile('outlines')['Area']
7✔
3149
        except OSError:
3✔
3150
            raise RuntimeError('No outlines available')
×
3151
        except KeyError:
3✔
3152
            # RGI V7
3153
            _area = self.read_shapefile('outlines')['area_km2']
3✔
3154
        return float(_area.iloc[0])
7✔
3155

3156
    @lazy_property
9✔
3157
    def intersects_ids(self):
9✔
3158
        """The glacier's intersects RGI ids."""
3159
        try:
3✔
3160
            gdf = self.read_shapefile('intersects')
3✔
3161
            try:
3✔
3162
                ids = np.append(gdf['RGIId_1'], gdf['RGIId_2'])
3✔
3163
            except KeyError:
2✔
3164
                ids = np.append(gdf['rgi_g_id_1'], gdf['rgi_g_id_2'])
2✔
3165
            ids = list(np.unique(np.sort(ids)))
3✔
3166
            ids.remove(self.rgi_id)
3✔
3167
            return ids
3✔
3168
        except OSError:
×
3169
            return []
×
3170

3171
    @lazy_property
9✔
3172
    def dem_daterange(self):
9✔
3173
        """Years in which most of the DEM data was acquired"""
3174
        source_txt = self.get_filepath('dem_source')
1✔
3175
        if os.path.isfile(source_txt):
1✔
3176
            with open(source_txt, 'r') as f:
1✔
3177
                for line in f.readlines():
1✔
3178
                    if 'Date range:' in line:
1✔
3179
                        return tuple(map(int, line.split(':')[1].split('-')))
1✔
3180
        # we did not find the information in the dem_source file
3181
        log.warning('No DEM date range specified in `dem_source.txt`')
1✔
3182
        return None
1✔
3183

3184
    @lazy_property
9✔
3185
    def dem_info(self):
9✔
3186
        """More detailed information on the acquisition of the DEM data"""
3187
        source_file = self.get_filepath('dem_source')
1✔
3188
        source_text = ''
1✔
3189
        if os.path.isfile(source_file):
1✔
3190
            with open(source_file, 'r') as f:
1✔
3191
                for line in f.readlines():
1✔
3192
                    source_text += line
1✔
3193
        else:
3194
            log.warning('No DEM source file found.')
×
3195
        return source_text
1✔
3196

3197
    @property
9✔
3198
    def rgi_area_m2(self):
9✔
3199
        """The glacier's RGI area (m2)."""
3200
        return self.rgi_area_km2 * 10**6
7✔
3201

3202
    def get_filepath(self, filename, delete=False, filesuffix='',
9✔
3203
                     _deprecation_check=True):
3204
        """Absolute path to a specific file.
3205

3206
        Parameters
3207
        ----------
3208
        filename : str
3209
            file name (must be listed in cfg.BASENAME)
3210
        delete : bool
3211
            delete the file if exists
3212
        filesuffix : str
3213
            append a suffix to the filename (useful for model runs). Note
3214
            that the BASENAME remains the same.
3215

3216
        Returns
3217
        -------
3218
        The absolute path to the desired file
3219
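
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to be an initialized
        GlacierDirectory; the basenames are from cfg.BASENAMES):

        >>> fp = gdir.get_filepath('glacier_grid')
        >>> fp_cru = gdir.get_filepath('climate_historical', filesuffix='_CRU')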
        """
3220

3221
        if filename not in cfg.BASENAMES:
7✔
3222
            raise ValueError(filename + ' not in cfg.BASENAMES.')
×
3223

3224
        fname = cfg.BASENAMES[filename]
7✔
3225
        if filesuffix:
7✔
3226
            fname = fname.split('.')
5✔
3227
            assert len(fname) == 2
5✔
3228
            fname = fname[0] + filesuffix + '.' + fname[1]
5✔
3229

3230
        out = os.path.join(self.dir, fname)
7✔
3231
        if delete and os.path.isfile(out):
7✔
3232
            os.remove(out)
4✔
3233
        return out
7✔
3234

3235
    def has_file(self, filename, filesuffix='', _deprecation_check=True):
9✔
3236
        """Checks if a file exists.
3237

3238
        Parameters
3239
        ----------
3240
        filename : str
3241
            file name (must be listed in cfg.BASENAME)
3242
        filesuffix : str
3243
            append a suffix to the filename (useful for model runs). Note
3244
            that the BASENAME remains the same.
3245
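
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to be an initialized
        GlacierDirectory):

        >>> if not gdir.has_file('climate_historical'):
        ...     print('climate not processed yet')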
        """
3246
        fp = self.get_filepath(filename, filesuffix=filesuffix,
7✔
3247
                               _deprecation_check=_deprecation_check)
3248
        if '.shp' in fp and cfg.PARAMS['use_tar_shapefiles']:
7✔
3249
            fp = fp.replace('.shp', '.tar')
7✔
3250
            if cfg.PARAMS['use_compression']:
7✔
3251
                fp += '.gz'
7✔
3252
        return os.path.exists(fp)
7✔
3253

3254
    def add_to_diagnostics(self, key, value):
9✔
3255
        """Write a key, value pair to the gdir's runtime diagnostics.
3256

3257
        Parameters
3258
        ----------
3259
        key : str
3260
            dict entry key
3261
        value : str or number
3262
            dict entry value
3263
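
        Examples
        --------
        Illustrative sketch (the key and value are assumptions made for the
        example):

        >>> gdir.add_to_diagnostics('dem_source', 'SRTM')
        >>> gdir.get_diagnostics()['dem_source']
        'SRTM'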
        """
3264

3265
        d = self.get_diagnostics()
7✔
3266
        d[key] = value
7✔
3267
        with open(self.get_filepath('diagnostics'), 'w') as f:
7✔
3268
            json.dump(d, f)
7✔
3269

3270
    def get_diagnostics(self):
9✔
3271
        """Read the gdir's runtime diagnostics.
3272

3273
        Returns
3274
        -------
3275
        the diagnostics dict
3276
        """
3277
        # If not there, create an empty one
3278
        if not self.has_file('diagnostics'):
7✔
3279
            with open(self.get_filepath('diagnostics'), 'w') as f:
7✔
3280
                json.dump(dict(), f)
7✔
3281

3282
        # Read and return
3283
        with open(self.get_filepath('diagnostics'), 'r') as f:
7✔
3284
            out = json.load(f)
7✔
3285
        return out
7✔
3286

3287
    def read_pickle(self, filename, use_compression=None, filesuffix=''):
9✔
3288
        """Reads a pickle located in the directory.
3289

3290
        Parameters
3291
        ----------
3292
        filename : str
3293
            file name (must be listed in cfg.BASENAME)
3294
        use_compression : bool
3295
            whether or not the file was compressed. Default is to use
3296
            cfg.PARAMS['use_compression'] for this (recommended)
3297
        filesuffix : str
3298
            append a suffix to the filename (useful for experiments).
3299

3300
        Returns
3301
        -------
3302
        An object read from the pickle
3303
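
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to have inversion flowlines
        already computed):

        >>> fls = gdir.read_pickle('inversion_flowlines')
        >>> surface_h = fls[-1].surface_h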
        """
3304

3305
        use_comp = (use_compression if use_compression is not None
7✔
3306
                    else cfg.PARAMS['use_compression'])
3307
        _open = gzip.open if use_comp else open
7✔
3308
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3309
        with _open(fp, 'rb') as f:
7✔
3310
            try:
7✔
3311
                out = pickle.load(f)
7✔
3312
            except ModuleNotFoundError as err:
×
3313
                if err.name == "shapely.io":
×
3314
                    err.msg = "You need shapely version 2.0 or higher for this to work."
×
3315
                raise
×
3316

3317
        # Some new attrs to add to old pre-processed directories
3318
        if filename == 'model_flowlines':
7✔
3319
            if getattr(out[0], 'map_trafo', None) is None:
7✔
3320
                try:
1✔
3321
                    # This may fail for very old gdirs
3322
                    grid = self.grid
1✔
3323
                except InvalidWorkflowError:
×
3324
                    return out
×
3325

3326
                # Add the trafo
3327
                trafo = partial(grid.ij_to_crs, crs=salem.wgs84)
1✔
3328
                for fl in out:
1✔
3329
                    fl.map_trafo = trafo
1✔
3330

3331
        return out
7✔
3332

3333
    def write_pickle(self, var, filename, use_compression=None, filesuffix=''):
9✔
3334
        """ Writes a variable to a pickle on disk.
3335

3336
        Parameters
3337
        ----------
3338
        var : object
3339
            the variable to write to disk
3340
        filename : str
3341
            file name (must be listed in cfg.BASENAME)
3342
        use_compression : bool
3343
            whether or not the file was compressed. Default is to use
3344
            cfg.PARAMS['use_compression'] for this (recommended)
3345
        filesuffix : str
3346
            append a suffix to the filename (useful for experiments).
3347
        """
3348
        use_comp = (use_compression if use_compression is not None
7✔
3349
                    else cfg.PARAMS['use_compression'])
3350
        _open = gzip.open if use_comp else open
7✔
3351
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3352
        with _open(fp, 'wb') as f:
7✔
3353
            pickle.dump(var, f, protocol=4)
7✔
3354

3355
    def read_json(self, filename, filesuffix='', allow_empty=False):
9✔
3356
        """Reads a JSON file located in the directory.
3357

3358
        Parameters
3359
        ----------
3360
        filename : str
3361
            file name (must be listed in cfg.BASENAME)
3362
        filesuffix : str
3363
            append a suffix to the filename (useful for experiments).
3364
        allow_empty : bool
3365
            if True, does not raise an error if the file is not there.
3366

3367
        Returns
3368
        -------
3369
        A dictionary read from the JSON file
3370
        """
3371

3372
        fp = self.get_filepath(filename, filesuffix=filesuffix)
4✔
3373
        if allow_empty:
4✔
3374
            try:
×
3375
                with open(fp, 'r') as f:
×
3376
                    out = json.load(f)
×
3377
            except FileNotFoundError:
×
3378
                out = {}
×
3379
        else:
3380
            with open(fp, 'r') as f:
4✔
3381
                out = json.load(f)
3✔
3382
        return out
3✔
3383

3384
    def write_json(self, var, filename, filesuffix=''):
9✔
3385
        """ Writes a variable to a pickle on disk.
3386

3387
        Parameters
3388
        ----------
3389
        var : object
3390
            the variable to write to JSON (must be a dictionary)
3391
        filename : str
3392
            file name (must be listed in cfg.BASENAME)
3393
        filesuffix : str
3394
            append a suffix to the filename (useful for experiments).
3395
        """
3396

3397
        def np_convert(o):
1✔
3398
            if isinstance(o, np.int64):
×
3399
                return int(o)
×
3400
            raise TypeError
×
3401

3402
        fp = self.get_filepath(filename, filesuffix=filesuffix)
1✔
3403
        with open(fp, 'w') as f:
1✔
3404
            json.dump(var, f, default=np_convert)
1✔
3405

3406
    def read_yml(self, filename, filesuffix='', allow_empty=False):
9✔
3407
        """Reads a yml file located in the directory.
3408

3409
        Parameters
3410
        ----------
3411
        filename : str
3412
            file name (must be listed in cfg.BASENAME)
3413
        filesuffix : str
3414
            append a suffix to the filename (useful for experiments).
3415
        allow_empty : bool
3416
            if True, does not raise an error if the file is not there.
3417

3418
        Returns
3419
        -------
3420
        A dictionary read from the yml file
3421
        """
3422

3423
        fp = self.get_filepath(filename, filesuffix=filesuffix)
4✔
3424
        if allow_empty:
4✔
3425
            try:
×
3426
                with open(fp, 'r') as f:
×
3427
                    out = yaml.safe_load(f)
×
3428
            except FileNotFoundError:
×
3429
                out = {}
×
3430
        else:
3431
            with open(fp, 'r') as f:
4✔
3432
                out = yaml.safe_load(f)
4✔
3433
        return out
4✔
3434

3435
    def write_yml(self, var, filename, filesuffix=''):
9✔
3436
        """ Writes a variable to a yml file on disk.
3437

3438
        Parameters
3439
        ----------
3440
        var : object
3441
            the variable to write to yml (must be a dictionary)
3442
        filename : str
3443
            file name (must be listed in cfg.BASENAME)
3444
        filesuffix : str
3445
            append a suffix to the filename (useful for experiments).
3446
        """
3447

3448
        fp = self.get_filepath(filename, filesuffix=filesuffix)
1✔
3449
        with open(fp, 'w') as f:
1✔
3450
            yaml.dump(var, f)
1✔
3451

3452
    def get_climate_info(self, input_filesuffix=''):
9✔
3453
        """Convenience function to read attributes of the historical climate.
3454

3455
        Parameters
3456
        ----------
3457
        input_filesuffix : str
3458
            input_filesuffix of the climate_historical that should be used.
3459
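
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to have a processed
        climate_historical file):

        >>> ci = gdir.get_climate_info()
        >>> ci['baseline_climate_source'], ci['baseline_yr_0'], ci['baseline_yr_1']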
        """
3460
        out = {}
7✔
3461
        try:
7✔
3462
            f = self.get_filepath('climate_historical',
7✔
3463
                                  filesuffix=input_filesuffix)
3464
            with ncDataset(f) as nc:
7✔
3465
                out['baseline_climate_source'] = nc.climate_source
7✔
3466
                try:
7✔
3467
                    out['baseline_yr_0'] = nc.yr_0
7✔
3468
                except AttributeError:
1✔
3469
                    # needed for back-compatibility before v1.6
3470
                    out['baseline_yr_0'] = nc.hydro_yr_0
1✔
3471
                try:
7✔
3472
                    out['baseline_yr_1'] = nc.yr_1
7✔
3473
                except AttributeError:
1✔
3474
                    # needed for back-compatibility before v1.6
3475
                    out['baseline_yr_1'] = nc.hydro_yr_1
1✔
3476
                out['baseline_climate_ref_hgt'] = nc.ref_hgt
7✔
3477
                out['baseline_climate_ref_pix_lon'] = nc.ref_pix_lon
7✔
3478
                out['baseline_climate_ref_pix_lat'] = nc.ref_pix_lat
7✔
3479
        except FileNotFoundError:
2✔
3480
            pass
2✔
3481

3482
        return out
7✔
3483

3484
    def read_text(self, filename, filesuffix=''):
9✔
3485
        """Reads a text file located in the directory.
3486

3487
        Parameters
3488
        ----------
3489
        filename : str
3490
            file name (must be listed in cfg.BASENAME)
3491
        filesuffix : str
3492
            append a suffix to the filename (useful for experiments).
3493

3494
        Returns
3495
        -------
3496
        the text
3497
        """
3498

3499
        fp = self.get_filepath(filename, filesuffix=filesuffix)
×
3500
        with open(fp, 'r') as f:
×
3501
            out = f.read()
×
3502
        return out
×
3503

3504
    @classmethod
9✔
3505
    def _read_shapefile_from_path(cls, fp):
9✔
3506
        if '.shp' not in fp:
7✔
3507
            raise ValueError('File ending not that of a shapefile')
×
3508

3509
        if cfg.PARAMS['use_tar_shapefiles']:
7✔
3510
            fp = 'tar://' + fp.replace('.shp', '.tar')
7✔
3511
            if cfg.PARAMS['use_compression']:
7✔
3512
                fp += '.gz'
7✔
3513

3514
        shp = gpd.read_file(fp)
7✔
3515

3516
        # .properties file is created for compressed shapefiles. github: #904
3517
        _properties = fp.replace('tar://', '') + '.properties'
7✔
3518
        if os.path.isfile(_properties):
7✔
3519
            # remove it, to keep GDir slim
3520
            os.remove(_properties)
7✔
3521

3522
        return shp
7✔
3523

3524
    def read_shapefile(self, filename, filesuffix=''):
9✔
3525
        """Reads a shapefile located in the directory.
3526

3527
        Parameters
3528
        ----------
3529
        filename : str
3530
            file name (must be listed in cfg.BASENAME)
3531
        filesuffix : str
3532
            append a suffix to the filename (useful for experiments).
3533

3534
        Returns
3535
        -------
3536
        A geopandas.DataFrame
3537
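
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to be an initialized
        GlacierDirectory):

        >>> outlines = gdir.read_shapefile('outlines')
        >>> geom = outlines.geometry.iloc[0]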
        """
3538
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3539
        return self._read_shapefile_from_path(fp)
7✔
3540

3541
    def write_shapefile(self, var, filename, filesuffix=''):
9✔
3542
        """ Writes a variable to a shapefile on disk.
3543

3544
        Parameters
3545
        ----------
3546
        var : object
3547
            the variable to write to shapefile (must be a geopandas.DataFrame)
3548
        filename : str
3549
            file name (must be listed in cfg.BASENAME)
3550
        filesuffix : str
3551
            append a suffix to the filename (useful for experiments).
3552
        """
3553
        fp = self.get_filepath(filename, filesuffix=filesuffix)
7✔
3554
        _write_shape_to_disk(var, fp, to_tar=cfg.PARAMS['use_tar_shapefiles'])
7✔
3555

3556
    def write_monthly_climate_file(self, time, prcp, temp,
9✔
3557
                                   ref_pix_hgt, ref_pix_lon, ref_pix_lat, *,
3558
                                   temp_std=None,
3559
                                   time_unit=None,
3560
                                   calendar=None,
3561
                                   source=None,
3562
                                   file_name='climate_historical',
3563
                                   filesuffix=''):
3564
        """Creates a netCDF4 file with climate data timeseries.
3565

3566
        Parameters
3567
        ----------
3568
        time : ndarray
3569
            the time array, in a format understood by netCDF4
3570
        prcp : ndarray
3571
            the precipitation array (unit: 'kg m-2 month-1')
3572
        temp : ndarray
3573
            the temperature array (unit: 'degC')
3574
        ref_pix_hgt : float
3575
            the elevation of the dataset's reference altitude
3576
            (for correction). In practice, it is the same altitude as the
3577
            baseline climate.
3578
        ref_pix_lon : float
3579
            the location of the gridded data's grid point
3580
        ref_pix_lat : float
3581
            the location of the gridded data's grid point
3582
        temp_std : ndarray, optional
3583
            the daily standard deviation of temperature (useful for PyGEM)
3584
        time_unit : str
3585
            the reference time unit for your time array. This should be chosen
3586
            depending on the length of your data. The default is to choose
3587
            it ourselves based on the starting year.
3588
        calendar : str
3589
            If you use an exotic calendar (e.g. 'noleap')
3590
        source : str
3591
            the climate data source (required)
3592
        file_name : str
3593
            How to name the file
3594
        filesuffix : str
3595
            Apply a suffix to the file
3596
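
        Examples
        --------
        Minimal sketch with synthetic data (values, coordinates and the source
        name are assumptions made for the example):

        >>> import numpy as np
        >>> import pandas as pd
        >>> time = pd.date_range('1901-01-01', '2000-12-01', freq='MS')
        >>> temp = np.random.randn(len(time)) - 5.  # degC
        >>> prcp = np.random.rand(len(time)) * 100.  # kg m-2 month-1
        >>> gdir.write_monthly_climate_file(time, prcp, temp, 2500.,
        ...                                 10.5, 46.5, source='my_dataset')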
        """
3597

3598
        if isinstance(prcp, xr.DataArray):
7✔
3599
            prcp = prcp.values
4✔
3600
        if isinstance(temp, xr.DataArray):
7✔
3601
            temp = temp.values
×
3602
        if isinstance(temp_std, xr.DataArray):
7✔
3603
            temp_std = temp_std.values
×
3604
        # overwrite as default
3605
        fpath = self.get_filepath(file_name, filesuffix=filesuffix)
7✔
3606
        if os.path.exists(fpath):
7✔
3607
            os.remove(fpath)
4✔
3608

3609
        if source is None:
7✔
3610
            raise InvalidParamsError('`source` kwarg is required')
×
3611

3612
        zlib = cfg.PARAMS['compress_climate_netcdf']
7✔
3613

3614
        try:
7✔
3615
            y0 = time[0].year
7✔
3616
            y1 = time[-1].year
7✔
3617
        except AttributeError:
4✔
3618
            time = pd.DatetimeIndex(time)
4✔
3619
            y0 = time[0].year
4✔
3620
            y1 = time[-1].year
4✔
3621

3622
        if time_unit is None:
7✔
3623
            # http://pandas.pydata.org/pandas-docs/stable/timeseries.html
3624
            # #timestamp-limitations
3625
            if y0 > 1800:
7✔
3626
                time_unit = 'days since 1801-01-01 00:00:00'
7✔
3627
            elif y0 >= 0:
1✔
3628
                time_unit = ('days since {:04d}-01-01 '
1✔
3629
                             '00:00:00'.format(time[0].year))
3630
            else:
3631
                raise InvalidParamsError('Time format not supported')
×
3632

3633
        with ncDataset(fpath, 'w', format='NETCDF4') as nc:
7✔
3634
            nc.ref_hgt = ref_pix_hgt
7✔
3635
            nc.ref_pix_lon = ref_pix_lon
7✔
3636
            nc.ref_pix_lat = ref_pix_lat
7✔
3637
            nc.ref_pix_dis = haversine(self.cenlon, self.cenlat,
7✔
3638
                                       ref_pix_lon, ref_pix_lat)
3639
            nc.climate_source = source
7✔
3640

3641
            nc.yr_0 = y0
7✔
3642
            nc.yr_1 = y1
7✔
3643

3644
            nc.createDimension('time', None)
7✔
3645

3646
            nc.author = 'OGGM'
7✔
3647
            nc.author_info = 'Open Global Glacier Model'
7✔
3648

3649
            timev = nc.createVariable('time', 'i4', ('time',))
7✔
3650

3651
            tatts = {'units': time_unit}
7✔
3652
            if calendar is None:
7✔
3653
                calendar = 'standard'
7✔
3654

3655
            tatts['calendar'] = calendar
7✔
3656
            try:
7✔
3657
                numdate = netCDF4.date2num([t for t in time], time_unit,
7✔
3658
                                           calendar=calendar)
3659
            except TypeError:
×
3660
                # numpy's broken datetime only works for us precision
3661
                time = time.astype('M8[us]').astype(datetime.datetime)
×
3662
                numdate = netCDF4.date2num(time, time_unit, calendar=calendar)
×
3663

3664
            timev.setncatts(tatts)
7✔
3665
            timev[:] = numdate
7✔
3666

3667
            v = nc.createVariable('prcp', 'f4', ('time',), zlib=zlib)
7✔
3668
            v.units = 'kg m-2'
7✔
3669
            v.long_name = 'total monthly precipitation amount'
7✔
3670

3671
            v[:] = prcp
7✔
3672

3673
            v = nc.createVariable('temp', 'f4', ('time',), zlib=zlib)
7✔
3674
            v.units = 'degC'
7✔
3675
            v.long_name = '2m temperature at height ref_hgt'
7✔
3676
            v[:] = temp
7✔
3677

3678
            if temp_std is not None:
7✔
3679
                v = nc.createVariable('temp_std', 'f4', ('time',), zlib=zlib)
2✔
3680
                v.units = 'degC'
2✔
3681
                v.long_name = 'standard deviation of daily temperatures'
2✔
3682
                v[:] = temp_std
2✔
3683

3684
    def get_inversion_flowline_hw(self):
9✔
3685
        """ Shortcut function to read the heights and widths of the glacier.
3686

3687
        Parameters
3688
        ----------
3689

3690
        Returns
3691
        -------
3692
        (heights, widths) in units of m
3693
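
        Examples
        --------
        Illustrative sketch (``gdir`` is assumed to have inversion flowlines):

        >>> heights, widths = gdir.get_inversion_flowline_hw()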
        """
3694

3695
        h = np.array([])
4✔
3696
        w = np.array([])
4✔
3697
        fls = self.read_pickle('inversion_flowlines')
4✔
3698
        for fl in fls:
4✔
3699
            w = np.append(w, fl.widths)
4✔
3700
            h = np.append(h, fl.surface_h)
4✔
3701
        return h, w * self.grid.dx
4✔
3702

3703
    def set_ref_mb_data(self, mb_df=None):
9✔
3704
        """Adds reference mass balance data to this glacier.
3705

3706
        The format should be a dataframe with the years as index and
3707
        'ANNUAL_BALANCE' as values in mm yr-1.
3708
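
        Examples
        --------
        Minimal sketch with made-up values (illustrative only):

        >>> import pandas as pd
        >>> mb_df = pd.DataFrame({'ANNUAL_BALANCE': [-500., -720., -300.]},
        ...                      index=[2001, 2002, 2003])
        >>> gdir.set_ref_mb_data(mb_df)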
        """
3709

3710
        if self.is_tidewater:
6✔
3711
            log.warning('You are trying to set MB data on a tidewater glacier!'
×
3712
                        ' These data will be ignored by the MB model '
3713
                        'calibration routine.')
3714

3715
        if mb_df is None:
6✔
3716

3717
            flink, mbdatadir = get_wgms_files()
6✔
3718
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
6✔
3719
            wid = flink.loc[flink[c] == self.rgi_id]
6✔
3720
            if len(wid) == 0:
6✔
3721
                raise RuntimeError('Not a reference glacier!')
1✔
3722
            wid = wid.WGMS_ID.values[0]
6✔
3723

3724
            # file
3725
            reff = os.path.join(mbdatadir,
6✔
3726
                                'mbdata_WGMS-{:05d}.csv'.format(wid))
3727
            # list of years
3728
            mb_df = pd.read_csv(reff).set_index('YEAR')
6✔
3729

3730
        # Quality checks
3731
        if 'ANNUAL_BALANCE' not in mb_df:
6✔
3732
            raise InvalidParamsError('Need an "ANNUAL_BALANCE" column in the '
×
3733
                                     'dataframe.')
3734
        mb_df.index.name = 'YEAR'
6✔
3735
        self._mbdf = mb_df
6✔
3736

3737
    def get_ref_mb_data(self, y0=None, y1=None, input_filesuffix=''):
9✔
3738
        """Get the reference mb data from WGMS (for some glaciers only!).
3739

3740
        Raises an Error if it isn't a reference glacier at all.
3741

3742
        Parameters
3743
        ----------
3744
        y0 : int
3745
            override the default behavior which is to check the available
3746
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3747
        y1 : int
3748
            override the default behavior which is to check the available
3749
            climate data (or PARAMS['ref_mb_valid_window']) and decide
3750
        input_filesuffix : str
3751
            input_filesuffix of the climate_historical that should be used
3752
            if y0 and y1 are not given. The default is to take the
3753
            climate_historical without input_filesuffix
3754
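
        Examples
        --------
        Illustrative sketch (``gdir`` must be a WGMS reference glacier with
        processed climate data, e.g. Hintereisferner, RGI60-11.00897):

        >>> mb_df = gdir.get_ref_mb_data()
        >>> annual_mb = mb_df['ANNUAL_BALANCE']  # mm yr-1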
        """
3755

3756
        if self._mbdf is None:
6✔
3757
            self.set_ref_mb_data()
6✔
3758

3759
        # logic for period
3760
        t0, t1 = cfg.PARAMS['ref_mb_valid_window']
6✔
3761
        if t0 > 0 and y0 is None:
6✔
3762
            y0 = t0
×
3763
        if t1 > 0 and y1 is None:
6✔
3764
            y1 = t1
×
3765

3766
        if y0 is None or y1 is None:
6✔
3767
            ci = self.get_climate_info(input_filesuffix=input_filesuffix)
6✔
3768
            if 'baseline_yr_0' not in ci:
6✔
3769
                raise InvalidWorkflowError('Please process some climate data '
1✔
3770
                                           'before this call')
3771
            y0 = ci['baseline_yr_0'] if y0 is None else y0
6✔
3772
            y1 = ci['baseline_yr_1'] if y1 is None else y1
6✔
3773

3774
        if len(self._mbdf) > 1:
6✔
3775
            out = self._mbdf.loc[y0:y1]
6✔
3776
        else:
3777
            # Some files are just empty
3778
            out = self._mbdf
×
3779
        return out.dropna(subset=['ANNUAL_BALANCE'])
6✔
3780

3781
    def get_ref_mb_profile(self, input_filesuffix='', constant_dh=False, obs_ratio_needed=0):
9✔
3782
        """Get the reference mb profile data from WGMS (if available!).
3783

3784
        Returns None if this glacier has no profile and an Error if it isn't
3785
        a reference glacier at all.
3786

3787
        Parameters
3788
        ----------
3789
        input_filesuffix : str
3790
            input_filesuffix of the climate_historical that should be used. The
3791
            default is to take the climate_historical without input_filesuffix
3792
        constant_dh : boolean
3793
            If set to True, it outputs the MB profiles with a constant step size
3794
            of dh=50m by using interpolation. This can be useful for comparisons
3795
            between years. Default is False which gives the raw
3796
            elevation-dependent point MB
3797
        obs_ratio_needed : float
3798
            minimum fraction of years with observations that an elevation band
3799
            needs in order to be included in the MB profile (0 <= obs_ratio_needed <= 1).
3800
            If obs_ratio_needed is set to 0, the output shows all elevation-band
3801
            observations (default is 0).
3802
            When estimating mean MB profiles, it is advisable to set obs_ratio_needed
3803
            to 0.6, e.g. if there are 5 years of measurements in total, only those elevation
3804
            bands with at least 3 years of measurements are used. If obs_ratio_needed is not
3805
            0, constant_dh has to be set to True.
3806
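
        Examples
        --------
        Illustrative sketch (``gdir`` must be a WGMS reference glacier with
        profile data and processed climate):

        >>> prof = gdir.get_ref_mb_profile(constant_dh=True,
        ...                                obs_ratio_needed=0.6)
        >>> mean_profile = prof.mean()  # mean MB per elevation band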
        """
3807

3808
        if obs_ratio_needed != 0 and constant_dh is False:
1✔
3809
            raise InvalidParamsError('If a filter is applied, you have to set'
×
3810
                                     ' constant_dh to True')
3811
        if obs_ratio_needed < 0 or obs_ratio_needed > 1:
1✔
3812
            raise InvalidParamsError('obs_ratio_needed is the ratio of necessary relative amount'
×
3813
                                         ' of observations per elevation band. It has to be between '
3814
                                     '0 and 1!')
3815

3816
        if self._mbprofdf is None and not constant_dh:
1✔
3817
            flink, mbdatadir = get_wgms_files()
1✔
3818
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3819
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3820
            if len(wid) == 0:
1✔
3821
                raise RuntimeError('Not a reference glacier!')
×
3822
            wid = wid.WGMS_ID.values[0]
1✔
3823

3824
            # file
3825
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles')
1✔
3826
            reff = os.path.join(mbdatadir,
1✔
3827
                                'profile_WGMS-{:05d}.csv'.format(wid))
3828
            if not os.path.exists(reff):
1✔
3829
                return None
×
3830
            # list of years
3831
            self._mbprofdf = pd.read_csv(reff, index_col=0)
1✔
3832

3833
        if self._mbprofdf_cte_dh is None and constant_dh:
1✔
3834
            flink, mbdatadir = get_wgms_files()
1✔
3835
            c = 'RGI{}0_ID'.format(self.rgi_version[0])
1✔
3836
            wid = flink.loc[flink[c] == self.rgi_id]
1✔
3837
            if len(wid) == 0:
1✔
3838
                raise RuntimeError('Not a reference glacier!')
×
3839
            wid = wid.WGMS_ID.values[0]
1✔
3840

3841
            # file
3842
            mbdatadir = os.path.join(os.path.dirname(mbdatadir), 'mb_profiles_constant_dh')
1✔
3843
            reff = os.path.join(mbdatadir,
1✔
3844
                                'profile_constant_dh_WGMS-{:05d}.csv'.format(wid))
3845
            if not os.path.exists(reff):
1✔
3846
                return None
×
3847
            # list of years
3848
            self._mbprofdf_cte_dh = pd.read_csv(reff, index_col=0)
1✔
3849

3850
        ci = self.get_climate_info(input_filesuffix=input_filesuffix)
1✔
3851
        if 'baseline_yr_0' not in ci:
1✔
3852
            raise RuntimeError('Please process some climate data before this call')
×
3853
        y0 = ci['baseline_yr_0']
1✔
3854
        y1 = ci['baseline_yr_1']
1✔
3855
        if not constant_dh:
1✔
3856
            if len(self._mbprofdf) > 1:
1✔
3857
                out = self._mbprofdf.loc[y0:y1]
1✔
3858
            else:
3859
                # Some files are just empty
3860
                out = self._mbprofdf
×
3861
        else:
3862
            if len(self._mbprofdf_cte_dh) > 1:
1✔
3863
                out = self._mbprofdf_cte_dh.loc[y0:y1]
1✔
3864
                if obs_ratio_needed != 0:
1✔
3865
                    # amount of years with any observation
3866
                    n_obs = len(out.index)
1✔
3867
                    # amount of years with observations for each elevation band
3868
                    n_obs_h = out.describe().loc['count']
1✔
3869
                    # relative amount of observations per elevation band
3870
                    rel_obs_h = n_obs_h / n_obs
1✔
3871
                    # select only those elevation bands with a specific ratio
3872
                    # of years with available measurements
3873
                    out = out[rel_obs_h[rel_obs_h >= obs_ratio_needed].index]
1✔
3874

3875
            else:
3876
                # Some files are just empty
3877
                out = self._mbprofdf_cte_dh
×
3878
        out.columns = [float(c) for c in out.columns]
1✔
3879
        return out.dropna(axis=1, how='all').dropna(axis=0, how='all')
1✔
3880

3881
    def get_ref_length_data(self):
9✔
3882
        """Get the glacier length data from P. Leclercq's data base.
3883

3884
         https://folk.uio.no/paulwl/data.php
3885

3886
         For some glaciers only!
3887
         """
3888

3889
        df = pd.read_csv(get_demo_file('rgi_leclercq_links_2014_RGIV6.csv'))
1✔
3890
        df = df.loc[df.RGI_ID == self.rgi_id]
1✔
3891
        if len(df) == 0:
1✔
3892
            raise RuntimeError('No length data found for this glacier!')
×
3893
        ide = df.LID.values[0]
1✔
3894

3895
        f = get_demo_file('Glacier_Lengths_Leclercq.nc')
1✔
3896
        with xr.open_dataset(f) as dsg:
1✔
3897
            # The database is not sorted by ID. Don't ask me...
3898
            grp_id = np.argwhere(dsg['index'].values == ide)[0][0] + 1
1✔
3899
        with xr.open_dataset(f, group=str(grp_id)) as ds:
1✔
3900
            df = ds.to_dataframe()
1✔
3901
            df.name = ds.glacier_name
1✔
3902
        return df
1✔
3903

3904
    def log(self, task_name, *, err=None, task_time=None):
9✔
3905
        """Logs a message to the glacier directory.
3906

3907
        It is usually called by the :py:class:`entity_task` decorator;
3908
        normally you don't need to worry about it.
3909

3910
        Parameters
3911
        ----------
3912
        task_name : str
3913
            the name of the task to log
3914
        err : Exception
3915
            the exception which has been raised by func (if no exception was
3916
            raised, a success is logged)
3917
        task_time : float
3918
            the time (in seconds) that the task needed to run
3919
        """
3920

3921
        # a line per function call
3922
        nowsrt = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
7✔
3923
        line = nowsrt + ';' + task_name + ';'
7✔
3924

3925
        if task_time is not None:
7✔
3926
            line += 'time:{};'.format(task_time)
7✔
3927

3928
        if err is None:
7✔
3929
            line += 'SUCCESS'
7✔
3930
        else:
3931
            line += err.__class__.__name__ + ': {}'.format(err)
6✔
3932

3933
        line = line.replace('\n', ' ')
7✔
3934

3935
        count = 0
7✔
3936
        while count < 5:
7✔
3937
            try:
7✔
3938
                with open(self.logfile, 'a') as logfile:
7✔
3939
                    logfile.write(line + '\n')
7✔
3940
                break
7✔
3941
            except FileNotFoundError:
×
3942
                # I really don't know when this error happens
3943
                # In this case sleep and try again
3944
                time.sleep(0.05)
×
3945
                count += 1
×
3946

3947
        if count == 5:
7✔
3948
            log.warning('Could not write to logfile: ' + line)
×
3949

3950
    def get_task_status(self, task_name):
9✔
3951
        """Opens this directory's log file to check for a task's outcome.
3952

3953
        Parameters
3954
        ----------
3955
        task_name : str
3956
            the name of the task which has to be tested for
3957

3958
        Returns
3959
        -------
3960
        The last message for this task (SUCCESS if it was successful),
3961
        None if the task was not run yet
3962
        """
3963

3964
        if not os.path.isfile(self.logfile):
7✔
3965
            return None
7✔
3966

3967
        with open(self.logfile) as logfile:
7✔
3968
            lines = logfile.readlines()
7✔
3969

3970
        lines = [l.replace('\n', '') for l in lines
7✔
3971
                 if ';' in l and (task_name == l.split(';')[1])]
3972
        if lines:
7✔
3973
            # keep only the last log
3974
            return lines[-1].split(';')[-1]
7✔
3975
        else:
3976
            return None
7✔
3977

3978
    def get_task_time(self, task_name):
9✔
3979
        """Opens this directory's log file to check for a task's run time.
3980

3981
        Parameters
3982
        ----------
3983
        task_name : str
3984
            the name of the task which has to be tested for
3985

3986
        Returns
3987
        -------
3988
        The time (in seconds) that the last call of this task needed.
3989
        None if the task was not run yet, or if it errored
3990
        """
3991

3992
        if not os.path.isfile(self.logfile):
1✔
3993
            return None
×
3994

3995
        with open(self.logfile) as logfile:
1✔
3996
            lines = logfile.readlines()
1✔
3997

3998
        lines = [l.replace('\n', '') for l in lines
1✔
3999
                 if ';' in l and (task_name == l.split(';')[1])]
4000
        if lines:
1✔
4001
            line = lines[-1]
1✔
4002
            # Last log is message
4003
            if 'ERROR' in line.split(';')[-1] or 'time:' not in line:
1✔
4004
                return None
1✔
4005
            # Get the time
4006
            return float(line.split('time:')[-1].split(';')[0])
1✔
4007
        else:
4008
            return None
1✔
4009

4010
    def get_error_log(self):
9✔
4011
        """Reads the directory's log file to find the invalid task (if any).
4012

4013
        Returns
4014
        -------
4015
        The first error message in this log, None if all good
4016
        """
4017

4018
        if not os.path.isfile(self.logfile):
6✔
4019
            return None
1✔
4020

4021
        with open(self.logfile) as logfile:
6✔
4022
            lines = logfile.readlines()
6✔
4023

4024
        for l in lines:
6✔
4025
            if 'SUCCESS' in l:
6✔
4026
                continue
6✔
4027
            return l.replace('\n', '')
3✔
4028

4029
        # OK all good
4030
        return None
6✔
4031

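# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Querying the log written by `log()`. Assumes `gdir` is a GlacierDirectory
# on which some tasks already ran; the task name is illustrative.
status = gdir.get_task_status('my_task')   # 'SUCCESS', an error message, or None
runtime = gdir.get_task_time('my_task')    # seconds, or None if errored/not run
first_err = gdir.get_error_log()           # first failing log line, or None
print(status, runtime, first_err)
# -----------------------------------------------------------------------------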
4032
    @property
9✔
4033
    def settings_filesuffix(self):
9✔
4034
        return self._settings_filesuffix
×
4035

4036
    @settings_filesuffix.setter
9✔
4037
    def settings_filesuffix(self, value):
9✔
4038
        self._settings_filesuffix = value
7✔
4039
        self.settings = self.get_settings(settings_filesuffix=value)
7✔
4040

4041
    def get_settings(self, settings_filesuffix):
9✔
4042
        return ModelSettings(self, filesuffix=settings_filesuffix,
7✔
4043
                             always_reload_data=False)
4044

4045
    @property
9✔
4046
    def observations_filesuffix(self):
9✔
NEW
4047
        return self._observations_filesuffix
×
4048

4049
    @observations_filesuffix.setter
9✔
4050
    def observations_filesuffix(self, value):
9✔
4051
        self._observations_filesuffix = value
7✔
4052
        self.observations = self.get_observations(observations_filesuffix=value)
7✔
4053

4054
    def get_observations(self, observations_filesuffix):
9✔
4055
        return Observations(self, filesuffix=observations_filesuffix)
7✔
4056

4057

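# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Switching between settings / observations files via the properties above.
# Assigning a filesuffix re-instantiates the corresponding ModelSettings /
# Observations object (classes defined further below). `gdir` and the suffix
# are illustrative assumptions.
gdir.settings_filesuffix = '_experiment_1'
gdir.observations_filesuffix = '_experiment_1'
print(gdir.settings['glen_a'])   # resolved from the parent (cfg.PARAMS) if unset
print(gdir.observations)
# -----------------------------------------------------------------------------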
4058
@entity_task(log)
9✔
4059
def copy_to_basedir(gdir, base_dir=None, setup='run'):
9✔
4060
    """Copies the glacier directories and their content to a new location.
4061

4062
    This utility function allows selecting only certain files, thus
4063
    saving time when copying.
4064

4065
    Parameters
4066
    ----------
4067
    gdir : :py:class:`oggm.GlacierDirectory`
4068
        the glacier directory to copy
4069
    base_dir : str
4070
        path to the new base directory (should end with "per_glacier"
4071
        most of the time)
4072
    setup : str
4073
        the setup you want the copied directory to be useful for. Currently
4074
        supported are 'all' (copy the entire directory), 'inversion'
4075
        (copy the necessary files for the inversion AND the run),
4076
        'run' (copy the necessary files for a dynamical run), or 'run/spinup'
4077
        (copy the necessary files and all previously conducted model runs, e.g.
4078
        from a dynamic spinup).
4079

4080
    Returns
4081
    -------
4082
    New glacier directories from the copied folders
4083
    """
4084
    base_dir = os.path.abspath(base_dir)
4✔
4085
    new_dir = os.path.join(base_dir, gdir.rgi_id[:8], gdir.rgi_id[:11],
4✔
4086
                           gdir.rgi_id)
4087
    if setup == 'run':
4✔
4088
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
4089
                 'settings', 'climate_historical', 'glacier_grid',
4090
                 'gcm_data', 'diagnostics', 'log']
4091
        paths = ('*' + p + '*' for p in paths)
1✔
4092
        shutil.copytree(gdir.dir, new_dir,
1✔
4093
                        ignore=include_patterns(*paths))
4094
    elif setup == 'inversion':
4✔
4095
        paths = ['inversion_params', 'downstream_line', 'outlines',
1✔
4096
                 'inversion_flowlines', 'glacier_grid', 'diagnostics',
4097
                 'settings', 'climate_historical', 'gridded_data',
4098
                 'gcm_data', 'log']
4099
        paths = ('*' + p + '*' for p in paths)
1✔
4100
        shutil.copytree(gdir.dir, new_dir,
1✔
4101
                        ignore=include_patterns(*paths))
4102
    elif setup == 'run/spinup':
4✔
4103
        paths = ['model_flowlines', 'inversion_params', 'outlines',
1✔
4104
                 'settings', 'climate_historical', 'glacier_grid',
4105
                 'gcm_data', 'diagnostics', 'log', 'model_run',
4106
                 'model_diagnostics', 'model_geometry']
4107
        paths = ('*' + p + '*' for p in paths)
1✔
4108
        shutil.copytree(gdir.dir, new_dir,
1✔
4109
                        ignore=include_patterns(*paths))
4110
    elif setup == 'all':
4✔
4111
        shutil.copytree(gdir.dir, new_dir)
4✔
4112
    else:
4113
        raise ValueError('setup not understood: {}'.format(setup))
×
4114
    return GlacierDirectory(gdir.rgi_id, base_dir=base_dir)
4✔
4115

4116

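# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Copying several glacier directories, keeping only the files needed for a
# dynamical run. `gdirs` and the target path are illustrative; mapping an
# entity task over many glaciers is typically done with execute_entity_task.
from oggm import workflow
new_gdirs = workflow.execute_entity_task(copy_to_basedir, gdirs,
                                         base_dir='/path/to/new/per_glacier',
                                         setup='run')
# -----------------------------------------------------------------------------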
4117
def initialize_merged_gdir(main, tribs=[], glcdf=None,
9✔
4118
                           filename='climate_historical',
4119
                           input_filesuffix='',
4120
                           dem_source=None):
4121
    """Creates a new GlacierDirectory if tributaries are merged to a glacier
4122

4123
    This function should be called after centerlines.intersect_downstream_lines
4124
    and before flowline.merge_tributary_flowlines.
4125
    It will create a new GlacierDirectory, with a suitable DEM and reproject
4126
    the flowlines of the main glacier.
4127

4128
    Parameters
4129
    ----------
4130
    main : oggm.GlacierDirectory
4131
        the main glacier
4132
    tribs : list or dictionary containing oggm.GlacierDirectories
4133
        true tributary glaciers to the main glacier
4134
    glcdf: geopandas.GeoDataFrame
4135
        which contains the main glacier, will be downloaded if None
4136
    filename: str
4137
        Baseline climate file
4138
    input_filesuffix: str
4139
        Filesuffix to the climate file
4140
    dem_source: str
4141
        the DEM source to use
4142
    Returns
4143
    -------
4144
    merged : oggm.GlacierDirectory
4145
        the new GDir
4146
    """
4147
    from oggm.core.gis import define_glacier_region, merged_glacier_masks
×
4148

4149
    # If it's a dict, select the relevant ones
4150
    if isinstance(tribs, dict):
×
4151
        tribs = tribs[main.rgi_id]
×
4152
    # make sure tributaries are iterable
4153
    tribs = tolist(tribs)
×
4154

4155
    # read flowlines of the Main glacier
4156
    mfls = main.read_pickle('model_flowlines')
×
4157

4158
    # ------------------------------
4159
    # 0. create the new GlacierDirectory from the main glacier's GeoDataFrame
4161
    # It should be passed along; if not, download it
4161
    if glcdf is None:
×
4162
        glcdf = get_rgi_glacier_entities([main.rgi_id])
×
4163
    # Get index location of the specific glacier
4164
    idx = glcdf.loc[glcdf.RGIId == main.rgi_id].index
×
4165
    maindf = glcdf.loc[idx].copy()
×
4166

4167
    # add tributary geometries to maindf
4168
    merged_geometry = maindf.loc[idx, 'geometry'].iloc[0].buffer(0)
×
4169
    for trib in tribs:
×
4170
        geom = trib.read_pickle('geometries')['polygon_hr']
×
4171
        geom = salem.transform_geometry(geom, crs=trib.grid)
×
4172
        merged_geometry = merged_geometry.union(geom).buffer(0)
×
4173

4174
    # to get the center point, maximal extensions for DEM and single Polygon:
4175
    new_geometry = merged_geometry.convex_hull
×
4176
    maindf.loc[idx, 'geometry'] = new_geometry
×
4177

4178
    # make some adjustments to the rgi dataframe
4179
    # 1. calculate central point of new glacier
4180
    #    reproject twice to avoid Warning, first to flat projection
4181
    flat_centroid = salem.transform_geometry(new_geometry,
×
4182
                                             to_crs=main.grid).centroid
4183
    #    second reprojection of centroid to wgs84
4184
    new_centroid = salem.transform_geometry(flat_centroid, crs=main.grid)
×
4185
    maindf.loc[idx, 'CenLon'] = new_centroid.x
×
4186
    maindf.loc[idx, 'CenLat'] = new_centroid.y
×
4187
    # 2. update names
4188
    maindf.loc[idx, 'RGIId'] += '_merged'
×
4189
    if maindf.loc[idx, 'Name'].iloc[0] is None:
×
4190
        maindf.loc[idx, 'Name'] = main.name + ' (merged)'
×
4191
    else:
4192
        maindf.loc[idx, 'Name'] += ' (merged)'
×
4193

4194
    # finally create new Glacier Directory
4195
    # 1. set dx spacing to the one used for the flowlines
4196
    dx_method = cfg.PARAMS['grid_dx_method']
×
4197
    dx_spacing = cfg.PARAMS['fixed_dx']
×
4198
    cfg.PARAMS['grid_dx_method'] = 'fixed'
×
4199
    cfg.PARAMS['fixed_dx'] = mfls[-1].map_dx
×
4200
    merged = GlacierDirectory(maindf.loc[idx].iloc[0])
×
4201

4202
    # run define_glacier_region to get a fitting DEM and proper grid
4203
    define_glacier_region(merged, entity=maindf.loc[idx].iloc[0],
×
4204
                          source=dem_source)
4205

4206
    # write gridded data and geometries for visualization
4207
    merged_glacier_masks(merged, merged_geometry)
×
4208

4209
    # reset dx method
4210
    cfg.PARAMS['grid_dx_method'] = dx_method
×
4211
    cfg.PARAMS['fixed_dx'] = dx_spacing
×
4212

4213
    # copy main climate file, climate info and calib to new gdir
4214
    climfilename = filename + '_' + main.rgi_id + input_filesuffix + '.nc'
×
4215
    climfile = os.path.join(merged.dir, climfilename)
×
4216
    shutil.copyfile(main.get_filepath(filename, filesuffix=input_filesuffix),
×
4217
                    climfile)
4218
    _mufile = os.path.basename(merged.get_filepath('mb_calib')).split('.')
×
4219
    mufile = _mufile[0] + '_' + main.rgi_id + '.' + _mufile[1]
×
4220
    shutil.copyfile(main.get_filepath('mb_calib'),
×
4221
                    os.path.join(merged.dir, mufile))
4222

4223
    # reproject the flowlines to the new grid
4224
    for nr, fl in reversed(list(enumerate(mfls))):
×
4225

4226
        # 1. Step: Change projection to the main glaciers grid
4227
        _line = salem.transform_geometry(fl.line,
×
4228
                                         crs=main.grid, to_crs=merged.grid)
4229
        # 2. set new line
4230
        fl.set_line(_line)
×
4231

4232
        # 3. set flows_to attributes
4233
        if fl.flows_to is not None:
×
4234
            fl.set_flows_to(fl.flows_to)
×
4235
        # remove inflow points, will be set by other flowlines if need be
4236
        fl.inflow_points = []
×
4237

4238
        # 5. set grid size attributes
4239
        dx = [shpg.Point(fl.line.coords[i]).distance(
×
4240
            shpg.Point(fl.line.coords[i+1]))
4241
            for i, pt in enumerate(fl.line.coords[:-1])]  # get distance
4242
        # and check if equally spaced
4243
        if not np.allclose(dx, np.mean(dx), atol=1e-2):
×
4244
            raise RuntimeError('Flowline is not evenly spaced.')
×
4245
        # dx might change very slightly, but should not
4246
        fl.dx = np.mean(dx).round(2)
×
4247
        # map_dx should stay exactly the same
4248
        # fl.map_dx = mfls[-1].map_dx
4249
        fl.dx_meter = fl.map_dx * fl.dx
×
4250

4251
        # replace flowline within the list
4252
        mfls[nr] = fl
×
4253

4254
    # Write the reprojected flowlines
4255
    merged.write_pickle(mfls, 'model_flowlines')
×
4256

4257
    return merged
×
4258

4259

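# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Rough order of the merge workflow around the function above, following its
# docstring. `main_gdir` and `trib_gdirs` are illustrative and assumed to
# come from centerlines.intersect_downstream_lines.
merged_gdir = initialize_merged_gdir(main_gdir, tribs=trib_gdirs)
# ... flowline.merge_tributary_flowlines is then meant to be called on it.
# -----------------------------------------------------------------------------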
4260
@entity_task(log)
9✔
4261
def gdir_to_tar(gdir, base_dir=None, delete=True):
9✔
4262
    """Writes the content of a glacier directory to a tar file.
4263

4264
    The tar file is located at the same location as the original directory.
4265
    The glacier directory objects become unusable if the original is deleted!
4266

4267
    Parameters
4268
    ----------
4269
    base_dir : str
4270
        path to the basedir where to write the directory (defaults to the
4271
        same location of the original directory)
4272
    delete : bool
4273
        delete the original directory afterwards (default)
4274

4275
    Returns
4276
    -------
4277
    the path to the tar file
4278
    """
4279

4280
    source_dir = os.path.normpath(gdir.dir)
1✔
4281
    opath = source_dir + '.tar.gz'
1✔
4282
    if base_dir is not None:
1✔
4283
        opath = os.path.join(base_dir, os.path.relpath(opath, gdir.base_dir))
1✔
4284
        mkdir(os.path.dirname(opath))
1✔
4285

4286
    with tarfile.open(opath, "w:gz") as tar:
1✔
4287
        tar.add(source_dir, arcname=os.path.basename(source_dir))
1✔
4288

4289
    if delete:
1✔
4290
        shutil.rmtree(source_dir)
1✔
4291

4292
    return opath
1✔
4293

4294

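# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Compressing a single glacier directory in place. `gdir` is an illustrative
# pre-initialised GlacierDirectory; with delete=False the original folder is
# kept next to the new <dir>.tar.gz.
tar_path = gdir_to_tar(gdir, delete=False)
print(tar_path)
# -----------------------------------------------------------------------------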
4295
def base_dir_to_tar(base_dir=None, delete=True):
9✔
4296
    """Merge the directories into 1000 bundles as tar files.
4297

4298
    The tar files are located at the same location as the original directories.
4299

4300
    Parameters
4301
    ----------
4302
    base_dir : str
4303
        path to the basedir to parse (defaults to the working directory)
4304
    to_base_dir : str
4305
        path to the basedir where to write the directory (defaults to the
4306
        same location of the original directory)
4307
    delete : bool
4308
        delete the original directory tars afterwards (default)
4309
    """
4310

4311
    if base_dir is None:
1✔
4312
        if not cfg.PATHS.get('working_dir', None):
1✔
4313
            raise ValueError("Need a valid PATHS['working_dir']!")
×
4314
        base_dir = os.path.join(cfg.PATHS['working_dir'], 'per_glacier')
1✔
4315

4316
    to_delete = []
1✔
4317
    for dirname, subdirlist, filelist in os.walk(base_dir):
1✔
4318
        # RGI60-01.00
4319
        bname = os.path.basename(dirname)
1✔
4320
        # second argument for RGI7 naming convention
4321
        if not ((len(bname) == 11 and bname[-3] == '.') or
1✔
4322
                (len(bname) == 20 and bname[-3] == '-')):
4323
            continue
1✔
4324
        opath = dirname + '.tar'
1✔
4325
        with tarfile.open(opath, 'w') as tar:
1✔
4326
            tar.add(dirname, arcname=os.path.basename(dirname))
1✔
4327
        if delete:
1✔
4328
            to_delete.append(dirname)
1✔
4329

4330
    for dirname in to_delete:
1✔
4331
        shutil.rmtree(dirname)
1✔
4332

4333

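# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Typical two-step archiving: tar each glacier directory, then bundle the
# per-region folders. Assumes cfg.PATHS['working_dir'] is set and `gdirs`
# is a list of GlacierDirectories (both illustrative).
from oggm import workflow
workflow.execute_entity_task(gdir_to_tar, gdirs, delete=True)
base_dir_to_tar()   # defaults to <working_dir>/per_glacier
# -----------------------------------------------------------------------------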
4334
class YAMLFileObject(object):
9✔
4335
    def __init__(self, path, allow_empty=True, always_reload_data=True):
9✔
4336
        self.path = Path(path)
7✔
4337
        self.allow_empty = allow_empty
7✔
4338
        self.always_reload_data = always_reload_data
7✔
4339
        self.data = self._load()
7✔
4340

4341
    def _load(self):
9✔
4342
        if self.path.exists():
7✔
4343
            with open(self.path, 'r') as f:
7✔
4344
                return yaml.safe_load(f) or {}
7✔
4345
        elif not self.allow_empty:
7✔
4346
            raise FileNotFoundError(self.path)
2✔
4347
        return {}
7✔
4348

4349
    def _save(self):
9✔
4350
        with open(self.path, 'w') as f:
7✔
4351
            yaml.safe_dump(self.data, f)
7✔
4352

4353
    def _check_yaml_serializable(self, value):
9✔
4354
        try:
7✔
4355
            yaml.safe_dump(value)
7✔
4356
        except yaml.YAMLError as e:
×
4357
            raise ValueError(f"Value '{value}' is not YAML serializable ({e})")
×
4358

4359
    def _to_native_type(self, value):
9✔
4360
        if isinstance(value, (np.generic,)):
7✔
4361
            return value.item()
7✔
4362
        elif isinstance(value, dict):
7✔
4363
            return {k: self._to_native_type(v) for k, v in value.items()}
7✔
4364
        elif isinstance(value, list) or isinstance(value, np.ndarray):
7✔
4365
            return [self._to_native_type(v) for v in value]
7✔
4366
        return value
7✔
4367

4368
    def get(self, key):
9✔
4369
        if self.always_reload_data:
7✔
4370
            # stay in sync if several objects work on the same file
4371
            self.data = self._load()
7✔
4372
        if key in self.data:
7✔
4373
            return self.data[key]
7✔
4374
        else:
4375
            raise KeyError(f"Key '{key}' not found!")
×
4376

4377
    def set(self, key, value):
9✔
4378
        # stay in sync if several objects work on the same file
4379
        self.data = self._load()
7✔
4380
        value = self._to_native_type(value)
7✔
4381
        self._check_yaml_serializable(value)
7✔
4382
        self.data[key] = value
7✔
4383
        self._save()
7✔
4384

4385
    def __getitem__(self, key):
9✔
4386
        return self.get(key)
7✔
4387

4388
    def __setitem__(self, key, value):
9✔
4389
        self.set(key, value)
7✔
4390

4391
    def __repr__(self):
4392
        return repr(self.data)
4393

4394
    def __contains__(self, key):
9✔
4395
        return key in self.data
7✔
4396

4397

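# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# The YAML-backed mapping above in isolation: values are converted to native
# Python types, checked for YAML serialisability, and written to disk on
# every assignment. The file path is illustrative.
import numpy as np
f = YAMLFileObject('example_settings.yaml')
f['melt_f'] = np.float64(5.0)      # stored as a plain float
print(f['melt_f'], 'melt_f' in f)
# -----------------------------------------------------------------------------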
4398
class ModelSettings(YAMLFileObject):
9✔
4399
    def __init__(self, gdir, filesuffix='', parent_filesuffix=None,
9✔
4400
                 reset_parent_filesuffix=False, allow_empty=True,
4401
                 always_reload_data=True,
4402
                 ):
4403
        path = gdir.get_filepath('settings', filesuffix=filesuffix)
7✔
4404

4405
        super(ModelSettings, self).__init__(path, allow_empty=allow_empty,
7✔
4406
                                            always_reload_data=always_reload_data,
4407
                                            )
4408

4409
        # this is to inherit parameters from other settings files; the parent
4410
        # file is identified by parent_filesuffix
4411
        if 'parent_filesuffix' not in self.data:
7✔
4412
            if isinstance(parent_filesuffix, str):
7✔
4413
                self.set('parent_filesuffix', parent_filesuffix)
2✔
4414
            else:
4415
                # by default cfg.PARAMS is always the parent
4416
                self.set('parent_filesuffix', 'cfg.PARAMS')
7✔
4417
        elif isinstance(parent_filesuffix, str):
7✔
4418
            if self['parent_filesuffix'] != parent_filesuffix:
×
4419
                if not reset_parent_filesuffix:
×
4420
                    raise InvalidWorkflowError(
×
4421
                        f"Current parent_filesuffix="
4422
                        f"{self['parent_filesuffix']}, you provided "
4423
                        f"{parent_filesuffix}. If you want to set a new value "
4424
                        f"you can use reset_parent_filesuffix=True")
4425
                else:
4426
                    self.set('parent_filesuffix', parent_filesuffix)
×
4427

4428
        self.filesuffix = filesuffix
7✔
4429
        if self.data['parent_filesuffix'] == 'cfg.PARAMS':
7✔
4430
            self.defaults = cfg.PARAMS.copy()
7✔
4431
        else:
4432
            self.defaults = ModelSettings(gdir,
2✔
4433
                                          filesuffix=self.data['parent_filesuffix'],
4434
                                          # check if parent exists
4435
                                          allow_empty=False,
4436
                                          always_reload_data=always_reload_data,
4437
                                          )
4438
        self.gdir = gdir
7✔
4439

4440
    def get(self, key):
9✔
4441
        if self.always_reload_data:
7✔
4442
            # stay in sync if several objects work on the same file
4443
            self.data = self._load()
2✔
4444
        if key in self.data:
7✔
4445
            return self.data[key]
7✔
4446

4447
        if key in ['bias', 'melt_f', 'prcp_fac', 'temp_bias',
7✔
4448
                   'mb_global_params', 'baseline_climate_source']:
4449
            # this is for backwards compatibility, from when the mb_calib file was used
4450
            try:
4✔
4451
                value = self.gdir.read_json('mb_calib')[key]
4✔
4452
                self.set(key, value)
3✔
4453
                return value
3✔
4454
            except FileNotFoundError:
3✔
4455
                pass
3✔
4456

4457
        if key in ['inversion_glen_a', 'inversion_fs', 'calving_water_level',
7✔
4458
                   ]:  # TODO: add calving variables
4459
            # this is for backwards compatibility, from when some parameters were
4460
            # stored in the diagnostics
4461
            try:
7✔
4462
                value = self.gdir.get_diagnostics()[key]
7✔
4463
                self.set(key, value)
3✔
4464
                return value
3✔
4465
            except KeyError:
7✔
4466
                pass
7✔
4467

4468
        try:
7✔
4469
            # if a key is used from defaults, also add it to the settings file
4470
            value = self.defaults[key]
7✔
4471
            self.set(key, value)
7✔
4472
            return value
7✔
4473
        except KeyError:
2✔
4474
            raise KeyError(f"Key '{key}' not found!")
2✔
4475

4476
    def __repr__(self):
4477
        return ("filesuffix: "
4478
                f"{self.filesuffix if self.filesuffix != '' else 'None'}\n"
4479
                f"data:{repr(self.data)}")
4480

4481

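# --- Usage sketch (editorial aside, not part of _workflow.py) ---------------
# Reading a parameter through the fallback chain above: the settings file
# itself, then legacy mb_calib / diagnostics entries, then the parent
# (cfg.PARAMS by default); resolved values are written back to the file.
# `gdir` and the filesuffix are illustrative assumptions.
settings = ModelSettings(gdir, filesuffix='_experiment_1')
print(settings['glen_a'])   # falls back to cfg.PARAMS and is stored in the file
settings['melt_f'] = 5.0    # written to the suffixed settings file
# -----------------------------------------------------------------------------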
4482
class Observations(YAMLFileObject):
9✔
4483
    def __init__(self, gdir, filesuffix='', allow_empty=True,
9✔
4484
                 always_reload_data=True,
4485
                 ):
4486
        path = gdir.get_filepath('observations', filesuffix=filesuffix)
7✔
4487

4488
        super(Observations, self).__init__(path, allow_empty=allow_empty,
7✔
4489
                                           always_reload_data=always_reload_data,
4490
                                           )
4491

4492
        self.filesuffix = filesuffix
7✔
4493
        self.gdir = gdir
7✔
4494

4495
    def __repr__(self):
4496
        return ("filesuffix: "
4497
                f"{self.filesuffix if self.filesuffix != '' else 'None'}\n"
4498
                f"data:{repr(self.data)}")